XeTLA v0.3.6
IntelĀ® Xe Templates for Linear Algebra - API Definition Document
 
Loading...
Searching...
No Matches
common.hpp
Go to the documentation of this file.
1/*******************************************************************************
2* Copyright (c) 2022-2023 Intel Corporation
3*
4* Licensed under the Apache License, Version 2.0 (the "License");
5* you may not use this file except in compliance with the License.
6* You may obtain a copy of the License at
7*
8* http://www.apache.org/licenses/LICENSE-2.0
9*
10* Unless required by applicable law or agreed to in writing, software
11* distributed under the License is distributed on an "AS IS" BASIS,
12* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13* See the License for the specific language governing permissions and
14* limitations under the License.
15*******************************************************************************/
16
19
20#pragma once
21
22#include <CL/sycl.hpp>
23#include <ext/intel/esimd.hpp>
24
25template <class T>
26using remove_const_t = typename std::remove_const<T>::type;
27
30
34#define KERNEL_MAIN SYCL_ESIMD_KERNEL
35
39#define KERNEL_FUNC SYCL_ESIMD_FUNCTION
40
42
43#define __XETLA_API inline
44
45#ifndef __ESIMD_ENS
46#define __ESIMD_ENS sycl::ext::intel::experimental::esimd
47#endif
48
49#ifndef __ESIMD_NS
50#define __ESIMD_NS sycl::ext::intel::esimd
51#endif
52
53#define XETLA_MARKER(message) [[deprecated(message)]]
54#define XETLA_WARNING(msg) __SYCL_WARNING(msg)
55
56template <auto val>
57XETLA_MARKER("Help function to print value")
58inline constexpr void XETLA_PRINT() {}
59template <typename type>
60XETLA_MARKER("Help function to print type")
61inline constexpr void XETLA_PRINT() {}
62
64 return __ESIMD_ENS::get_hw_thread_id();
65}
66
68 return __ESIMD_ENS::get_subdevice_id();
69}
70
71namespace gpu::xetla {
72
73enum class gpu_arch : uint8_t { Xe = 0 };
74enum class grf_mode : uint8_t { normal = 0, double_grf = 1 };
75
76enum class mem_layout : uint8_t { row_major = 0, col_major = 1 };
77enum class mem_space : uint8_t { global = 0, local = 1 };
78enum class msg_type : uint8_t {
79 block_2d = 0,
80 block_1d = 1,
81 scatter = 2,
82 atomic_add = 3,
83 unaligned_2d = 4
84 // prefetch_2d = 4,
85 // prefetch_1d = 5
86};
87
89enum class cache_hint : uint8_t {
90 none = 0,
91 uncached = 1,
92 cached = 2,
93 write_back = 3,
94 write_through = 4,
95 streaming = 5,
97};
98
100enum class data_size : uint8_t {
101 default_size = 0,
102 u8 = 1,
103 u16 = 2,
104 u32 = 3,
105 u64 = 4,
106 u8u32 = 5,
107 u16u32 = 6,
108 u16u32h = 7,
109};
110
112enum class memory_kind : uint8_t {
113 untyped_global = 0,
115 typed_global = 2,
116 shared_local = 3,
117};
118
120enum class fence_op : uint8_t {
121 none = 0,
122 evict = 1,
123 invalidate = 2,
124 discard = 3,
125 clean = 4,
127 flushl2 = 5,
128};
130enum class fence_scope : uint8_t {
131 group = 0,
132 local = 1,
133 tile = 2,
134 gpu = 3,
135 gpus = 4,
136 system = 5,
137 sysacq = 6,
138};
139
142enum class atomic_op : uint8_t {
144 iinc = 0x0,
146 idec = 0x1,
148 iadd = 0x2,
150 isub = 0x3,
152 smin = 0x4,
154 smax = 0x5,
156 cmpxchg = 0x6,
158 fadd = 0x7,
160 fsub = 0x8,
162 fmin = 0x9,
164 fmax = 0xa,
166 fcmpxchg = 0xb,
168 umin = 0xc,
170 umax = 0xd,
172 bit_and = 0xe,
174 bit_or = 0xf,
176 bit_xor = 0x10,
178 load = 0x11,
180 store = 0x12
181};
182
184enum class argument_type : uint8_t {
185 U1 = 0, // unsigned 1 bit
186 S1 = 1, // signed 1 bit
187 U2 = 2, // unsigned 2 bits
188 S2 = 3, // signed 2 bits
189 U4 = 4, // unsigned 4 bits
190 S4 = 5, // signed 4 bits
191 U8 = 6, // unsigned 8 bits
192 S8 = 7, // signed 8 bits
193 BF16 = 8, // bfloat 16
194 FP16 = 9, // half float
195 TF32 = 12, // tensorfloat 32
196 DF = 13, // double (64bits)
197 NUM_ARG_TYPES = 14
198};
199
200// Saturation tag
202public:
203 using sat_tag = typename __ESIMD_NS::saturation_on_tag;
204 static constexpr sat_tag value = {};
205};
206
208public:
209 using sat_tag = typename __ESIMD_NS::saturation_off_tag;
210 static constexpr sat_tag value = {};
211};
212
213template <typename T>
214using is_xetla_scalar = typename __ESIMD_DNS::is_esimd_scalar<T>;
215
217enum class reduce_op : uint8_t {
218 sum = 0, // performance reduce_sum
219 prod = 1, // performance reduce_prod
220 min = 2, // performance reduce_min
221 max = 3, // performance reduce_max
222};
223
226
227#define SW_BARRIER() __ESIMD_NS::fence<__ESIMD_NS::fence_mask::sw_barrier>()
228
229__XETLA_API void xetla_wait(uint16_t val) {
230 __ESIMD_ENS::wait(__ESIMD_NS::simd<uint16_t, 1>(val));
231}
232
233} // namespace gpu::xetla
Definition common.hpp:207
static constexpr sat_tag value
Definition common.hpp:210
typename sycl::ext::intel::esimd ::saturation_off_tag sat_tag
Definition common.hpp:209
Definition common.hpp:201
static constexpr sat_tag value
Definition common.hpp:204
typename sycl::ext::intel::esimd ::saturation_on_tag sat_tag
Definition common.hpp:203
typename std::remove_const< T >::type remove_const_t
Definition common.hpp:26
int32_t xetla_get_subdevice_id()
Definition common.hpp:67
constexpr void XETLA_PRINT()
Definition common.hpp:58
#define XETLA_MARKER(message)
Definition common.hpp:53
#define __XETLA_API
Definition common.hpp:43
int32_t xetla_get_hw_thread_id()
Definition common.hpp:63
Definition arch_config.hpp:24
typename __ESIMD_DNS::is_esimd_scalar< T > is_xetla_scalar
Definition common.hpp:214
cache_hint
L1 or L2 cache hint kinds.
Definition common.hpp:89
data_size
Data size or format to read or store.
Definition common.hpp:100
@ u16u32h
load 16b, zero extend to 32b; store the opposite
@ u16u32
load 8b, zero extend to 32b; store the opposite
fence_op
The xetla_fence operation to apply to caches.
Definition common.hpp:120
@ clean
direct and clean lines are discarded w/o eviction
@ flushl2
dirty lines are written to memory, but retained in cache
@ discard
invalidate all clean lines
@ invalidate
dirty lines evicted and invalidated from L1
@ evict
no operation
fence_scope
The scope that xetla_fence operation should apply to.
Definition common.hpp:130
@ tile
flush out to the local scope
@ gpus
entire GPU, flush out to the GPUs LLC
@ sysacq
the entire system memory space
@ system
all GPUs in the system, flush out to memory shared by all GPUs
memory_kind
The specific LSC shared function to fence with xetla_fence.
Definition common.hpp:112
@ typed_global
low-priority untyped global memory
@ untyped_global_low_pri
untyped global memory
@ shared_local
typed global memory
reduce_op
xetla reduce op
Definition common.hpp:217
mem_space
Definition common.hpp:77
grf_mode
Definition common.hpp:74
atomic_op
Represents an atomic operation.
Definition common.hpp:142
@ umin
Atomic store the unsigned int min of src1 and memory data and return the old value....
@ fsub
Atomic float subtract of src1 from memory data and return the old value. see
@ bit_or
Atomic store the bitwise OR of src1 and memory data and return the old value. see
@ iadd
Atomic signed int add of src1 from memory data and return the old value. see
@ smin
Atomic store the signed int min of src1 and memory data and return the old value. see
@ cmpxchg
Atomic bit-compare src1_X and memory data and replace if equal with src1_Y. Returns the old value....
@ fmax
Atomic store the float max of src1 and memory data and return the old value. see
@ fadd
Atomic float add of src1 from memory data and return the old value. see
@ idec
Atomic decrement of memory data and return the old value. see
@ umax
Atomic store the unsigned int max of src1 and memory data and return the old value....
@ store
Atomic store untyped data to memory. see
@ fmin
Atomic store the float min of src1 and memory data and return the old value. see
@ bit_and
Atomic store the bitwise AND of src1 and memory data and return the old value. see
@ iinc
Atomic increment of memory data and return the old value. see
@ smax
Atomic store the signed int max of src1 and memory data and return the old value. see
@ bit_xor
Atomic store the bitwise XOR of src1 and memory data and return the old value. see
@ isub
Atomic signed int subtract of src1 from memory data and return the old value. see
@ fcmpxchg
Atomic float compare src1_X and memory data and replace if equal with src1_Y. Returns the old value....
@ load
Atomic read of the memory data value, without modifying the data. see
gpu_arch
Definition common.hpp:73
msg_type
Definition common.hpp:78
void xetla_wait(uint16_t val)
Definition common.hpp:229
argument_type
xetla dpas argument typ
Definition common.hpp:184
mem_layout
Definition common.hpp:76
Definition arch_config.hpp:24