11 #define SYCL_EXT_ONEAPI_CUDA_TEX_CACHE_READ 1
15 #if defined(_WIN32) || defined(_WIN64)
16 #define ATTRIBUTE_EXT_VEC_TYPE(N) __declspec(ext_vector_type(N))
18 #define ATTRIBUTE_EXT_VEC_TYPE(N) __attribute__((ext_vector_type(N)))
22 inline namespace _V1 {
25 namespace experimental {
60 #if defined(__SYCL_DEVICE_ONLY__)
61 #if defined(__NVPTX__)
62 if constexpr (std::is_same_v<T, char>) {
63 return __nvvm_ldg_c(ptr);
64 }
else if constexpr (std::is_same_v<T, signed char>) {
65 return __nvvm_ldg_sc(ptr);
66 }
else if constexpr (std::is_same_v<T, short>) {
67 return __nvvm_ldg_s(ptr);
68 }
else if constexpr (std::is_same_v<T, int>) {
69 return __nvvm_ldg_i(ptr);
70 }
else if constexpr (std::is_same_v<T, long>) {
71 return __nvvm_ldg_l(ptr);
72 }
else if constexpr (std::is_same_v<T, long long>) {
73 return __nvvm_ldg_ll(ptr);
74 }
else if constexpr (std::is_same_v<T, unsigned char>) {
75 return __nvvm_ldg_uc(ptr);
76 }
else if constexpr (std::is_same_v<T, unsigned short>) {
77 return __nvvm_ldg_us(ptr);
78 }
else if constexpr (std::is_same_v<T, unsigned int>) {
79 return __nvvm_ldg_ui(ptr);
80 }
else if constexpr (std::is_same_v<T, unsigned long>) {
81 return __nvvm_ldg_ul(ptr);
82 }
else if constexpr (std::is_same_v<T, unsigned long long>) {
83 return __nvvm_ldg_ull(ptr);
84 }
else if constexpr (std::is_same_v<T, half>) {
85 auto native =
reinterpret_cast<const __fp16 *
>(ptr);
86 return __nvvm_ldg_h(
native);
87 }
else if constexpr (std::is_same_v<T, float>) {
88 return __nvvm_ldg_f(ptr);
89 }
else if constexpr (std::is_same_v<T, double>) {
90 return __nvvm_ldg_d(ptr);
96 c2 rv = __nvvm_ldg_c2(
reinterpret_cast<const c2 *
>(ptr));
103 c2 rv_2 = __nvvm_ldg_c2(
reinterpret_cast<const c2 *
>(ptr));
104 char rv = __nvvm_ldg_c(
reinterpret_cast<const char *
>(
105 std::next(
reinterpret_cast<const c2 *
>(ptr))));
113 c4 rv = __nvvm_ldg_c4(
reinterpret_cast<const c4 *
>(ptr));
122 sc2 rv = __nvvm_ldg_sc2(
reinterpret_cast<const sc2 *
>(ptr));
129 sc2 rv_2 = __nvvm_ldg_sc2(
reinterpret_cast<const sc2 *
>(ptr));
130 signed char rv = __nvvm_ldg_sc(
reinterpret_cast<const signed char *
>(
131 std::next(
reinterpret_cast<const sc2 *
>(ptr))));
139 sc4 rv = __nvvm_ldg_sc4(
reinterpret_cast<const sc4 *
>(ptr));
148 s2 rv = __nvvm_ldg_s2(
reinterpret_cast<const s2 *
>(ptr));
155 s2 rv_2 = __nvvm_ldg_s2(
reinterpret_cast<const s2 *
>(ptr));
156 short rv = __nvvm_ldg_s(
reinterpret_cast<const short *
>(
157 std::next(
reinterpret_cast<const s2 *
>(ptr))));
165 s4 rv = __nvvm_ldg_s4(
reinterpret_cast<const s4 *
>(ptr));
174 i2 rv = __nvvm_ldg_i2(
reinterpret_cast<const i2 *
>(ptr));
181 i2 rv_2 = __nvvm_ldg_i2(
reinterpret_cast<const i2 *
>(ptr));
182 int rv = __nvvm_ldg_i(
reinterpret_cast<const int *
>(
183 std::next(
reinterpret_cast<const i2 *
>(ptr))));
191 i4 rv = __nvvm_ldg_i4(
reinterpret_cast<const i4 *
>(ptr));
200 l2 rv = __nvvm_ldg_l2(
reinterpret_cast<const l2 *
>(ptr));
207 l2 rv_2 = __nvvm_ldg_l2(
reinterpret_cast<const l2 *
>(ptr));
208 long rv = __nvvm_ldg_l(
reinterpret_cast<const long *
>(
209 std::next(
reinterpret_cast<const l2 *
>(ptr))));
217 l2 rv1 = __nvvm_ldg_l2(
reinterpret_cast<const l2 *
>(ptr));
218 l2 rv2 = __nvvm_ldg_l2(std::next(
reinterpret_cast<const l2 *
>(ptr)));
227 ll2 rv = __nvvm_ldg_ll2(
reinterpret_cast<const ll2 *
>(ptr));
234 ll2 rv_2 = __nvvm_ldg_ll2(
reinterpret_cast<const ll2 *
>(ptr));
235 long long rv = __nvvm_ldg_ll(
reinterpret_cast<const long long *
>(
236 std::next(
reinterpret_cast<const ll2 *
>(ptr))));
244 ll2 rv1 = __nvvm_ldg_ll2(
reinterpret_cast<const ll2 *
>(ptr));
245 ll2 rv2 = __nvvm_ldg_ll2(std::next(
reinterpret_cast<const ll2 *
>(ptr)));
254 uc2 rv = __nvvm_ldg_uc2(
reinterpret_cast<const uc2 *
>(ptr));
261 uc2 rv_2 = __nvvm_ldg_uc2(
reinterpret_cast<const uc2 *
>(ptr));
262 unsigned char rv = __nvvm_ldg_uc(
reinterpret_cast<const unsigned char *
>(
263 std::next(
reinterpret_cast<const uc2 *
>(ptr))));
271 uc4 rv = __nvvm_ldg_uc4(
reinterpret_cast<const uc4 *
>(ptr));
280 us2 rv = __nvvm_ldg_us2(
reinterpret_cast<const us2 *
>(ptr));
287 us2 rv_2 = __nvvm_ldg_us2(
reinterpret_cast<const us2 *
>(ptr));
288 unsigned short rv = __nvvm_ldg_us(
reinterpret_cast<const unsigned short *
>(
289 std::next(
reinterpret_cast<const us2 *
>(ptr))));
297 us4 rv = __nvvm_ldg_us4(
reinterpret_cast<const us4 *
>(ptr));
306 ui2 rv = __nvvm_ldg_ui2(
reinterpret_cast<const ui2 *
>(ptr));
313 ui2 rv_2 = __nvvm_ldg_ui2(
reinterpret_cast<const ui2 *
>(ptr));
314 unsigned int rv = __nvvm_ldg_ui(
reinterpret_cast<const unsigned int *
>(
315 std::next(
reinterpret_cast<const ui2 *
>(ptr))));
323 ui4 rv = __nvvm_ldg_ui4(
reinterpret_cast<const ui4 *
>(ptr));
332 ul2 rv = __nvvm_ldg_ul2(
reinterpret_cast<const ul2 *
>(ptr));
339 ul2 rv_2 = __nvvm_ldg_ul2(
reinterpret_cast<const ul2 *
>(ptr));
340 unsigned long rv = __nvvm_ldg_ul(
reinterpret_cast<const unsigned long *
>(
341 std::next(
reinterpret_cast<const ul2 *
>(ptr))));
349 ul2 rv1 = __nvvm_ldg_ul2(
reinterpret_cast<const ul2 *
>(ptr));
350 ul2 rv2 = __nvvm_ldg_ul2(std::next(
reinterpret_cast<const ul2 *
>(ptr)));
359 ull2 rv = __nvvm_ldg_ull2(
reinterpret_cast<const ull2 *
>(ptr));
366 ull2 rv_2 = __nvvm_ldg_ull2(
reinterpret_cast<const ull2 *
>(ptr));
367 unsigned long long rv =
368 __nvvm_ldg_ull(
reinterpret_cast<const unsigned long long *
>(
369 std::next(
reinterpret_cast<const ull2 *
>(ptr))));
377 ull2 rv1 = __nvvm_ldg_ull2(
reinterpret_cast<const ull2 *
>(ptr));
378 ull2 rv2 = __nvvm_ldg_ull2(std::next(
reinterpret_cast<const ull2 *
>(ptr)));
387 auto rv = __nvvm_ldg_h2(
reinterpret_cast<const h2 *
>(ptr));
394 h2 rv_2 = __nvvm_ldg_h2(
reinterpret_cast<const h2 *
>(ptr));
395 auto rv = __nvvm_ldg_h(
reinterpret_cast<const __fp16 *
>(
396 std::next(
reinterpret_cast<const h2 *
>(ptr))));
404 auto rv1 = __nvvm_ldg_h2(
reinterpret_cast<const h2 *
>(ptr));
405 auto rv2 = __nvvm_ldg_h2(std::next(
reinterpret_cast<const h2 *
>(ptr)));
414 f2 rv = __nvvm_ldg_f2(
reinterpret_cast<const f2 *
>(ptr));
421 f2 rv_2 = __nvvm_ldg_f2(
reinterpret_cast<const f2 *
>(ptr));
422 float rv = __nvvm_ldg_f(
reinterpret_cast<const float *
>(
423 std::next(
reinterpret_cast<const f2 *
>(ptr))));
431 f4 rv = __nvvm_ldg_f4(
reinterpret_cast<const f4 *
>(ptr));
440 d2 rv = __nvvm_ldg_d2(
reinterpret_cast<const d2 *
>(ptr));
447 d2 rv_2 = __nvvm_ldg_d2(
reinterpret_cast<const d2 *
>(ptr));
448 double rv = __nvvm_ldg_d(
reinterpret_cast<const double *
>(
449 std::next(
reinterpret_cast<const d2 *
>(ptr))));
457 d2 rv1 = __nvvm_ldg_d2(
reinterpret_cast<const d2 *
>(ptr));
458 d2 rv2 = __nvvm_ldg_d2(std::next(
reinterpret_cast<const d2 *
>(ptr)));
470 throw runtime_error(
"ldg is not supported on host.", PI_ERROR_INVALID_DEVICE);
474 #undef ATTRIBUTE_EXT_VEC_TYPE
Provides a cross-patform vector class template that works efficiently on SYCL devices as well as in h...
#define __SYCL_ALWAYS_INLINE
#define ATTRIBUTE_EXT_VEC_TYPE(N)
tl_append< scalar_unsigned_integer_list > scalar_unsigned_basic_list
tl_append< scalar_floating_list, scalar_signed_integer_list > scalar_signed_basic_list
boost::mp11::mp_append< L... > tl_append
boost::mp11::mp_set_contains< TypeList, std::remove_cv_t< T > > is_contained
boost::mp11::mp_list< T... > type_list
sycl::detail::tl_append< ldg_vector_types, sycl::detail::gtl::scalar_signed_basic_list, sycl::detail::gtl::scalar_unsigned_basic_list > ldg_types
sycl::detail::type_list< sycl::vec< char, 2 >, sycl::vec< char, 3 >, sycl::vec< char, 4 >, sycl::vec< signed char, 2 >, sycl::vec< signed char, 3 >, sycl::vec< signed char, 4 >, sycl::vec< short, 2 >, sycl::vec< short, 3 >, sycl::vec< short, 4 >, sycl::vec< int, 2 >, sycl::vec< int, 3 >, sycl::vec< int, 4 >, sycl::vec< long, 2 >, sycl::vec< long, 3 >, sycl::vec< long, 4 >, sycl::vec< long long, 2 >, sycl::vec< long long, 3 >, sycl::vec< long long, 4 >, sycl::vec< unsigned char, 2 >, sycl::vec< unsigned char, 3 >, sycl::vec< unsigned char, 4 >, sycl::vec< unsigned short, 2 >, sycl::vec< unsigned short, 3 >, sycl::vec< unsigned short, 4 >, sycl::vec< unsigned int, 2 >, sycl::vec< unsigned int, 3 >, sycl::vec< unsigned int, 4 >, sycl::vec< unsigned long, 2 >, sycl::vec< unsigned long, 3 >, sycl::vec< unsigned long, 4 >, sycl::vec< unsigned long long, 2 >, sycl::vec< unsigned long long, 3 >, sycl::vec< unsigned long long, 4 >, sycl::vec< half, 2 >, sycl::vec< half, 3 >, sycl::vec< half, 4 >, sycl::vec< float, 2 >, sycl::vec< float, 3 >, sycl::vec< float, 4 >, sycl::vec< double, 2 >, sycl::vec< double, 3 >, sycl::vec< double, 4 > > ldg_vector_types
__SYCL_ALWAYS_INLINE std::enable_if_t< sycl::detail::is_contained< T, sycl::ext::oneapi::experimental::cuda::detail::ldg_types >::value, T > ldg(const T *ptr)