Core C++-API#

Common#

Common Enumerations#

status#

enum class tinytc::status#

Cf. tinytc_status_t.

A status is typically thrown as an exception; hence, one should wrap calls as follows:

try {
  ...
} catch (tinytc::status const& st) {
  ...
}

Values:

enumerator success = tinytc_status_success#
enumerator bad_alloc = tinytc_status_bad_alloc#
enumerator invalid_arguments = tinytc_status_invalid_arguments#
enumerator out_of_range = tinytc_status_out_of_range#
enumerator runtime_error = tinytc_status_runtime_error#
enumerator internal_compiler_error = tinytc_status_internal_compiler_error#
enumerator unsupported_subgroup_size = tinytc_status_unsupported_subgroup_size#
enumerator unsupported_work_group_size = tinytc_status_unsupported_work_group_size#
enumerator compilation_error = tinytc_status_compilation_error#
enumerator file_io_error = tinytc_status_file_io_error#
enumerator parse_error = tinytc_status_parse_error#
enumerator unavailable_extension = tinytc_status_unavailable_extension#
enumerator unsupported_backend = tinytc_status_unsupported_backend#
enumerator invalid_kernel_arguments = tinytc_status_invalid_kernel_arguments#
enumerator unsupported_device = tinytc_status_unsupported_device#
enumerator ir_out_of_bounds = tinytc_status_ir_out_of_bounds#
enumerator ir_invalid_shape = tinytc_status_ir_invalid_shape#
enumerator ir_incompatible_shapes = tinytc_status_ir_incompatible_shapes#
enumerator ir_shape_stride_mismatch = tinytc_status_ir_shape_stride_mismatch#
enumerator ir_scalar_mismatch = tinytc_status_ir_scalar_mismatch#
enumerator ir_invalid_number_of_indices = tinytc_status_ir_invalid_number_of_indices#
enumerator ir_expected_scalar = tinytc_status_ir_expected_scalar#
enumerator ir_expected_memref = tinytc_status_ir_expected_memref#
enumerator ir_expected_memref_or_scalar = tinytc_status_ir_expected_memref_or_scalar#
enumerator ir_expected_memref_or_group = tinytc_status_ir_expected_memref_or_group#
enumerator ir_expected_vector_or_matrix = tinytc_status_ir_expected_vector_or_matrix#
enumerator ir_unexpected_yield = tinytc_status_ir_unexpected_yield#
enumerator ir_yield_mismatch = tinytc_status_ir_yield_mismatch#
enumerator ir_multiple_dynamic_modes = tinytc_status_ir_multiple_dynamic_modes#
enumerator ir_invalid_slice = tinytc_status_ir_invalid_slice#
enumerator ir_expand_shape_order_too_small = tinytc_status_ir_expand_shape_order_too_small#
enumerator ir_expand_shape_mismatch = tinytc_status_ir_expand_shape_mismatch#
enumerator ir_collective_called_from_spmd = tinytc_status_ir_collective_called_from_spmd#
enumerator ir_fp_unsupported = tinytc_status_ir_fp_unsupported#
enumerator ze_result_not_ready = tinytc_status_ze_result_not_ready#
enumerator ze_result_error_device_lost = tinytc_status_ze_result_error_device_lost#
enumerator ze_result_error_out_of_host_memory = tinytc_status_ze_result_error_out_of_host_memory#
enumerator ze_result_error_out_of_device_memory = tinytc_status_ze_result_error_out_of_device_memory#
enumerator ze_result_error_module_build_failure = tinytc_status_ze_result_error_module_build_failure#
enumerator ze_result_error_device_requires_reset = tinytc_status_ze_result_error_device_requires_reset#
enumerator ze_result_error_device_in_low_power_state = tinytc_status_ze_result_error_device_in_low_power_state#
enumerator ze_result_exp_error_device_is_not_vertex = tinytc_status_ze_result_exp_error_device_is_not_vertex#
enumerator ze_result_exp_error_vertex_is_not_device = tinytc_status_ze_result_exp_error_vertex_is_not_device#
enumerator ze_result_exp_error_remote_device = tinytc_status_ze_result_exp_error_remote_device#
enumerator ze_result_exp_error_operands_incompatible = tinytc_status_ze_result_exp_error_operands_incompatible#
enumerator ze_result_exp_rtas_build_retry = tinytc_status_ze_result_exp_rtas_build_retry#
enumerator ze_result_exp_rtas_build_deferred = tinytc_status_ze_result_exp_rtas_build_deferred#
enumerator ze_result_error_insufficient_permissions = tinytc_status_ze_result_error_insufficient_permissions#
enumerator ze_result_error_not_available = tinytc_status_ze_result_error_not_available#
enumerator ze_result_error_dependency_unavailable = tinytc_status_ze_result_error_dependency_unavailable#
enumerator ze_result_warning_dropped_data = tinytc_status_ze_result_warning_dropped_data#
enumerator ze_result_error_uninitialized = tinytc_status_ze_result_error_uninitialized#
enumerator ze_result_error_unsupported_version = tinytc_status_ze_result_error_unsupported_version#
enumerator ze_result_error_unsupported_feature = tinytc_status_ze_result_error_unsupported_feature#
enumerator ze_result_error_invalid_argument = tinytc_status_ze_result_error_invalid_argument#
enumerator ze_result_error_invalid_null_handle = tinytc_status_ze_result_error_invalid_null_handle#
enumerator ze_result_error_handle_object_in_use = tinytc_status_ze_result_error_handle_object_in_use#
enumerator ze_result_error_invalid_null_pointer = tinytc_status_ze_result_error_invalid_null_pointer#
enumerator ze_result_error_invalid_size = tinytc_status_ze_result_error_invalid_size#
enumerator ze_result_error_unsupported_size = tinytc_status_ze_result_error_unsupported_size#
enumerator ze_result_error_unsupported_alignment = tinytc_status_ze_result_error_unsupported_alignment#
enumerator ze_result_error_invalid_synchronization_object = tinytc_status_ze_result_error_invalid_synchronization_object#
enumerator ze_result_error_invalid_enumeration = tinytc_status_ze_result_error_invalid_enumeration#
enumerator ze_result_error_unsupported_enumeration = tinytc_status_ze_result_error_unsupported_enumeration#
enumerator ze_result_error_unsupported_image_format = tinytc_status_ze_result_error_unsupported_image_format#
enumerator ze_result_error_invalid_native_binary = tinytc_status_ze_result_error_invalid_native_binary#
enumerator ze_result_error_invalid_global_name = tinytc_status_ze_result_error_invalid_global_name#
enumerator ze_result_error_invalid_kernel_name = tinytc_status_ze_result_error_invalid_kernel_name#
enumerator ze_result_error_invalid_function_name = tinytc_status_ze_result_error_invalid_function_name#
enumerator ze_result_error_invalid_group_size_dimension = tinytc_status_ze_result_error_invalid_group_size_dimension#
enumerator ze_result_error_invalid_global_width_dimension = tinytc_status_ze_result_error_invalid_global_width_dimension#
enumerator ze_result_error_invalid_kernel_argument_index = tinytc_status_ze_result_error_invalid_kernel_argument_index#
enumerator ze_result_error_invalid_kernel_argument_size = tinytc_status_ze_result_error_invalid_kernel_argument_size#
enumerator ze_result_error_invalid_kernel_attribute_value = tinytc_status_ze_result_error_invalid_kernel_attribute_value#
enumerator ze_result_error_invalid_module_unlinked = tinytc_status_ze_result_error_invalid_module_unlinked#
enumerator ze_result_error_invalid_command_list_type = tinytc_status_ze_result_error_invalid_command_list_type#
enumerator ze_result_error_overlapping_regions = tinytc_status_ze_result_error_overlapping_regions#
enumerator ze_result_warning_action_required = tinytc_status_ze_result_warning_action_required#
enumerator ze_result_error_unknown = tinytc_status_ze_result_error_unknown#
enumerator cl_build_program_failure = tinytc_status_cl_build_program_failure#
enumerator cl_compile_program_failure = tinytc_status_cl_compile_program_failure#
enumerator cl_compiler_not_available = tinytc_status_cl_compiler_not_available#
enumerator cl_device_not_found = tinytc_status_cl_device_not_found#
enumerator cl_device_not_available = tinytc_status_cl_device_not_available#
enumerator cl_device_partition_failed = tinytc_status_cl_device_partition_failed#
enumerator cl_exec_status_error_for_events_in_wait_list = tinytc_status_cl_exec_status_error_for_events_in_wait_list#
enumerator cl_image_format_mismatch = tinytc_status_cl_image_format_mismatch#
enumerator cl_image_format_not_supported = tinytc_status_cl_image_format_not_supported#
enumerator cl_invalid_arg_index = tinytc_status_cl_invalid_arg_index#
enumerator cl_invalid_arg_size = tinytc_status_cl_invalid_arg_size#
enumerator cl_invalid_arg_value = tinytc_status_cl_invalid_arg_value#
enumerator cl_invalid_binary = tinytc_status_cl_invalid_binary#
enumerator cl_invalid_buffer_size = tinytc_status_cl_invalid_buffer_size#
enumerator cl_invalid_build_options = tinytc_status_cl_invalid_build_options#
enumerator cl_invalid_command_queue = tinytc_status_cl_invalid_command_queue#
enumerator cl_invalid_compiler_options = tinytc_status_cl_invalid_compiler_options#
enumerator cl_invalid_context = tinytc_status_cl_invalid_context#
enumerator cl_invalid_device = tinytc_status_cl_invalid_device#
enumerator cl_invalid_device_partition_count = tinytc_status_cl_invalid_device_partition_count#
enumerator cl_invalid_device_queue = tinytc_status_cl_invalid_device_queue#
enumerator cl_invalid_device_type = tinytc_status_cl_invalid_device_type#
enumerator cl_invalid_event = tinytc_status_cl_invalid_event#
enumerator cl_invalid_event_wait_list = tinytc_status_cl_invalid_event_wait_list#
enumerator cl_invalid_global_offset = tinytc_status_cl_invalid_global_offset#
enumerator cl_invalid_global_work_size = tinytc_status_cl_invalid_global_work_size#
enumerator cl_invalid_host_ptr = tinytc_status_cl_invalid_host_ptr#
enumerator cl_invalid_image_descriptor = tinytc_status_cl_invalid_image_descriptor#
enumerator cl_invalid_image_format_descriptor = tinytc_status_cl_invalid_image_format_descriptor#
enumerator cl_invalid_image_size = tinytc_status_cl_invalid_image_size#
enumerator cl_invalid_kernel = tinytc_status_cl_invalid_kernel#
enumerator cl_invalid_kernel_args = tinytc_status_cl_invalid_kernel_args#
enumerator cl_invalid_kernel_definition = tinytc_status_cl_invalid_kernel_definition#
enumerator cl_invalid_kernel_name = tinytc_status_cl_invalid_kernel_name#
enumerator cl_invalid_linker_options = tinytc_status_cl_invalid_linker_options#
enumerator cl_invalid_mem_object = tinytc_status_cl_invalid_mem_object#
enumerator cl_invalid_operation = tinytc_status_cl_invalid_operation#
enumerator cl_invalid_pipe_size = tinytc_status_cl_invalid_pipe_size#
enumerator cl_invalid_platform = tinytc_status_cl_invalid_platform#
enumerator cl_invalid_program = tinytc_status_cl_invalid_program#
enumerator cl_invalid_program_executable = tinytc_status_cl_invalid_program_executable#
enumerator cl_invalid_property = tinytc_status_cl_invalid_property#
enumerator cl_invalid_queue_properties = tinytc_status_cl_invalid_queue_properties#
enumerator cl_invalid_sampler = tinytc_status_cl_invalid_sampler#
enumerator cl_invalid_spec_id = tinytc_status_cl_invalid_spec_id#
enumerator cl_invalid_value = tinytc_status_cl_invalid_value#
enumerator cl_invalid_work_dimension = tinytc_status_cl_invalid_work_dimension#
enumerator cl_invalid_work_group_size = tinytc_status_cl_invalid_work_group_size#
enumerator cl_invalid_work_item_size = tinytc_status_cl_invalid_work_item_size#
enumerator cl_kernel_arg_info_not_available = tinytc_status_cl_kernel_arg_info_not_available#
enumerator cl_linker_not_available = tinytc_status_cl_linker_not_available#
enumerator cl_map_failure = tinytc_status_cl_map_failure#
enumerator cl_mem_copy_overlap = tinytc_status_cl_mem_copy_overlap#
enumerator cl_mem_object_allocation_failure = tinytc_status_cl_mem_object_allocation_failure#
enumerator cl_misaligned_sub_buffer_offset = tinytc_status_cl_misaligned_sub_buffer_offset#
enumerator cl_out_of_host_memory = tinytc_status_cl_out_of_host_memory#
enumerator cl_out_of_resources = tinytc_status_cl_out_of_resources#
enumerator cl_max_size_restriction_exceeded = tinytc_status_cl_max_size_restriction_exceeded#
enumerator cl_profiling_info_not_available = tinytc_status_cl_profiling_info_not_available#
enumerator unknown = tinytc_status_unknown#

support_level#

enum class tinytc::support_level#

Support level of a device.

Values:

enumerator none = tinytc_support_level_none#

Device is unsupported (e.g. subgroups feature missing in OpenCL-C)

enumerator basic = tinytc_support_level_basic#

Device provides necessary features but is not well tested.

enumerator tuned = tinytc_support_level_tuned#

Device provides necessary features and is well tested.

Common Functions#

error_string#

inline char const *tinytc::error_string(status code)#

Convert error code to string.

CHECK_STATUS#

inline void tinytc::CHECK_STATUS(tinytc_status_t code)#

Throw exception for unsuccessful call to C-API.

CHECK_STATUS_LOC#

inline void tinytc::CHECK_STATUS_LOC(tinytc_status_t code, location const &loc)#

Throw exception for unsuccessful call to C-API.

Common Classes#

shared_handle#

template<typename T>
class shared_handle#

Wraps a C handle in a reference-counted object.

Template Parameters:

T – C handle type (handle type = pointer to opaque struct)

Public Types

using traits = internal::shared_handle_traits<T>#

Traits shortcut.

using native_type = T#

Typedef for native C handle.

Public Functions

inline shared_handle()#

Create empty (invalid) handle.

inline explicit shared_handle(T obj, bool needs_retain = false)#

Create handle from C handle.

inline ~shared_handle()#

Decrease reference count.

inline shared_handle(shared_handle const &other)#

Copy ctor.

inline shared_handle(shared_handle &&other) noexcept#

Move ctor.

inline shared_handle &operator=(shared_handle const &other)#

Copy operator.

inline shared_handle &operator=(shared_handle &&other)#

Move operator.

inline auto operator*() const -> std::remove_pointer_t<T>&#

Dereference C handle and get reference to underlying type.

inline auto operator->() const -> T#

Convert handle to C handle.

inline auto get() const -> T#

Returns C handle.

inline auto release() -> T#

Returns C handle and releases the ownership of the managed object.

inline explicit operator bool() const noexcept#

Check whether handle is non-empty (valid)

inline bool operator==(shared_handle<T> const &other) const#

Check equality.

inline bool operator!=(shared_handle<T> const &other) const#

Check inequality.

unique_handle#

template<typename T>
class unique_handle#

Wraps a C handle in a unique_ptr-like object.

Template Parameters:

T – C handle type (handle type = pointer to opaque struct)

Public Types

using traits = internal::unique_handle_traits<T>#

Traits shortcut.

using native_type = T#

Typedef for native C handle.

Public Functions

inline unique_handle()#

Create empty (invalid) handle.

inline explicit unique_handle(T obj)#

Create handle from C handle.

inline ~unique_handle()#

Destroy object.

unique_handle(unique_handle const &other) = delete#

Copy ctor.

inline unique_handle(unique_handle &&other) noexcept#

Move ctor.

unique_handle &operator=(unique_handle const &other) = delete#

Copy operator.

inline unique_handle &operator=(unique_handle &&other)#

Move operator.

inline auto operator*() const -> std::remove_pointer_t<T>&#

Dereference C handle and get reference to underlying type.

inline auto operator->() const -> T#

Convert handle to C handle.

inline auto get() const -> T#

Returns C handle.

inline auto release() -> T#

Returns C handle and releases the ownership of the managed object.

inline explicit operator bool() const noexcept#

Check whether handle is non-empty (valid)

inline bool operator==(unique_handle<T> const &other) const#

Check equality.

inline bool operator!=(unique_handle<T> const &other) const#

Check inequality.

Binary#

Binary Enumerations#

bundle_format#

enum class tinytc::bundle_format#

Target binary format.

Values:

enumerator spirv = tinytc_bundle_format_spirv#

SPIR-V.

enumerator native = tinytc_bundle_format_native#

Native device binary.

Binary Functions#

make_binary#

inline auto tinytc::make_binary(bundle_format format, std::size_t data_size, std::uint8_t const *data, tinytc_core_feature_flags_t core_features) -> binary#

Make binary.

Parameters:
  • format – Bundle format (SPIR-V or Native)

  • data_size – Size of data in bytes

  • data – Binary data; data is copied

  • core_features – requested core features; must be 0 (default) or a combination of tinytc_core_feature_flag_t

Returns:

Binary

Binary Classes#

binary#

class binary : public tinytc::shared_handle<tinytc_binary_t>#

Reference-counting wrapper for tinytc_binary_t.

Public Functions

inline auto get_raw() -> raw#

Get raw data.

Returns:

Raw data

inline auto get_core_features() -> tinytc_core_feature_flags_t#

Get core features.

Returns:

Core features

inline shared_handle()#

Create empty (invalid) handle.

inline explicit shared_handle(T obj, bool needs_retain = false)#

Create handle from C handle.

inline shared_handle(shared_handle const &other)#

Copy ctor.

inline shared_handle(shared_handle &&other) noexcept#

Move ctor.

struct raw#

Container for raw data.

Public Members

bundle_format format#

Bundle format.

std::size_t data_size#

Size of binary data in bytes.

std::uint8_t const *data#

Pointer to binary data.

Compiler#

Compiler Functions#

compile_to_opencl#

inline auto tinytc::compile_to_opencl(prog prg, core_info const &info, source_context ctx = {}) -> source#

Compile program to OpenCL-C.

Parameters:
  • prg – Program

  • info – Core info

  • ctx – Source context for improved error reporting

Returns:

Source

Device Info#

Device Info Enumerations#

core_feature_flag#

enum class tinytc::core_feature_flag#

Cf. tinytc_core_feature_flag_t.

Values:

enumerator large_register_file = tinytc_core_feature_flag_large_register_file#

intel_gpu_architecture#

enum class tinytc::intel_gpu_architecture#

Cf. tinytc_intel_gpu_architecture_t.

Values:

enumerator tgl = tinytc_intel_gpu_architecture_tgl#
enumerator pvc = tinytc_intel_gpu_architecture_pvc#

Device Info Functions#

make_core_info_generic#

inline auto tinytc::make_core_info_generic(std::int32_t register_space, std::int32_t max_work_group_size, std::vector<std::int32_t> sgs) -> core_info#

Create core info for generic GPUs manually.

Parameters:
  • register_space – Size of register file per subgroup in bytes

  • max_work_group_size – Maximum size of local work group

  • sgs – Subgroup sizes

Returns:

Core info

make_core_info_intel#

inline auto tinytc::make_core_info_intel(std::uint32_t ip_version, std::int32_t num_eus_per_subslice, std::int32_t num_threads_per_eu, std::vector<std::int32_t> sgs) -> core_info#

Create core info for Intel GPUs manually.

Parameters:
  • ip_version – IP version

  • num_eus_per_subslice – Number of EUs (XVEs) per subslice (XeCore)

  • num_threads_per_eu – Number of hardware threads per EU (XVE)

  • sgs – Subgroup sizes

Returns:

Core info

make_core_info_intel_from_arch#

inline auto tinytc::make_core_info_intel_from_arch(intel_gpu_architecture arch) -> core_info#

Get core info for Intel GPUs from lookup table.

Parameters:

arch – Intel GPU architecture

Returns:

Core info

Device Info Classes#

core_info#

class core_info : public tinytc::shared_handle<tinytc_core_info_t>#

Reference-counting wrapper for tinytc_core_info_t.

Public Functions

inline void get_subgroup_sizes(std::uint32_t *sgs_size, std::int32_t const **sgs)#

Get subgroup sizes.

Cf. tinytc_core_info_get_subgroup_sizes

Parameters:
  • sgs_size – Pointer to size of subgroup size array

  • sgs – Pointer to subgroup size array

inline auto get_register_space() -> std::int32_t#

Get register space per subgroup in bytes.

Returns:

Register space

inline void set_core_features(tinytc_core_feature_flags_t flags)#

Set core features.

Parameters:

flags – set core features; must be 0 or a combination of tinytc_core_feature_flag_t

inline auto get_core_features() const -> tinytc_core_feature_flags_t#

Get core features.

Returns:

Core features

inline shared_handle()#

Create empty (invalid) handle.

inline explicit shared_handle(T obj, bool needs_retain = false)#

Create handle from C handle.

inline shared_handle(shared_handle const &other)#

Copy ctor.

inline shared_handle(shared_handle &&other) noexcept#

Move ctor.

Parser#

Parser Functions#

parse_file#

inline auto tinytc::parse_file(char const *filename, source_context source_ctx = {}) -> prog#

Parse source text from file.

Parameters:
  • filename – Filename

  • source_ctx – Source context for improved error reporting

Returns:

Program

parse_stdin#

inline auto tinytc::parse_stdin(source_context source_ctx = {}) -> prog#

Parse source text from stdin.

Parameters:

source_ctx – Source context for improved error reporting

Returns:

Program

parse_string#

inline auto tinytc::parse_string(std::string const &src, source_context source_ctx = {}) -> prog#

Parse source text from string.

Parameters:
  • src – Source text

  • source_ctx – Source context for improved error reporting

Returns:

Program

Recipe#

Recipe Enumerations#

mem_type#

enum class tinytc::mem_type#

Memory object type.

Values:

enumerator buffer = tinytc_mem_type_buffer#

Buffer object (e.g. cl_mem)

enumerator usm_pointer = tinytc_mem_type_usm_pointer#

Unified shared memory pointer.

enumerator svm_pointer = tinytc_mem_type_svm_pointer#

Shared virtual memory pointer.

Recipe Functions#

make_small_gemm_batched#

inline auto tinytc::make_small_gemm_batched(core_info const &info, scalar_type ty, transpose tA, transpose tB, std::int64_t M, std::int64_t N, std::int64_t K, std::int64_t ldA, std::int64_t strideA, std::int64_t ldB, std::int64_t strideB, std::int64_t ldC, std::int64_t strideC, source_context ctx = {}) -> small_gemm_batched#

Make small GEMM batched recipe.

Cf. tinytc_recipe_small_gemm_batched_create

Parameters:
  • info – Core info

  • ty – Scalar type of \(\alpha\), A, B, \(\beta\), C

  • tA – Operation applied on A

  • tB – Operation applied on B

  • M – Number of rows of A and C

  • N – Number of columns of B and C

  • K – Number of columns of A, number of rows of B

  • ldA – Leading dimension of an A matrix

  • strideA – Stride of A-matrices

  • ldB – Leading dimension of a B matrix

  • strideB – Stride of B-matrices

  • ldC – Leading dimension of a C matrix

  • strideC – Stride of C-matrices

  • ctx – Source context for improved error reporting

Returns:

Small GEMM batched recipe

make_tall_and_skinny#

inline auto tinytc::make_tall_and_skinny(core_info const &info, scalar_type ty, std::int64_t N, std::int64_t K, std::int32_t M_block_size = 0, source_context ctx = {}) -> tall_and_skinny#

Make tall and skinny recipe.

Cf. tinytc_recipe_tall_and_skinny_create

Parameters:
  • info – Core info

  • ty – Scalar type of \(\alpha\), A, B, \(\beta\), C

  • N – Number of columns of B and C

  • K – Number of columns of A, number of rows of B

  • M_block_size – Chunk size for M-mode

  • ctx – Source context for improved error reporting

Returns:

Tall and skinny recipe

make_tall_and_skinny_specialized#

inline auto tinytc::make_tall_and_skinny_specialized(core_info const &info, scalar_type ty, std::int64_t M, std::int64_t N, std::int64_t K, std::int64_t ldA, std::int64_t ldB, std::int64_t ldC, std::int32_t M_block_size = 0, source_context ctx = {}) -> tall_and_skinny#

Make tall and skinny recipe with additional specialization constants.

Cf. tinytc_recipe_tall_and_skinny_create_specialized

Parameters:
  • info – Core info

  • ty – Scalar type of \(\alpha\), A, B, \(\beta\), C

  • M – Number of rows of A and C; can be dynamic

  • N – Number of columns of B and C

  • K – Number of columns of A, number of rows of B

  • ldA – Leading dimension of A; can be dynamic

  • ldB – Leading dimension of B; can be dynamic

  • ldC – Leading dimension of C; can be dynamic

  • M_block_size – Chunk size for M-mode

  • ctx – Source context for improved error reporting

Returns:

Tall and skinny recipe

Recipe Classes#

recipe#

class recipe : public tinytc::shared_handle<tinytc_recipe_t>#

Reference-counting wrapper for tinytc_recipe_t.

Subclassed by tinytc::small_gemm_batched, tinytc::tall_and_skinny

Public Functions

inline auto get_prog() const -> prog#

Get program.

Returns:

Program

inline auto get_source() const -> source#

Get source.

Returns:

Source

inline shared_handle()#

Create empty (invalid) handle.

inline explicit shared_handle(T obj, bool needs_retain = false)#

Create handle from C handle.

inline shared_handle(shared_handle const &other)#

Copy ctor.

inline shared_handle(shared_handle &&other) noexcept#

Move ctor.

recipe_handler#

class recipe_handler : public tinytc::shared_handle<tinytc_recipe_handler_t>#

Reference-counting wrapper for tinytc_recipe_handler_t.

Subclassed by tinytc::level_zero_recipe_handler, tinytc::opencl_recipe_handler, tinytc::sycl_recipe_handler

Public Functions

inline auto get_recipe() const -> recipe#

Get recipe.

Returns:

Recipe

inline shared_handle()#

Create empty (invalid) handle.

inline explicit shared_handle(T obj, bool needs_retain = false)#

Create handle from C handle.

inline shared_handle(shared_handle const &other)#

Copy ctor.

inline shared_handle(shared_handle &&other) noexcept#

Move ctor.

small_gemm_batched#

class small_gemm_batched : public tinytc::recipe#

Reference-counting wrapper for tinytc_recipe_t.

Public Static Functions

template<typename T>
static inline void set_args(recipe_handler &handler, std::int64_t howmany, T alpha, mem A, mem B, T beta, mem C)#

Set kernel arguments.

Template Parameters:

T – Scalar type; must match scalar_type passed to constructor

Parameters:
  • handler – Recipe handler

  • howmany – Batch size

  • alpha – \(\alpha\)

  • A – Memory object used for A-matrix

  • B – Memory object used for B-matrix

  • beta – \(\beta\)

  • C – Memory object used for C-matrix

tall_and_skinny#

class tall_and_skinny : public tinytc::recipe#

Reference-counting wrapper for tinytc_recipe_t.

Public Static Functions

template<typename T>
static inline void set_args(recipe_handler &handler, std::int64_t M, T alpha, mem A, std::int64_t ldA, mem B, std::int64_t ldB, T beta, mem C, std::int64_t ldC)#

Set kernel arguments.

Template Parameters:

T – Scalar type; must match scalar_type passed to constructor

Parameters:
  • handler – Recipe handler

  • M – Number of rows of A and C

  • alpha – \(\alpha\)

  • A – Memory object used for A-matrix

  • ldA – Leading dimension of A

  • B – Memory object used for B-matrix

  • ldB – Leading dimension of B

  • beta – \(\beta\)

  • C – Memory object used for C-matrix

  • ldC – Leading dimension of C

Recipe Structures#

auto_mem_type#

template<typename T, typename Enable = void>
struct auto_mem_type#

Guess memory type of memory object.

Template Parameters:

T – memory object type

auto_mem_type<T, std::enable_if_t<usm_pointer_type<T>>>#

template<typename T>
struct auto_mem_type<T, std::enable_if_t<usm_pointer_type<T>>>#

Specialize auto_mem_type for pointer to non-class types.

All pointers to scalars are assumed to be Unified Shared Memory pointers. (Automatic guessing for Shared Virtual Memory pointers not implemented.)

Template Parameters:

T – memory object type

Public Static Attributes

static constexpr mem_type value = mem_type::usm_pointer#

Pointer maps to USM pointer type.

mem#

struct mem#

Type-safe wrapper for memory objects.

Public Functions

template<typename T>
inline mem(T const value, mem_type type = auto_mem_type_v<T>)#

ctor

Template Parameters:

T – pointer type or buffer type

Parameters:
  • value – USM / SVM pointer or cl_mem (cl_mem implicitly converts to void*)

  • type – memory object type

Public Members

const void *value#

USM / SVM pointer or cl_mem (passed by value)

mem_type type#

Memory object type.

Recipe Variables#

auto_mem_type_v#

template<typename T>
constexpr auto tinytc::auto_mem_type_v = auto_mem_type<T>::value#

Convenience wrapper for auto_mem_type.

Template Parameters:

T – memory object type

usm_pointer_type#

template<typename T>
constexpr bool tinytc::usm_pointer_type = std::is_pointer_v<T> && (std::is_fundamental_v<std::remove_pointer_t<T>> || std::is_fundamental_v<std::remove_pointer_t<std::remove_pointer_t<T>>>)#

True if T is either pointer to a fundamental type or a pointer to a pointer to a fundamental type.

Template Parameters:

T – type

Source#

Source Classes#

source#

class source : public tinytc::shared_handle<tinytc_source_t>#

Reference-counting wrapper for tinytc_source_t.

Public Functions

inline auto get_code() const -> std::string_view#

Get code.

Returns:

String view of the code; the view is bound to the lifetime of the source object

inline auto get_location() const -> location#

Get location.

Returns:

Location

inline void get_extensions(std::uint32_t &extensions_size, char const *const *&extensions) const#

Get OpenCL extensions.

Parameters:
  • extensions_size – Number of extensions

  • extensions – Array of extensions

inline shared_handle()#

Create empty (invalid) handle.

inline explicit shared_handle(T obj, bool needs_retain = false)#

Create handle from C handle.

inline shared_handle(shared_handle const &other)#

Copy ctor.

inline shared_handle(shared_handle &&other) noexcept#

Move ctor.

Source Context#

Source Context Functions#

make_source_context#

inline auto tinytc::make_source_context() -> source_context#

Create source context.

Returns:

Source context

Source Context Classes#

source_context#

class source_context : public tinytc::shared_handle<tinytc_source_context_t>#

Reference-counting wrapper for tinytc_source_context_t.

Public Functions

inline auto add_source(char const *name, char const *text) -> std::int32_t#

Add source to context.

Parameters:
  • name – File name

  • text – Source text

Returns:

Source id (should be set in position.source_id)

inline auto get_error_log() const noexcept -> char const*#

Get error log.

Returns:

C-string that is valid as long as source_context is not modified; empty string if source_context is empty

inline void report_error(location const &loc, char const *what, bool append = true)#

Enhance error message with source context; useful when builder is used.

Parameters:
  • loc – Source location

  • what – Error description

  • append – True: append to error log; false: clear error log

inline shared_handle()#

Create empty (invalid) handle.

inline explicit shared_handle(T obj, bool needs_retain = false)#

Create handle from C handle.

inline shared_handle(shared_handle const &other)#

Copy ctor.

inline shared_handle(shared_handle &&other) noexcept#

Move ctor.