Core C++-API

Contents

Core C++-API#

Common#

Common Classes#

array_view_base#

template<typename T>
class array_view_base#

Base implementation of array view.

Template Parameters:

T – array element type

Subclassed by tinytc::mutable_array_view< T >

Public Functions

array_view_base() = default#

Empty array view.

inline array_view_base(T &single)#

Single element view.

Parameters:

single – the single element

inline array_view_base(T *data, std::size_t size)#

ctor

Parameters:
  • data – base pointer

  • size – array size

inline array_view_base(T *begin, T *end)#

ctor

Parameters:
  • begin – begin pointer

  • end – end pointer (not included)

inline iterator begin() const#

Begin iterator.

inline iterator end() const#

End iterator.

inline bool empty() const#

Returns true if view is empty.

inline std::size_t size() const#

Returns array size.

inline T &front() const#

Access first element; must not call when array size is 0.

inline T &back() const#

Access last element; must not call when array size is 0.

inline T *data() const#

Get data pointer.

inline T &operator[](std::size_t n) const#

Access operator.

inline operator std::vector<std::remove_const_t<T>>() const#

Convert to vector.

inline bool operator==(array_view_base<T> const &other) const#

Equals operator.

array_view#

template<typename T>
class array_view : public tinytc::array_view_base<const T>#

Stores an immutable view on an array (pointer + size)

Template Parameters:

T – array element type

Public Functions

inline array_view(std::vector<T> const &vec)#

Convert vector to array view.

Parameters:

vec – standard vector

template<std::size_t N>
inline array_view(std::array<T, N> const &arr)#

Convert std::array to array view.

Template Parameters:

N – array size

Parameters:

arr – standard array

inline array_view(std::initializer_list<T> const &arr)#

Convert initializer list to array view (array_view must be rvalue)

Parameters:

arr – initializer list

mutable_array_view#

template<typename T>
class mutable_array_view : public tinytc::array_view_base<T>#

Stores a mutable view on an array (pointer + size)

Template Parameters:

T – array element type

Public Functions

inline mutable_array_view(std::vector<T> &vec)#

Convert vector to array view.

Parameters:

vec – standard vector

template<std::size_t N>
inline mutable_array_view(std::array<T, N> &arr)#

Convert std::array to array view.

Template Parameters:

N – array size

Parameters:

arr – standard array

shared_handle#

template<typename T>
class shared_handle#

Wraps a C handle in a reference-counted object.

Template Parameters:

T – C handle type (handle type = pointer to opaque struct)

Public Types

using traits = internal::shared_handle_traits<T>#

Traits shortcut.

using native_type = T#

Typedef for native C handle.

Public Functions

inline shared_handle()#

Create empty (invalid) handle.

inline explicit shared_handle(T obj, bool needs_retain = false)#

Create handle from C handle.

inline ~shared_handle()#

Decrease reference count.

inline shared_handle(shared_handle const &other)#

Copy ctor.

inline shared_handle(shared_handle &&other) noexcept#

Move ctor.

inline shared_handle &operator=(shared_handle const &other)#

Copy operator.

inline shared_handle &operator=(shared_handle &&other)#

Move operator.

inline std::remove_pointer_t<T> &operator*() const#

Dereference C handle and get reference to underlying type.

inline T operator->() const#

Convert handle to C handle.

inline T get() const#

Returns C handle.

inline T release()#

Returns C handle and releases the ownership of the managed object.

inline explicit operator bool() const noexcept#

Check whether handle is non-empty (valid)

inline bool operator==(shared_handle<T> const &other) const#

Check equality.

inline bool operator!=(shared_handle<T> const &other) const#

Check inequality.

unique_handle#

template<typename T>
class unique_handle#

Wraps a C handle in a unique_ptr-alike object.

Template Parameters:

T – C handle type (handle type = pointer to opaque struct)

Public Types

using traits = internal::unique_handle_traits<T>#

Traits shortcut.

using native_type = T#

Typedef for native C handle.

Public Functions

inline unique_handle()#

Create empty (invalid) handle.

inline explicit unique_handle(T obj)#

Create handle from C handle.

inline ~unique_handle()#

Destroy object.

unique_handle(unique_handle const &other) = delete#

Copy ctor.

inline unique_handle(unique_handle &&other) noexcept#

Move ctor.

unique_handle &operator=(unique_handle const &other) = delete#

Copy operator.

inline unique_handle &operator=(unique_handle &&other)#

Move operator.

inline std::remove_pointer_t<T> &operator*() const#

Dereference C handle and get reference to underlying type.

inline T operator->() const#

Convert handle to C handle.

inline T get() const#

Returns C handle.

inline T release()#

Returns C handle and releases the ownership of the managed object.

inline explicit operator bool() const noexcept#

Check whether handle is non-empty (valid)

inline bool operator==(unique_handle<T> const &other) const#

Check equality.

inline bool operator!=(unique_handle<T> const &other) const#

Check inequality.

Common Enumerations#

address_space#

enum class tinytc::address_space#

Address space

Values:

enumerator global = 1#

Global memory.

enumerator local = 2#

Local memory, returned by alloca.

bundle_format#

enum class tinytc::bundle_format#

Target binary format

Values:

enumerator spirv = 0#

SPIR-V.

enumerator native = 1#

Native device binary.

checked_flag#

enum class tinytc::checked_flag#

Checked flag.

Checks can be combined by bitwise or, that is,

tinytc_checked_flag_both = tinytc_checked_flag_rows | tinytc_checked_flag_cols
tinytc_checked_flag_rows = tinytc_checked_flag_rows | tinytc_checked_flag_none

Values:

enumerator none = 0#

Perform no checks.

enumerator rows = 1#

Check for out-of-bound rows.

enumerator cols = 2#

Check for out-of-bound cols.

enumerator both = 3#

Check for out-of-bound rows and cols.

comp3#

enum class tinytc::comp3#

Named components of 3d vector

Values:

enumerator x = 0#

.x

enumerator y = 1#

.y

enumerator z = 2#

.z

core_feature_flag#

enum class tinytc::core_feature_flag#

Core features that may be optionally enabled

Values:

enumerator large_register_file = 1#

Request a large register file. On PVC this doubles the number of registers per vector engine but halves the number of available hardware threads. When this feature is activated, the kernel is compiled with the -ze-opt-large-register-file option.

intel_gpu_architecture#

enum class tinytc::intel_gpu_architecture#

IP versions for Intel GPUs.

Note: IP versions are extracted from the Intel compute-runtime AOT platform configuration headers (platforms.h).

Values:

enumerator tgl = 50331648#

Tiger Lake.

enumerator pvc = 51314688#

Ponte Vecchio.

enumerator bmg = 83902464#

Battlemage.

matrix_use#

enum class tinytc::matrix_use#

Matrix use

Values:

enumerator a = 0#

matrix_a

enumerator b = 1#

matrix_b

enumerator acc = 2#

matrix_acc

mem_type#

enum class tinytc::mem_type#

Memory object type

Values:

enumerator buffer = 0#

Buffer object (e.g. cl_mem)

enumerator usm_pointer = 1#

Unified shared memory pointer.

enumerator svm_pointer = 2#

Shared virtual memory pointer.

memory_scope#

enum class tinytc::memory_scope#

Memory scope flag

Values:

enumerator cross_device = 0#
enumerator device = 1#
enumerator work_group = 2#
enumerator subgroup = 3#

memory_semantics#

enum class tinytc::memory_semantics#

Memory semantics

Values:

enumerator relaxed = 0#

Relaxed.

enumerator acquire = 2#

Acquire.

enumerator release = 4#

Release.

enumerator acquire_release = 8#

Acquire and release.

enumerator sequentially_consistent = 16#

Sequentially consistent.

optflag#

enum class tinytc::optflag#

Flags for optimizer

Values:

enumerator unsafe_fp_math = 0#

Unsafe floating point math (e.g. 0.0 * x => 0.0)

reduce_mode#

enum class tinytc::reduce_mode#

Reduce mode

Values:

enumerator row = 0#

Reduction over rows.

enumerator column = 1#

Reduction over columns.

spirv_feature#

enum class tinytc::spirv_feature#

SPIR-V features

Values:

enumerator float16 = 0#

f16 support

enumerator float64 = 1#

f64 support

enumerator int64_atomics = 2#

i64 atomics support

enumerator groups = 3#

work group collectives

enumerator subgroup_dispatch = 4#

subgroup support

enumerator atomic_float16_add_local = 5#

f16 atomic add on local pointer

enumerator atomic_float16_add_global = 6#

f16 atomic add on global pointer

enumerator atomic_float32_add_local = 7#

f32 atomic add on local pointer

enumerator atomic_float32_add_global = 8#

f32 atomic add on global pointer

enumerator atomic_float64_add_local = 9#

f64 atomic add on local pointer

enumerator atomic_float64_add_global = 10#

f64 atomic add on global pointer

enumerator atomic_float16_min_max_local = 11#

f16 atomic min/max on local pointer

enumerator atomic_float16_min_max_global = 12#

f16 atomic min/max on global pointer

enumerator atomic_float32_min_max_local = 13#

f32 atomic min/max on local pointer

enumerator atomic_float32_min_max_global = 14#

f32 atomic min/max on global pointer

enumerator atomic_float64_min_max_local = 15#

f64 atomic min/max on local pointer

enumerator atomic_float64_min_max_global = 16#

f64 atomic min/max on global pointer

enumerator bfloat16_conversion = 17#

bf16 -> f32 and f32 -> bf16 conversion

enumerator subgroup_buffer_block_io = 18#

subgroup block read/write support

status#

enum class tinytc::status#

Status codes

Values:

enumerator success = 0#

Success.

enumerator bad_alloc = 1#

Bad allocation.

enumerator invalid_arguments = 2#

Invalid arguments passed to function.

enumerator out_of_range = 3#

Out of range.

enumerator runtime_error = 4#

General runtime error.

enumerator internal_compiler_error = 5#

Internal compiler error.

enumerator unsupported_subgroup_size = 6#

Device does not support subgroup size.

enumerator unsupported_work_group_size = 7#

Device does not support work-group size.

enumerator compilation_error = 8#

Compilation error.

enumerator file_io_error = 9#

I/O error occurred in file operation.

enumerator parse_error = 10#

Parse error.

enumerator unavailable_extension = 11#

Required vendor extension is unavailable.

enumerator unsupported_backend = 12#

Unsupported backend.

enumerator invalid_kernel_arguments = 13#

Invalid arguments passed to kernel.

enumerator unsupported_device = 14#

Unsupported device.

enumerator invalid_core_info = 15#

Invalid core info object (e.g. max work group size is 0 or subgroup sizes vector is empty)

enumerator unknown_pass_name = 16#

Unknown compiler pass name.

enumerator not_implemented = 17#

Not implemented.

enumerator compute_runtime_error = 18#

Error occurred in compute runtime.

enumerator ir_out_of_bounds = 256#

Argument is out of bounds.

enumerator ir_invalid_shape = 257#

Invalid shape.

enumerator ir_incompatible_shapes = 258#

Incompatible tensor shapes.

enumerator ir_shape_stride_mismatch = 259#

Dimension of shape and stride must match.

enumerator ir_number_mismatch = 260#

Number type mismatch.

enumerator ir_invalid_number_of_indices = 261#

Number of indices must match memref order or must be 1 for group types.

enumerator ir_expected_boolean = 262#

Expected boolean type.

enumerator ir_expected_number = 263#

Expected number type.

enumerator ir_expected_int = 264#

Expected integer type.

enumerator ir_expected_float = 265#

Expected floating point type.

enumerator ir_expected_complex = 266#

Expected complex type.

enumerator ir_expected_i32 = 267#

Expected i32 type.

enumerator ir_expected_index = 268#

Expected index type.

enumerator ir_expected_coopmatrix = 269#

Expected coopmatrix type.

enumerator ir_expected_coopmatrix_or_number = 270#

Expected coopmatrix or number type.

enumerator ir_expected_coopmatrix_number_or_boolean = 271#

Expected coopmatrix type, number type, or boolean type.

enumerator ir_expected_memref = 272#

Expected memref type.

enumerator ir_expected_memref_or_number = 273#

Expected memref or number type.

enumerator ir_expected_memref_or_group = 274#

Expected memref or group type.

enumerator ir_expected_memref_order_0 = 275#

Expected memref of order 0 (scalar)

enumerator ir_expected_memref_order_1 = 276#

Expected memref of order 1 (vector)

enumerator ir_expected_memref_order_2 = 277#

Expected memref of order 2 (matrix)

enumerator ir_expected_memref_order_0_or_1 = 278#

Expected memref of order 0 or 1 (scalar or vector)

enumerator ir_expected_memref_order_1_or_2 = 279#

Expected memref of order 1 or 2 (vector or matrix)

enumerator ir_expected_memref_order_0_1_or_2 = 280#

Expected memref of order 0, 1, or 2 (scalar, vector, or matrix)

enumerator ir_unexpected_yield = 281#

Yield encountered in non-yielding region.

enumerator ir_yield_mismatch = 282#

Number of yielded values does not match number of values yielded by region or the types are different

enumerator ir_subview_mismatch = 283#

Number of dynamic offsets and sizes must match number of dynamic operands.

enumerator ir_invalid_slice = 284#

Static offset and size must be non-negative or dynamic (?)

enumerator ir_expand_shape_order_too_small = 285#

Expand shape must have at least 2 entries.

enumerator ir_expand_shape_mismatch = 286#

Number of dynamic expand shape operands must equal number of dynamic modes in static expand shape

enumerator ir_collective_called_from_spmd = 287#

Collective instruction must not be called from SPMD region.

enumerator ir_fp_unsupported = 288#

Floating point type unsupported by instruction.

enumerator ir_spmd_called_from_collective = 289#

SPMD instruction must not be called from collective region.

enumerator ir_expected_local_address_space = 290#

Expected memref with local address space.

enumerator ir_expected_global_address_space = 291#

Expected memref with global address space.

enumerator ir_address_space_mismatch = 292#

Address space must match.

enumerator ir_invalid_offset = 293#

Offset must be non-negative or dynamic.

enumerator ir_int_unsupported = 294#

Instruction does not support int type.

enumerator ir_boolean_unsupported = 295#

Instruction does not support boolean type.

enumerator ir_complex_unsupported = 296#

Instruction does not support complex type.

enumerator ir_coopmatrix_unsupported = 297#

Instruction does not support coopmatrix type.

enumerator ir_forbidden_cast = 298#

Forbidden cast.

enumerator ir_invalid_beta = 299#

beta must be constant and 0 or 1 for atomic linear algebra operations

enumerator ir_init_return_mismatch = 300#

The number or types of the initial values does not match the return type list.

enumerator ir_invalid_matrix_use = 301#

Operands have invalid matrix use.

enumerator ir_unsupported_coopmatrix_shape = 302#

Unsupported coopmatrix shape for the combination of number type, matrix use, and target architecture

enumerator ir_forbidden_promotion = 303#

Number type promotion is forbidden.

enumerator ir_constant_mismatch = 304#

Type of constant does not match type of returned value.

enumerator ir_insufficient_alignment = 305#

Pointer does not satisfy minimum alignment requirements.

enumerator ir_must_have_yield = 306#

Last instruction of region that returns values must be yield.

enumerator ir_yield_in_else_branch_missing = 307#

Else-branch must have yield instruction if then-branch has yield instruction.

enumerator ir_from_to_mismatch = 308#

length(from) != length(to)

enumerator ir_from_tile_shape_mismatch = 309#

length(from) != length(tile_shape)

enumerator ir_operand_type_must_match_return_type = 310#

Operand type must match return type.

enumerator ir_invalid_stride = 311#

Invalid stride.

enumerator ir_init_return_type_mismatch = 312#

Type of initializer does not match return type or the number of return types is not equal to the number of initializers

enumerator ir_value_still_has_uses = 313#

Attempted to erase a value that still has uses.

enumerator ir_expected_array_attribute = 320#

Expected array attribute.

enumerator ir_expected_boolean_attribute = 321#

Expected boolean attribute.

enumerator ir_expected_dictionary_attribute = 322#

Expected dictionary attribute.

enumerator ir_expected_integer_attribute = 323#

Expected integer attribute.

enumerator ir_expected_string_attribute = 324#

Expected string attribute.

enumerator ir_duplicate_key_in_dictionary = 325#

Duplicate key detected in list of named attributes.

enumerator ir_unexpected_array_attribute_size = 326#

Unexpected array size.

enumerator ir_expected_non_scalar_memref = 336#

Expected memref of dimension greater than or equal to 1.

enumerator ir_complex_number_type_unsupported = 337#

Complex number type not supported.

enumerator ir_tile_shape0_not_multiple_of_sgs = 338#

Tile shape 0 is not a multiple of the subgroup size.

enumerator ir_division_by_zero = 339#

Division by zero detected.

enumerator spirv_forbidden_forward_declaration = 4096#

Forward declaration of id is forbidden.

enumerator spirv_undefined_value = 4097#

Undefined SPIR-V value.

enumerator spirv_missing_dope_vector = 4098#

Dope vector missing.

enumerator spirv_unsupported_atomic_data_type = 4099#

Atomic data type unsupported by SPIR-V.

enumerator spirv_required_feature_unavailable = 4100#

A required SPIR-V feature is unavailable.

enumerator unknown = 2147483647#

Unknown status code.

support_level#

enum class tinytc::support_level#

Support level of a device

Values:

enumerator none = 0#

Device is unsupported (e.g. subgroups feature missing in OpenCL-C)

enumerator basic = 1#

Device provides necessary features but is not well tested.

enumerator tuned = 2#

Device provides necessary features and is well tested.

transpose#

enum class tinytc::transpose#

Transpose

Values:

enumerator N = 0#

n

enumerator T = 1#

t

Common Functions#

CHECK_STATUS#

inline void tinytc::CHECK_STATUS(tinytc_status_t code)#

Throw exception for unsuccessful call to C-API.

CHECK_STATUS_LOC#

inline void tinytc::CHECK_STATUS_LOC(tinytc_status_t code, location const &loc)#

Throw exception for unsuccessful call to C-API.

to_string(address_space)#

inline char const *tinytc::to_string(address_space val)#

Convert address_space to string.

to_string(bundle_format)#

inline char const *tinytc::to_string(bundle_format val)#

Convert bundle_format to string.

to_string(checked_flag)#

inline char const *tinytc::to_string(checked_flag val)#

Convert checked_flag to string.

to_string(comp3)#

inline char const *tinytc::to_string(comp3 val)#

Convert comp3 to string.

to_string(core_feature_flag)#

inline char const *tinytc::to_string(core_feature_flag val)#

Convert core_feature_flag to string.

to_string(intel_gpu_architecture)#

inline char const *tinytc::to_string(intel_gpu_architecture val)#

Convert intel_gpu_architecture to string.

to_string(matrix_use)#

inline char const *tinytc::to_string(matrix_use val)#

Convert matrix_use to string.

to_string(mem_type)#

inline char const *tinytc::to_string(mem_type val)#

Convert mem_type to string.

to_string(memory_scope)#

inline char const *tinytc::to_string(memory_scope val)#

Convert memory_scope to string.

to_string(memory_semantics)#

inline char const *tinytc::to_string(memory_semantics val)#

Convert memory_semantics to string.

to_string(optflag)#

inline char const *tinytc::to_string(optflag val)#

Convert optflag to string.

to_string(reduce_mode)#

inline char const *tinytc::to_string(reduce_mode val)#

Convert reduce_mode to string.

to_string(spirv_feature)#

inline char const *tinytc::to_string(spirv_feature val)#

Convert spirv_feature to string.

to_string(status)#

inline char const *tinytc::to_string(status val)#

Convert status to string.

to_string(support_level)#

inline char const *tinytc::to_string(support_level val)#

Convert support_level to string.

to_string(transpose)#

inline char const *tinytc::to_string(transpose val)#

Convert transpose to string.

Common Structures#

auto_mem_type#

template<typename T, typename Enable = void>
struct auto_mem_type#

Guess memory type of memory object.

Template Parameters:

T – memory object type

auto_mem_type<T, std::enable_if_t<is_usm_pointer_type<T>>>#

template<typename T>
struct auto_mem_type<T, std::enable_if_t<is_usm_pointer_type<T>>>#

Specialize auto_mem_type for pointer to non-class types.

All pointers to scalars are assumed to be Unified Shared Memory pointers. (Automatic guessing for Shared Virtual Memory pointers not implemented.)

Template Parameters:

T – memory object type

Public Static Attributes

static mem_type value = mem_type::usm_pointer#

Pointer maps to USM pointer type.

mem#

struct mem#

Type-safe wrapper for memory objects.

Public Functions

template<typename T>
inline mem(T const value, mem_type type = auto_mem_type_v<T>)#

ctor

Template Parameters:

T – pointer type or buffer type

Parameters:
  • value – USM / SVM pointer or cl_mem (cl_mem implicitly converts to void*)

  • type – memory object type

Public Members

const void *value#

USM / SVM pointer or cl_mem (passed by value)

mem_type type#

Memory object type.

Common Variables#

auto_mem_type_v#

template<typename T>
auto tinytc::auto_mem_type_v = auto_mem_type<T>::value#

Convenience wrapper for auto_mem_type.

Template Parameters:

T – memory object type

is_supported_scalar_type#

template<typename T>
bool tinytc::is_supported_scalar_type = std::is_same_v<T, std::int8_t> || std::is_same_v<T, std::int16_t> || std::is_same_v<T, std::int32_t> || std::is_same_v<T, std::int64_t> || std::is_same_v<T, float> || std::is_same_v<T, double> || std::is_same_v<T, std::complex<float>> || std::is_same_v<T, std::complex<double>>#

Check whether T maps to a scalar data type.

Template Parameters:

T – type

is_usm_pointer_type#

template<typename T>
bool tinytc::is_usm_pointer_type = std::is_same_v<T, void*> || (std::is_pointer_v<T> && (is_supported_scalar_type<std::remove_pointer_t<T>> || is_supported_scalar_type<std::remove_pointer_t<std::remove_pointer_t<T>>>))#

True if T is either a pointer to a supported scalar type or a pointer to a pointer to a supported scalar type; void* is fine, too.

Template Parameters:

T – type

Binary#

Binary Functions#

create_binary#

inline shared_handle<tinytc_binary_t> tinytc::create_binary(tinytc_compiler_context_t ctx, bundle_format format, std::size_t data_size, std::uint8_t const *data, tinytc_core_feature_flags_t core_features)#

Create binary.

Parameters:
  • ctx – Compiler context

  • format – Bundle format (SPIR-V or Native)

  • data_size – Size of data in bytes

  • data – Binary data; data is copied

  • core_features – requested core features; must be 0 (default) or a combination of tinytc_core_feature_flag_t

Returns:

Binary

get_compiler_context(const_tinytc_binary_t)#

inline shared_handle<tinytc_compiler_context_t> tinytc::get_compiler_context(const_tinytc_binary_t bin)#

Get compiler context.

Parameters:

bin – Binary

Returns:

Compiler context

get_core_features(const_tinytc_binary_t)#

inline tinytc_core_feature_flags_t tinytc::get_core_features(const_tinytc_binary_t bin)#

Get core features.

Parameters:

bin – Binary

Returns:

Core features

get_raw#

inline raw_binary tinytc::get_raw(tinytc_binary_t bin)#

Get raw data.

Parameters:

bin – Binary

Returns:

Raw data

Binary Structures#

raw_binary#

struct raw_binary#

Container for raw data.

Public Members

bundle_format format#

Bundle format.

std::size_t data_size#

Size of binary data in bytes.

std::uint8_t const *data#

Pointer to binary data.

Compiler#

Compiler Functions#

run_function_pass#

inline void tinytc::run_function_pass(char const *pass_name, tinytc_prog_t prg, const_tinytc_core_info_t info = {})#

Run a function pass on every function of a program.

Parameters:
  • pass_name – name of function pass; cf. list_function_passes

  • prg – tensor program; modified as compiler pass is run

  • info – core info object; might be nullptr if core info is not required for pass

list_function_passes#

inline void tinytc::list_function_passes(std::size_t &names_size, char const *const *&names)#

Get function pass names.

Parameters:
  • names_size – Number of function pass names

  • names – Array of function pass names

compile_to_spirv#

inline shared_handle<tinytc_spv_mod_t> tinytc::compile_to_spirv(tinytc_prog_t prg, const_tinytc_core_info_t info)#

Convert tensor language to SPIR-V.

Parameters:
  • prg – Program

  • info – Core info

Returns:

SPIR-V module

compile_to_spirv_and_assemble#

inline shared_handle<tinytc_binary_t> tinytc::compile_to_spirv_and_assemble(tinytc_prog_t prg, const_tinytc_core_info_t info)#

Compile program to SPIR-V and assemble.

Parameters:
  • prg – Program

  • info – Core info

Returns:

Binary

spirv_assemble#

inline shared_handle<tinytc_binary_t> tinytc::spirv_assemble(tinytc_spv_mod_t mod)#

Assemble SPIR-V module.

Parameters:

mod – SPIR-V module

Returns:

Binary

Compiler Context#

Compiler Context Functions#

add_source#

inline std::int32_t tinytc::add_source(tinytc_compiler_context_t ctx, char const *name, char const *text)#

Add source text to compiler context.

Parameters:
  • ctx – compiler context

  • name – File name

  • text – Source text

Returns:

Source id (should be set in position.source_id)

create_compiler_context#

inline shared_handle<tinytc_compiler_context_t> tinytc::create_compiler_context()#

Create compiler context.

Returns:

Compiler context

set_error_reporter#

inline void tinytc::set_error_reporter(tinytc_compiler_context_t ctx, tinytc_error_reporter_t reporter, void *user_data = nullptr)#

Set error reporter.

Error reporting function that is called whenever an error occurs in the parser or the builder.

Parameters:
  • ctx – compiler context

  • reporter – error reporting callback

  • user_data – pointer to user data that is passed to the callback

set_optimization_flag#

inline void tinytc::set_optimization_flag(tinytc_compiler_context_t ctx, optflag flag, std::int32_t state)#

Sets an optimization flag.

The state can be 0 (disabled), 1 (enabled), or -1 (use default according to optimization level).

Parameters:
  • ctx – compiler context

  • flag – optimization flag

  • state – flag state

set_optimization_level#

inline void tinytc::set_optimization_level(tinytc_compiler_context_t ctx, std::int32_t level)#

Set optimization level.

Parameters:
  • ctx – compiler context

  • level – optimization level

report_error#

inline void tinytc::report_error(tinytc_compiler_context_t ctx, location const &loc, char const *what)#

Enhance error message with compiler context; useful when builder is used.

Parameters:
  • ctx – compiler context

  • loc – Source location

  • what – Error description

Device Info#

Device Info Functions#

create_core_info_generic#

inline shared_handle<tinytc_core_info_t> tinytc::create_core_info_generic(std::int32_t register_space, std::int32_t max_work_group_size, array_view<std::int32_t> sgs)#

Create core info for generic GPUs manually.

Parameters:
  • register_space – Size of register file per subgroup in bytes

  • max_work_group_size – Maximum size of local work group

  • sgs – Subgroup sizes

Returns:

Core info

create_core_info_intel#

inline shared_handle<tinytc_core_info_t> tinytc::create_core_info_intel(std::uint32_t ip_version, std::int32_t num_eus_per_subslice, std::int32_t num_threads_per_eu, array_view<std::int32_t> sgs)#

Create core info for Intel GPUs manually.

Parameters:
  • ip_version – IP version

  • num_eus_per_subslice – Number of EUs (XVEs) per subslice (XeCore)

  • num_threads_per_eu – Number of hardware threads per EU (XVE)

  • sgs – Subgroup sizes

Returns:

Core info

create_core_info_intel_from_arch#

inline shared_handle<tinytc_core_info_t> tinytc::create_core_info_intel_from_arch(intel_gpu_architecture arch)#

Get core info for Intel GPUs from lookup table.

Parameters:

arch – IP version

Returns:

Core info

create_core_info_intel_from_name#

inline shared_handle<tinytc_core_info_t> tinytc::create_core_info_intel_from_name(char const *name)#

Get core info for Intel GPUs from lookup table.

Parameters:

name – architecture name

Returns:

Core info

get_core_features(const_tinytc_core_info_t)#

inline tinytc_core_feature_flags_t tinytc::get_core_features(const_tinytc_core_info_t info)#

Get core features.

Parameters:

info – Core info

Returns:

Core features

get_subgroup_sizes#

inline array_view<std::int32_t> tinytc::get_subgroup_sizes(const_tinytc_core_info_t info)#

Get subgroup sizes.

Parameters:

info – Core info

Returns:

Subgroup sizes

get_register_space#

inline std::int32_t tinytc::get_register_space(const_tinytc_core_info_t info)#

Get register space per subgroup in bytes.

Parameters:

info – Core info

Returns:

Register space

have_spirv_feature#

inline bool tinytc::have_spirv_feature(const_tinytc_core_info_t info, spirv_feature feature)#

Get SPIR-V feature.

Parameters:
  • info – Core info

  • feature – SPIR-V feature

Returns:

true if feature is available and false otherwise

set_core_features#

inline void tinytc::set_core_features(tinytc_core_info_t info, tinytc_core_feature_flags_t flags)#

Set core features.

Parameters:
  • info – Core info

  • flags – Core feature flags; 0 or a combination of tinytc_core_feature_flag_t

set_default_alignment#

inline void tinytc::set_default_alignment(tinytc_core_info_t info, std::int32_t alignment)#

Set default alignment.

Parameters:
  • info – Core info

  • alignment – alignment in bytes

set_spirv_feature#

inline void tinytc::set_spirv_feature(tinytc_core_info_t info, spirv_feature feature, bool available)#

Set SPIR-V feature.

Parameters:
  • info – Core info

  • feature – SPIR-V feature

  • available – true if feature is available and false otherwise

FP math#

FP math Classes#

lp_float#

template<typename T, typename F16f>
class lp_float#

Low precision float type.

For all operations, low precision floats are converted to single precision, the operation is done in single precision, and then the result is stored in the low precision type.

Template Parameters:
  • T – storage type

  • F16f – low precision floating point format

Public Functions

inline lp_float(float const &val)#

construct from float

inline lp_float &operator=(float const &rhs)#

assign float

inline operator float() const#

implicit conversion to float

inline T bits() const#

Get bit representation.

inline lp_float operator+(lp_float const &rhs) const#

add

inline lp_float &operator+=(lp_float const &rhs)#

add to

inline lp_float operator-(lp_float const &rhs) const#

subtract

inline lp_float &operator-=(lp_float const &rhs)#

subtract from

inline lp_float operator*(lp_float const &rhs) const#

multiply

inline lp_float &operator*=(lp_float const &rhs)#

multiply with

inline lp_float operator/(lp_float const &rhs) const#

divide

inline lp_float &operator/=(lp_float const &rhs)#

divide by

inline lp_float operator-()#

unary minus

inline lp_float &operator++()#

pre-increase by 1

inline lp_float operator++(int)#

post-increase by 1

inline lp_float &operator--()#

pre-decrease by 1

inline lp_float operator--(int)#

post-decrease by 1

inline bool operator==(lp_float const &rhs) const#

equal

inline bool operator!=(lp_float const &rhs) const#

not equal

inline bool operator>(lp_float const &rhs) const#

greater than

inline bool operator>=(lp_float const &rhs) const#

greater than or equal

inline bool operator<(lp_float const &rhs) const#

less than

inline bool operator<=(lp_float const &rhs) const#

less than or equal

Public Static Functions

static inline lp_float from_bits(T const &val)#

Construct lp_float from bit representation.

FP math Functions#

ieee754_extend#

template<typename F32f, typename F16f>
typename F32f::bits_type tinytc::ieee754_extend(typename F16f::bits_type x)#

Extend low precision floating point number and return high precision floating point number.

Template Parameters:
  • F32f – high precision floating point format

  • F16f – low precision floating point format

Parameters:

x – bit pattern of low precision number

Returns:

bit pattern of high precision number

ieee754_truncate#

template<typename F16f, typename F32f>
typename F16f::bits_type tinytc::ieee754_truncate(typename F32f::bits_type x)#

Truncate high precision floating point number and return low precision floating point number.

Template Parameters:
  • F16f – low precision floating point format

  • F32f – high precision floating point format

Parameters:

x – bit pattern of high precision number

Returns:

bit pattern of low precision number

FP math Structures#

ieee754_format#

template<uint32_t ExponentBits, uint32_t MantissaBits>
struct ieee754_format#

IEEE754 floating point format parameters.

Template Parameters:
  • ExponentBits – Number of exponent bits

  • MantissaBits – Number of mantissa bits

Public Types

using bits_type = std::conditional_t<num_bytes == 1, std::uint8_t, std::conditional_t<num_bytes == 2, std::uint16_t, std::conditional_t<num_bytes == 4, std::uint32_t, std::conditional_t<num_bytes == 8, std::uint64_t, void>>>>#

Unsigned integer type large enough to store bit pattern.

Public Static Attributes

static uint32_t exponent_bits = ExponentBits#

Number of exponent bits.

static uint32_t mantissa_bits = MantissaBits#

Number of mantissa bits

static uint32_t num_bits = 1 + exponent_bits + mantissa_bits#

Total number of bits.

static uint32_t bias = (1 << (exponent_bits - 1)) - 1#

Bias.

static uint32_t max_biased_exponent = (1 << exponent_bits) - 1#

Max exponent when encoded with bias added.

static uint32_t sign_mask = 1 << (num_bits - 1)#

Bit mask for sign bit.

static uint32_t exponent_mask = max_biased_exponent << mantissa_bits#

Bit mask for exponent bits.

static uint32_t mantissa_mask = (1 << mantissa_bits) - 1#

Bit mask for mantissa bits.

static uint32_t num_bytes = 1 + (num_bits - 1) / 8#

Number of bytes.

FP math Typedefs#

bf16_format#

using tinytc::bf16_format = ieee754_format<8, 7>#

Floating point format for bf16 (bfloat16)

bfloat16#

using tinytc::bfloat16 = lp_float<std::uint16_t, bf16_format>#

bf16 host emulation type

f16_format#

using tinytc::f16_format = ieee754_format<5, 10>#

Floating point format for f16 (half)

f32_format#

using tinytc::f32_format = ieee754_format<8, 23>#

Floating point format for f32 (float)

half#

using tinytc::half = lp_float<std::uint16_t, f16_format>#

fp16 host emulation type

Parser#

Parser Functions#

parse_file#

inline shared_handle<tinytc_prog_t> tinytc::parse_file(char const *filename, tinytc_compiler_context_t ctx = {})#

Parse source text from file.

Parameters:
  • filename – Filename

  • ctx – Compiler context

Returns:

Program

parse_stdin#

inline shared_handle<tinytc_prog_t> tinytc::parse_stdin(tinytc_compiler_context_t ctx = {})#

Parse source text from stdin.

Parameters:

ctx – Compiler context

Returns:

Program

parse_string#

inline shared_handle<tinytc_prog_t> tinytc::parse_string(std::string const &src, tinytc_compiler_context_t ctx = {})#

Parse source text from string.

Parameters:
  • src – Source text

  • ctx – Compiler context

Returns:

Program

Program#

Program Functions#

dump(tinytc_prog_t)#

inline void tinytc::dump(tinytc_prog_t p)#

Dump program to stderr.

Parameters:

p – program

get_compiler_context(const_tinytc_prog_t)#

inline shared_handle<tinytc_compiler_context_t> tinytc::get_compiler_context(const_tinytc_prog_t p)#

Get context.

Parameters:

p – program

Returns:

Compiler context

SPIR-V module#

SPIR-V module Functions#

dump(const_tinytc_spv_mod_t)#

inline void tinytc::dump(const_tinytc_spv_mod_t mod)#

Dump module to stderr.

Parameters:

mod – SPIR-V module