C++ API Reference#

class ModelFactory : public intel_npu_acceleration_library::OVInferenceModel#
#include <nn_factory.h>

The ModelFactory class implements a generic interface for NPU network generation and inference. It supports only single-input, single-output operations, with input of shape [batch, input_channels] and output of shape [batch, output_channels].
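A minimal usage sketch (the device string, shapes, and variable names are illustrative; the methods used are the ones documented below):

#include "nn_factory.h"

using namespace intel_npu_acceleration_library;

void build_and_compile() {
    const size_t batch = 1, input_channels = 128, output_channels = 256;

    // Generate a [batch, input_channels] -> [batch, output_channels] network.
    ModelFactory factory("NPU");
    ov::op::Op* input   = factory.parameter({batch, input_channels}, ov::element::f16);
    ov::op::Op* weights = factory.parameter({output_channels, input_channels}, ov::element::f16);

    ov::op::Op* mm  = factory.matmul(input, weights);  // trB defaults to true
    ov::op::Op* act = factory.relu(mm);

    factory.result(act);  // mark the network output
    factory.compile();    // compile the model for the target device
}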

Public Functions

inline ModelFactory(std::string device, bool profile = false)#

Construct a new Model Factory object.

Parameters:
  • device – target device

  • profile – enable/disable profiling

inline ov::op::Op *parameter(std::vector<size_t> shape, ov::element::Type_t dtype)#

Create a new N-Dimensional network parameter.

Parameters:
  • shape – parameter shape

  • dtype – parameter datatype

Returns:

ov::op::Op*

inline ov::op::Op *constant(ov::element::Type_t dtype, std::vector<size_t> shape, const void *dst)#

Create a new constant object.

Parameters:
  • dtype – element type of the tensor constant

  • shape – shape of the tensor constant

  • dst – data pointer of the tensor constant

Returns:

ov::op::Op*

template<class T, class = typename std::enable_if<std::is_fundamental<T>::value>::type>
inline ov::op::Op *constant(ov::element::Type_t dtype, std::vector<size_t> shape, T value)#

Create a new constant object.

Parameters:
  • dtype – element type of the tensor constant

  • shape – shape of the tensor constant

  • value – value for initializing the tensor constant

Returns:

ov::op::Op*

inline ov::op::Op *matmul(ov::op::Op *input, ov::op::Op *&weights, bool trA = false, bool trB = true)#

Create a new matmul operation.

Parameters:
  • input – matmul lhs input

  • weights – matmul rhs input, a.k.a. weights

  • trA – transpose the lhs input

  • trB – transpose the rhs input

Returns:

ov::op::Op*

inline ov::op::Op *linear(ov::op::Op *input, ov::op::Op *weights, ov::op::Op *bias)#

Create a new linear operation.

Parameters:
  • input – matmul lhs input

  • weights – matmul rhs input, a.k.a. weights

  • bias – matmul bias input

Returns:

ov::op::Op*

inline ov::op::Op *convolution(ov::op::Op *input, ov::op::Op *&weights, std::vector<size_t> strides, std::vector<size_t> pads_begin, std::vector<size_t> pads_ends, std::vector<size_t> dilations, size_t groups = 1)#

Create a new convolution operation.

Parameters:
  • input – convolution input

  • weights – convolution weights

  • strides – convolution strides

  • pads_begin – convolution padding begin

  • pads_ends – convolution padding end

  • dilations – convolution dilations

  • groups – convolution groups

Returns:

ov::op::Op*
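A sketch of a single 3x3 convolution, assuming the OpenVINO NCHW activation layout and an [output_channels, input_channels, kH, kW] weight layout, and given a ModelFactory factory as in the sketch above:

// 3x3 convolution, stride 1, "same" padding, on a [1, 16, 32, 32] input.
ov::op::Op* x = factory.parameter({1, 16, 32, 32}, ov::element::f16);
ov::op::Op* w = factory.parameter({32, 16, 3, 3}, ov::element::f16);
ov::op::Op* y = factory.convolution(x, w,
                                    /*strides=*/{1, 1},
                                    /*pads_begin=*/{1, 1},
                                    /*pads_ends=*/{1, 1},
                                    /*dilations=*/{1, 1});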

inline ov::op::Op *average_pooling(ov::op::Op *input, std::vector<size_t> strides, std::vector<size_t> pads_begin, std::vector<size_t> pads_ends, std::vector<size_t> kernel, bool exclude_pad = false, ov::op::RoundingType rounding_type = ov::op::RoundingType::FLOOR, ov::op::PadType auto_pad = ov::op::PadType::EXPLICIT)#

Create a new average pooling operation.

Parameters:
  • input – pooling input

  • strides – pooling strides

  • pads_begin – pooling padding begin

  • pads_ends – pooling padding end

  • kernel – pooling kernel

  • exclude_pad – exclude padding from the average calculation

  • rounding_type – rounding type

  • auto_pad – padding type

Returns:

ov::op::Op*

inline ov::op::Op *adaptive_average_pool(ov::op::Op *input, ov::op::Op *output_shape)#

Create a new adaptive average pooling operation.

Parameters:
  • input – pooling input

  • output_shape – output shape

Returns:

ov::op::Op*

inline ov::op::Op *max_pooling(ov::op::Op *input, std::vector<size_t> strides, std::vector<size_t> pads_begin, std::vector<size_t> pads_ends, std::vector<size_t> kernel, ov::op::RoundingType rounding_type = ov::op::RoundingType::FLOOR, ov::op::PadType auto_pad = ov::op::PadType::EXPLICIT)#

Create a new max pooling operation.

Parameters:
  • input – pooling input

  • strides – pooling strides

  • pads_begin – pooling padding begin

  • pads_ends – pooling padding end

  • kernel – pooling kernel

  • rounding_type – rounding type

  • auto_pad – padding type

Returns:

ov::op::Op*

inline ov::op::Op *adaptive_max_pool(ov::op::Op *input, ov::op::Op *output_shape)#

Create a new adaptive max pooling operation.

Parameters:
  • input – pooling input

  • output_shape – output shape

Returns:

ov::op::Op*

inline ov::op::Op *gather(ov::op::Op *input, ov::op::Op *indices, ov::op::Op *axis, const size_t batch_dims = 0)#

Create a new gather operation.

Parameters:
  • input – tensor from which slices are gathered

  • indices – tensor with indexes to gather

  • axis – tensor holding the dimension index along which to gather data

  • batch_dims – the number of batch dimensions in the data and indices tensors.

Returns:

ov::op::Op*

inline ov::op::Op *reshape(ov::op::Op *input, ov::op::Op *shape)#

Create a new reshape operation.

Parameters:
  • input – tensor to be reshaped.

  • shape – new shape tensor; -1 is allowed for at most one dimension, which is then inferred automatically.

Returns:

ov::op::Op*
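The shape input can be built with constant(); here is a sketch that flattens a 4-D tensor x, using -1 for the inferred dimension (factory and x as in the sketches above):

// Flatten [batch, C, H, W] -> [batch, C*H*W]; the -1 dimension is inferred.
std::vector<int64_t> new_shape = {1, -1};
ov::op::Op* shape = factory.constant(ov::element::i64, {new_shape.size()}, new_shape.data());
ov::op::Op* flat  = factory.reshape(x, shape);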

inline ov::op::Op *slice(ov::op::Op *input, ov::op::Op *begin, ov::op::Op *end, ov::op::Op *strides, const std::vector<int64_t> begin_mask, const std::vector<int64_t> end_mask)#

Create a new strided slice operation.

Parameters:
  • input – tensor to be sliced.

  • begin – tensor with begin indices for each dimension.

  • end – tensor with end indices for each dimension.

  • strides – tensor with strides for each dimension.

  • begin_mask – mask for begin indices

  • end_mask – mask for end indices

Returns:

ov::op::Op*
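A sketch of taking the first 8 rows of a 2-D tensor x; the mask semantics assumed here follow OpenVINO's StridedSlice, where a mask value of 1 means the corresponding begin/end index is ignored and the full axis is taken:

// Rows 0..7, all columns: mask out begin/end on the second axis.
std::vector<int64_t> b = {0, 0}, e = {8, 0}, s = {1, 1};
ov::op::Op* begin   = factory.constant(ov::element::i64, {2}, b.data());
ov::op::Op* end     = factory.constant(ov::element::i64, {2}, e.data());
ov::op::Op* strides = factory.constant(ov::element::i64, {2}, s.data());
ov::op::Op* rows = factory.slice(x, begin, end, strides,
                                 /*begin_mask=*/{0, 1}, /*end_mask=*/{0, 1});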

inline ov::op::Op *transpose(ov::op::Op *input, ov::op::Op *input_order)#

Create a new transpose operation.

Parameters:
  • input – tensor to be transposed.

  • input_order – permutation tensor specifying the new order of dimensions.

Returns:

ov::op::Op*

inline ov::op::Op *squeeze(ov::op::Op *input)#

Create a new squeeze operation.

Parameters:

input – tensor to be squeezed.

Returns:

ov::op::Op*

inline ov::op::Op *unsqueeze(ov::op::Op *input, ov::op::Op *axis)#

Create a new unsqueeze operation.

Parameters:
  • input – tensor to be unsqueezed.

  • axis – tensor with axes to unsqueeze

Returns:

ov::op::Op*

inline ov::op::Op *concat(ov::op::Op *x1, ov::op::Op *x2, int64_t axis)#

Create a new concatenation operation.

Parameters:
  • x1 – first concat input node

  • x2 – second concat input node

  • axis – axis along which to concatenate the input tensors

Returns:

ov::op::Op*

inline ov::op::Op *reduce_max(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#

Create a new reduce max operation.

Parameters:
  • input – operation’s input node

  • reduction_axes – the axis positions to be reduced

  • keep_dims – if true, retain the reduced axes with size 1

Returns:

ov::op::Op*

inline ov::op::Op *reduce_mean(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#

Create a new reduce mean operation.

Parameters:
  • input – operation’s input node

  • reduction_axes – the axis positions to be reduced

  • keep_dims – if true, retain the reduced axes with size 1

Returns:

ov::op::Op*

inline ov::op::Op *reduce_min(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#

Create a new reduce min operation.

Parameters:
  • input – operation’s input node

  • reduction_axes – the axis positions to be reduced

  • keep_dims – if true, retain the reduced axes with size 1

Returns:

ov::op::Op*

inline ov::op::Op *reduce_prod(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#

Create a new reduce product operation.

Parameters:
  • input – operation’s input node

  • reduction_axes – the axis positions to be reduced

  • keep_dims – if true, retain the reduced axes with size 1

Returns:

ov::op::Op*

inline ov::op::Op *reduce_sum(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#

Create a new reduce sum operation.

Parameters:
  • input – operation’s input node

  • reduction_axes – the axis positions to be reduced

  • keep_dims – if true, retain the reduced axes with size 1

Returns:

ov::op::Op*
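A sketch of a sum over the last axis that keeps the reduced dimension; the axes input is built with constant(), as in the reshape sketch above:

// Sum over the last axis of x, keeping it as a size-1 dimension.
std::vector<int64_t> ax = {-1};
ov::op::Op* axes   = factory.constant(ov::element::i64, {1}, ax.data());
ov::op::Op* summed = factory.reduce_sum(x, axes, /*keep_dims=*/true);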

inline ov::op::Op *abs(ov::op::Op *input)#

Create a new absolute activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *acos(ov::op::Op *input)#

Create a new arccos activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *asin(ov::op::Op *input)#

Create a new arcsin activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *atan(ov::op::Op *input)#

Create a new arctan activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *ceiling(ov::op::Op *input)#

Create a new ceiling operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *clamp(ov::op::Op *input, float min, float max)#

Create a new clamp operation.

Parameters:
  • input – operation’s input node

  • min – lower bound of the <min;max> range

  • max – the upper bound of the <min;max> range

Returns:

ov::op::Op*

inline ov::op::Op *cos(ov::op::Op *input)#

Create a new cosine activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *cosh(ov::op::Op *input)#

Create a new cosh activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *elu(ov::op::Op *input, float alpha)#

Create a new elu operation.

Parameters:
  • input – operation’s input node

  • alpha – multiplier for negative values

Returns:

ov::op::Op*

inline ov::op::Op *erf(ov::op::Op *input)#

Create a new erf activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *exp(ov::op::Op *input)#

Create a new exp activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *floor(ov::op::Op *input)#

Create a new floor activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *grn(ov::op::Op *input, float bias)#

Create a new grn operation.

Parameters:
  • input – operation’s input node

  • bias – bias added to the variance

Returns:

ov::op::Op*

inline ov::op::Op *gelu(ov::op::Op *input, ov::op::GeluApproximationMode mode)#

Create a new gelu operation.

Parameters:
  • input – operation’s input node

  • mode – the GELU approximation mode

Returns:

ov::op::Op*

inline ov::op::Op *log(ov::op::Op *input)#

Create a new natural log operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *negative(ov::op::Op *input)#

Create a new negative operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *relu(ov::op::Op *input)#

Create a new relu operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *sigmoid(ov::op::Op *input)#

Create a new sigmoid operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *sign(ov::op::Op *input)#

Create a new sign operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *sin(ov::op::Op *input)#

Create a new sine activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *sinh(ov::op::Op *input)#

Create a new sinh activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *sqrt(ov::op::Op *input)#

Create a new sqrt activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *tan(ov::op::Op *input)#

Create a new tan activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *tanh(ov::op::Op *input)#

Create a new tanh activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *acosh(ov::op::Op *input)#

Create a new arccosh activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *asinh(ov::op::Op *input)#

Create a new arcsinh activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *atanh(ov::op::Op *input)#

Create a new arctanh activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *hswish(ov::op::Op *input)#

Create a new hswish operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *mish(ov::op::Op *input)#

Create a new mish operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *softplus(ov::op::Op *input)#

Create a new softplus operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *hsigmoid(ov::op::Op *input)#

Create a new hsigmoid operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *round(ov::op::Op *input)#

Create a new round activation operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *softsign(ov::op::Op *input)#

Create a new softsign operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *swish(ov::op::Op *input)#

Create a new swish operation.

Parameters:

input – operation’s input node

Returns:

ov::op::Op*

inline ov::op::Op *softmax(ov::op::Op *input, int64_t axis = -1)#

Create a new softmax operation.

Parameters:
  • input – operation’s input node

  • axis – axis along which the softmax is computed (default: -1, the last axis)

Returns:

ov::op::Op*

inline ov::op::Op *convert_to(ov::op::Op *input, ov::element::Type_t dtype)#

Create a new conversion to dtype operation.

Parameters:
  • input – operation’s input node

  • dtype – the destination element type

Returns:

ov::op::Op*

inline ov::op::Op *eltwise_add(ov::op::Op *x1, ov::op::Op *&x2)#

Create a new elementwise add operation.

Parameters:
  • x1 – eltwise lhs input

  • x2 – eltwise rhs input

Returns:

ov::op::Op*

inline ov::op::Op *eltwise_mul(ov::op::Op *x1, ov::op::Op *&x2)#

Create a new elementwise multiply operation.

Parameters:
  • x1 – eltwise lhs input

  • x2 – eltwise rhs input

Returns:

ov::op::Op*

inline ov::op::Op *eltwise_div(ov::op::Op *x1, ov::op::Op *&x2)#

Create a new elementwise division operation.

Parameters:
  • x1 – eltwise lhs input

  • x2 – eltwise rhs input

Returns:

ov::op::Op*

inline ov::op::Op *scaled_dot_product_attention(ov::op::Op *query, ov::op::Op *key, ov::op::Op *value, ov::op::Op *attn_mask, bool is_causal)#

Create a new ScaledDotProductAttention operation.

Parameters:
  • query – sdpa query input

  • key – sdpa key input

  • value – sdpa value input

  • attn_mask – sdpa attn_mask input

  • is_causal – set the attention mask to causal. If it is set, attn_mask is ignored

Returns:

ov::op::Op*
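A causal self-attention sketch; a mask node is still passed here, but per the is_causal documentation above it is ignored when is_causal is true (shapes are illustrative):

const size_t heads = 8, seq = 64, head_dim = 32;
ov::op::Op* q = factory.parameter({1, heads, seq, head_dim}, ov::element::f16);
ov::op::Op* k = factory.parameter({1, heads, seq, head_dim}, ov::element::f16);
ov::op::Op* v = factory.parameter({1, heads, seq, head_dim}, ov::element::f16);
ov::op::Op* mask = factory.parameter({seq, seq}, ov::element::f16);
// mask is ignored because is_causal is set.
ov::op::Op* attn = factory.scaled_dot_product_attention(q, k, v, mask, /*is_causal=*/true);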

inline ov::op::Op *normL2(ov::op::Op *data, ov::op::Op *axes, float eps)#

Create a new L2 normalization operation.

Parameters:
  • data – operation’s input node

  • axes – node indicating axes along which reduction is calculated

  • eps – the epsilon added to L2 norm

Returns:

ov::op::Op*

inline ov::op::Op *power(ov::op::Op *x1, ov::op::Op *x2, ov::op::AutoBroadcastType auto_broadcast)#

Create a new power operation.

Parameters:
  • x1 – operation’s input node

  • x2 – operation’s input node of the exponent

  • auto_broadcast – auto broadcast specification

Returns:

ov::op::Op*

inline ov::op::Op *log_softmax(ov::op::Op *input, int64_t axis)#

Create a new log softmax operation.

Parameters:
  • input – operation’s input node

  • axis – the axis position on which to calculate the LogSoftmax

Returns:

ov::op::Op*

inline void result(ov::op::Op *op)#

Set a network output (result) node.

Parameters:

op – the last operation in the network. Must have a [batch, output_channels] shape

inline void compile()#

Compile the model.

Private Members

ov::ParameterVector parameters#
std::vector<std::shared_ptr<ov::op::Op>> operations#
ov::OutputVector results#
class OVInferenceModel#
#include <inference.h>

The OVInferenceModel implements the basics of NN inference on the NPU.

Subclassed by intel_npu_acceleration_library::ModelFactory

Public Functions

inline OVInferenceModel(std::string device, bool profile = false)#

Construct a new OVInferenceModel object.

Parameters:
  • device – target device

  • profile – enable/disable profiling

inline virtual ~OVInferenceModel()#
inline void saveCompiledModel(const std::string &path)#

Save the compiled model to a local path.

Parameters:

path – destination file path

inline void saveModel(const std::string &path)#

Save the model to a local path.

Parameters:

path – destination file path

inline void run()#

Run an inference.

Returns:

void

inline ov::Tensor getInputTensors(size_t idx)#

Get model input tensor.

Parameters:

idx – input tensor index

Returns:

ov::Tensor

inline ov::Tensor getOutputTensors(size_t idx)#

Get model output tensor.

Parameters:

idx – output tensor index

Returns:

ov::Tensor

inline void setInputTensor(void *_X, size_t idx)#

Set the input activations.

Parameters:
  • _X – pointer to the float16 input activation buffer

  • idx – input tensor index

inline void setOutputTensor(void *_X, size_t idx)#

Set the output activations.

Parameters:
  • _X – pointer to the float16 output activation buffer

  • idx – output tensor index

inline void setActivations(half_ptr _X, half_ptr _Out)#

Set the input and output activations.

Parameters:
  • _X – pointer to the float16 input activation

  • _Out – pointer to the float16 output activation
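A sketch of one inference pass on a compiled single-input, single-output model, given the compiled ModelFactory factory and the sizes from the first sketch; storing fp16 values bitwise in uint16_t buffers is an assumption, half_ptr is the library's float16 pointer type:

std::vector<uint16_t> x(batch * input_channels);    // fp16 input buffer
std::vector<uint16_t> out(batch * output_channels); // fp16 output buffer

factory.setActivations(reinterpret_cast<half_ptr>(x.data()),
                       reinterpret_cast<half_ptr>(out.data()));
factory.run();  // inference writes the results into out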

inline void setWeights(std::vector<std::shared_ptr<Parameter>> _weights)#

Set the network parameters.

Parameters:

_weights – vector of network parameters

Public Members

ov::Tensor X#

Model input tensor.

ov::Tensor Out#

Model output tensor.

std::thread wt_thread#

Async weight prefetch thread.

Protected Functions

inline void compile_model(std::string device)#

Compile a generated OV model to a specific device.

Parameters:

device – target compilation device

inline virtual void create_ov_model()#

Create an OV model object. This method must be overridden in child classes.

Protected Attributes

std::shared_ptr<ov::Model> model#

OpenVINO model.

std::string device#

Target device.

bool profile#

Enable/disable profiling.

Private Members

ov::CompiledModel compiled_model#
ov::InferRequest infer_request#
class Parameter#
#include <parameters.h>

The Parameter class represents a generic NN parameter.

Subclassed by intel_npu_acceleration_library::ParameterWithConversion

Public Functions

inline Parameter(Shape shape)#

Construct a new Parameter object.

Parameters:

shape – parameter shape

inline Parameter(half_ptr _data, Shape shape)#

Construct a new Parameter object from fp16 data pointer.

Parameters:
  • _data – fp16 parameter data pointer

  • shape – parameter shape

inline Parameter(int8_t *_data, Shape shape)#

Construct a new Parameter object from int8 data pointer.

Parameters:
  • _data – int8 parameter data pointer

  • shape – parameter shape

inline Parameter(uint8_t *_data, Shape shape)#

Construct a new Parameter object from uint8 data pointer.

Parameters:
  • _data – uint8 parameter data pointer

  • shape – parameter shape

inline size_t get_size() const#

Get the size of the parameter.

Returns:

size_t

inline virtual void set_data(void *dst, size_t size)#

Set the Parameter data to the memory location dst of size.

Parameters:
  • dst – destination memory location

  • size – destination memory location size

inline virtual ~Parameter()#

Destroy the Parameter object.

Protected Attributes

Shape shape#

Parameter shape.

Private Members

void *data#
bool quantized#
class Parameters#
#include <parameters.h>

The Parameters class represents a list of NN parameters for an NPU kernel.

Public Functions

inline Parameters &add_parameter(half_ptr data, Shape shape)#

Add a new float16 parameter.

Parameters:
  • data – fp16 parameter data pointer

  • shape – parameter shape

Returns:

Parameters&

inline Parameters &add_parameter(int8_t *data, half_ptr scale, Shape shape)#

Add a new int8 parameter, provide also the scale.

Parameters:
  • data – int8 parameter data pointer

  • scale – fp16 scale data pointer

  • shape – parameter shape

Returns:

Parameters&

inline Parameters &add_parameter(uint8_t *data, half_ptr scale, Shape shape)#

Add a new int4 parameter, provide also the scale.

Parameters:
  • data – uint8 data pointer (packed int4 values)

  • scale – fp16 scale data pointer

  • shape – parameter shape

Returns:

Parameters&

inline Parameters &add_parameter(int8_t *data, float *scale, Shape shape)#

Add a new int8 parameter with explicit CPU conversion.

Parameters:
  • data – int8 parameter data pointer

  • scale – float per output channel scale pointer

  • shape – parameter shape

Returns:

Parameters&

inline auto &get_parameters()#

Get the parameters.

Returns:

std::vector<std::shared_ptr<Parameter>>& – a reference to the stored parameters
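A sketch of assembling the weights of a kernel; add_parameter returns a reference to the Parameters object, so calls chain. Here fp16_weights (half_ptr), int8_weights (int8_t*), and scales (half_ptr) are assumed, pre-allocated buffers, and factory is a compiled ModelFactory:

Parameters params;
params.add_parameter(fp16_weights, Shape({256, 128}))          // plain fp16 tensor
      .add_parameter(int8_weights, scales, Shape({256, 128})); // int8 + fp16 scale
factory.setWeights(params.get_parameters());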

Private Members

std::vector<std::shared_ptr<Parameter>> parameters#
class ParameterWithConversion : public intel_npu_acceleration_library::Parameter#
#include <parameters.h>

The ParameterWithConversion class represents a generic quantized NN parameter whose conversion to fp16 is performed explicitly on the CPU. The conversion equation is Y_float = Scale * float(data).

Public Functions

inline ParameterWithConversion(int8_t *data, float *scale, Shape shape)#

Construct a new ParameterWithConversion object from int8 data, float scale and shape.

Parameters:
  • data – int8 data buffer

  • scale – float per output channel scale

  • shape – parameter shape

inline virtual void set_data(void *dst, size_t size)#

Set the Parameter data to the memory location dst of size. Here is where the conversion from int to float is performed.

Parameters:
  • dst – destination memory location

  • size – destination memory location size

Private Members

int8_t *data#
float *scale#
class Shape#
#include <parameters.h>

A class representing a generic tensor shape.

Public Functions

inline Shape(std::initializer_list<size_t> dims)#

Construct a new Shape object.

Parameters:

dims – a list of integers representing each dimension size

inline Shape(std::vector<size_t> &dims)#

Construct a new Shape object.

Parameters:

dims – a list of integers representing each dimension size

inline const size_t &operator[](int idx)#

Overload of operator[]. Returns the dimension at index idx.

Parameters:

idx – dimension index

Returns:

const size_t&

inline size_t get_size() const#

Get the number of elements of the tensor.

Returns:

size_t
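A usage sketch:

Shape shape({1, 3, 224, 224});
size_t n = shape.get_size();  // 150528: product of all dimensions
size_t c = shape[1];          // 3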

Private Members

std::vector<size_t> dimensions#
namespace intel_npu_acceleration_library#

Functions

bool _isNPUAvailable(ov::Core &core)#

Return true if the NPU is available on the system, otherwise return false.

Parameters:

core – ov::Core object

Returns:

true – the NPU AI accelerator is available

Returns:

false – the NPU AI accelerator is not available
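A usage sketch:

ov::Core core;
if (intel_npu_acceleration_library::_isNPUAvailable(core)) {
    // Safe to compile models for the "NPU" device.
} else {
    // Fall back to another device, e.g. "CPU".
}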

uint32_t driver_version(ov::Core &core)#
ov::element::Type_t dtype_from_string(const std::string &dtype)#
void compressToI4(const int8_t *src, uint8_t *dst, size_t size)#

Compress an int8 vector to I4 format.

Parameters:
  • src – pointer to the source int8 buffer

  • dst – pointer to the destination uint8 buffer

  • size – size of the src and dst buffers
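A sketch, assuming each int8 source value already fits the int4 range [-8, 7] and that two values are packed per destination byte (the exact nibble order is an implementation detail of the library):

std::vector<int8_t> src = {-8, 7, 0, 1, -1, 3, -4, 5};
std::vector<uint8_t> dst(src.size() / 2);  // two int4 values per byte (assumed)
intel_npu_acceleration_library::compressToI4(src.data(), dst.data(), src.size());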

void vector_to_fp16(const int8_t *src, float scale, half_ptr dst, size_t size)#

Convert an int8 vector to fp16 given a scalar scale.

Parameters:
  • src – pointer to the source int8 buffer

  • scale – Float scale

  • dst – pointer to the destination float16 buffer

  • size – size of the src and dst buffers
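A dequantization sketch following the conversion equation used elsewhere in the library (Y_float = Scale * float(data)), again assuming fp16 values are stored bitwise in uint16_t:

std::vector<int8_t> q = {-128, 0, 127};
std::vector<uint16_t> f(q.size());
intel_npu_acceleration_library::vector_to_fp16(q.data(), /*scale=*/0.02f,
                                               reinterpret_cast<half_ptr>(f.data()),
                                               q.size());
// f now holds 0.02 * {-128, 0, 127} = {-2.56, 0.0, 2.54} in fp16.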

void array_to_fp16_worker(const int8_t *input, float *scale, half_ptr output, size_t input_channels, size_t output_channels)#

Convert an int8 array to fp16 given a per-output-channel scale vector.

Parameters:
  • input – pointer to the source int8 buffer of shape [output_channels, input_channels]

  • scale – pointer of a float scale vector of shape [output_channels]

  • output – dst pointer to the destination float16 buffer of shape [output_channels, input_channels]

  • input_channels – number of input channels

  • output_channels – number of output channels

void to_fp16(const int8_t *input, float *scale, half_ptr output, size_t input_channels, size_t output_channels, unsigned int num_threads)#

Convert an int8 array to fp16 given a per-output-channel scale vector.

Parameters:
  • input – pointer to the source int8 buffer of shape [output_channels, input_channels]

  • scale – pointer of a float scale vector of shape [output_channels]

  • output – dst pointer to the destination float16 buffer of shape [output_channels, input_channels]

  • input_channels – number of input channels

  • output_channels – number of output channels

  • num_threads – number of parallel threads to use

void *create_remote_tensor(const ov::element::Type dtype, const ov::Shape &shape, void *buffer)#

Create a remote tensor.

Parameters:
  • dtype – tensor data type

  • shape – tensor shape

  • buffer – tensor buffer

Returns:

void* – an opaque pointer to the created ov::Tensor

Variables

static constexpr ov::Property<std::string> npu_compiler_type = {"NPU_COMPILER_TYPE"}#
static constexpr ov::Property<std::string> npu_parameters = {"NPU_COMPILATION_MODE_PARAMS"}#
static ov::Core core#

OpenVINO core object.

file common.h
#include "openvino/openvino.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset13.hpp"
#include "openvino/opsets/opset4.hpp"
#include "openvino/opsets/opset5.hpp"
#include "openvino/opsets/opset6.hpp"
#include "openvino/opsets/opset7.hpp"
#include "openvino/opsets/opset8.hpp"
#include "openvino/opsets/opset9.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"
file conversion.h
#include <immintrin.h>
#include <iostream>
#include <thread>
#include <vector>
file inference.h
#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
file nn_factory.h

Typedefs

typedef ov::Output<ov::Node> OVNode#
file parameters.h
#include <memory>
dir include
dir intel_npu_acceleration_library