C++ API Reference#
-
class ModelFactory : public intel_npu_acceleration_library::OVInferenceModel#
- #include <nn_factory.h>
The ModelFactory class implements a generic interface for NPU network generation and inference. It supports only single-input, single-output operations, with an input of shape [batch, input_channels] and an output of shape [batch, output_channels].
Public Functions
-
inline ModelFactory(std::string device, size_t inC, size_t outC, size_t batch, bool profile = false)#
Construct a new Model Factory object.
- Parameters:
device – target device
inC – number of input channels
outC – number of output channels
batch – batch size
profile – enable/disable profiling
-
inline ov::op::Op *parameter(size_t dim0, size_t dim1, ov::element::Type_t dtype)#
Create a new 2D [dim0, dim1] network parameter.
- Parameters:
dim0 – dimension 0
dim1 – dimension 1
dtype – parameter datatype
- Returns:
ov::op::Op*
-
inline ov::op::Op *matmul(ov::op::Op *input, ov::op::Op *&weights, bool trA = false, bool trB = true)#
Create a new matmul operation.
- Parameters:
input – matmul lhs input
weights – matmul rhs input, a.k.a. weights
trA – transpose the lhs input
trB – transpose the rhs input
- Returns:
ov::op::Op*
-
inline ov::op::Op *gelu(ov::op::Op *input)#
Create a new gelu operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *swish(ov::op::Op *input)#
Create a new swish operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *softmax(ov::op::Op *input)#
Create a new softmax operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *convert_to_fp16(ov::op::Op *input)#
Create a new conversion to fp16 operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *eltwise_add(ov::op::Op *x1, ov::op::Op *&x2)#
Create a new elementwise add operation.
- Parameters:
x1 – eltwise lhs input
x2 – eltwise rhs input
- Returns:
ov::op::Op*
-
inline ov::op::Op *eltwise_mul(ov::op::Op *x1, ov::op::Op *&x2)#
Create a new elementwise multiply operation.
- Parameters:
x1 – eltwise lhs input
x2 – eltwise rhs input
- Returns:
ov::op::Op*
-
inline ov::op::Op *eltwise_div(ov::op::Op *x1, ov::op::Op *&x2)#
Create a new elementwise division operation.
- Parameters:
x1 – eltwise lhs input
x2 – eltwise rhs input
- Returns:
ov::op::Op*
-
inline void compile(ov::op::Op *result)#
Compile the model.
- Parameters:
result – the last operation in the network. Must have a [batch, output_channels] shape
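As an illustration, here is a minimal sketch that builds and compiles a single linear layer followed by a GELU activation using the factory methods above (the shapes, the fp16 element type, and the "NPU" device string are illustrative assumptions, not prescribed by this reference):

#include "intel_npu_acceleration_library/nn_factory.h"

using namespace intel_npu_acceleration_library;

int main() {
    const size_t inC = 512, outC = 256, batch = 1;

    // Target the NPU; profiling disabled by default.
    ModelFactory factory("NPU", inC, outC, batch);

    // fp16 network inputs: activation [batch, inC] and weights [outC, inC].
    ov::op::Op* input = factory.parameter(batch, inC, ov::element::f16);
    ov::op::Op* weights = factory.parameter(outC, inC, ov::element::f16);

    // With the default trB = true, the weights are interpreted as [outC, inC].
    ov::op::Op* mm = factory.matmul(input, weights);
    ov::op::Op* act = factory.gelu(mm);

    // The last node must have shape [batch, outC].
    factory.compile(act);
    return 0;
}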
-
class OVInferenceModel#
- #include <inference.h>
The OVInferenceModel class implements the basics of NN inference on the NPU.
Subclassed by intel_npu_acceleration_library::ModelFactory
Public Functions
-
inline OVInferenceModel(std::string device, size_t inC, size_t outC, size_t batch, bool profile = false)#
Construct a new OVInferenceModel object.
- Parameters:
device – target device
inC – number of input channels
outC – number of output channels
batch – batch size
profile – enable/disable profiling
-
inline virtual ~OVInferenceModel()#
-
inline void saveCompiledModel(const std::string &path)#
Save the compiled model to a local path.
- Parameters:
path – destination file path
-
inline void saveModel(const std::string &path)#
Save the OV model to a local path.
- Parameters:
path – destination file path
-
inline void run()#
Run an inference.
- Returns:
void
-
inline void setActivations(half_ptr _X, half_ptr _Out)#
Set the input and output activations.
- Parameters:
_X – pointer to the float16 input activation
_Out – pointer to the float16 output activation
-
Set the network parameters.
- Parameters:
_weights – vector of network parameters
started – atomic bool variable that is set to true once the conversion has started. Useful for thread synchronization
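For instance, binding fp16 buffers and running inference might look like this (a sketch, assuming half_ptr is the library's pointer-to-fp16 type backed here by uint16_t storage, and that model is a compiled instance with batch, inC and outC as above):

// Hypothetical fp16 storage: one uint16_t per half-precision element.
std::vector<uint16_t> X(batch * inC), Out(batch * outC);
model.setActivations(reinterpret_cast<half_ptr>(X.data()),
                     reinterpret_cast<half_ptr>(Out.data()));
model.run();  // results are written to the Out buffer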
Public Members
-
ov::Tensor X#
Model input tensor.
-
ov::Tensor Out#
Model output tensor.
-
std::thread wt_thread#
Async weight prefetch thread.
Protected Functions
-
inline void compile_model(std::string device)#
Compile a generated OV model for a specific device.
- Parameters:
device – target compilation device
-
inline virtual void create_ov_model()#
Create an OV model object. This method must be overridden in child classes.
-
class Parameter#
- #include <parameters.h>
The Parameter class represents a generic NN parameter.
Subclassed by intel_npu_acceleration_library::ParameterWithConversion
Public Functions
-
inline Parameter(Shape shape)#
Construct a new Parameter object.
- Parameters:
shape – parameter shape
-
inline Parameter(half_ptr _data, Shape shape)#
Construct a new Parameter object from fp16 data pointer.
- Parameters:
_data – fp16 parameter data pointer
shape – parameter shape
-
inline Parameter(int8_t *_data, Shape shape)#
Construct a new Parameter object from int8 data pointer.
- Parameters:
_data – int8 parameter data pointer
shape – parameter shape
-
inline size_t get_size() const#
Get the size of the parameter.
- Returns:
size_t
-
class Parameters#
- #include <parameters.h>
The Parameters class represents a list of NN parameters for an NPU kernel.
Public Functions
-
inline Parameters &add_parameter(half_ptr data, Shape shape)#
Add a new float16 parameter.
- Parameters:
data – fp16 parameter data pointer
shape – parameter shape
- Returns:
Parameters&
-
inline Parameters &add_parameter(int8_t *data, half_ptr scale, Shape shape)#
Add a new int8 parameter, providing also the scale.
- Parameters:
data – int8 parameter data pointer
scale – fp16 scale pointer
shape – parameter shape
- Returns:
Parameters&
-
inline Parameters &add_parameter(int8_t *data, float *scale, Shape shape)#
Add a new int8 parameter with explicit CPU conversion.
- Parameters:
data – int8 parameter data pointer
scale – float per output channel scale
shape – parameter shape
- Returns:
Parameters&
-
inline auto &get_parameters()#
Get the parameters.
- Returns:
auto
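Because each add_parameter overload returns a reference to the same Parameters object, calls can be chained. The weight and scale buffers below are hypothetical:

Parameters params;
params.add_parameter(w0_fp16, Shape({outC, inC}))                // fp16 weight
    .add_parameter(w1_int8, w1_scale_fp16, Shape({outC, inC}))   // int8 + fp16 scale
    .add_parameter(w2_int8, w2_scale_fp32, Shape({outC, inC}));  // int8, explicit CPU conversion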
-
class ParameterWithConversion : public intel_npu_acceleration_library::Parameter#
- #include <parameters.h>
The ParameterWithConversion class represents a generic quantized NN parameter whose conversion to fp16 is performed explicitly on the CPU. The conversion equation is Y_float = Scale * float(data).
Public Functions
-
inline ParameterWithConversion(int8_t *data, float *scale, Shape shape)#
Construct a new ParameterWithConversion object from int8 data, float scale and shape.
- Parameters:
data – int8 data buffer
scale – float per output channel scale
shape – parameter shape
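As a concrete instance of the conversion equation, an int8 value of 64 with a channel scale of 0.5 dequantizes to 0.5 * 64 = 32.0. A construction sketch with hypothetical data:

const size_t outC = 4, inC = 8;
std::vector<int8_t> data(outC * inC, 64);
std::vector<float> scale(outC, 0.5f);  // every element dequantizes to 32.0f
ParameterWithConversion w(data.data(), scale.data(), Shape({outC, inC}));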
-
class Shape#
- #include <parameters.h>
A class representing a generic tensor shape.
Public Functions
-
inline Shape(std::initializer_list<size_t> dims)#
Construct a new Shape object.
- Parameters:
dims – a list of integers representing each dimension's size
-
inline const size_t &operator[](int idx)#
Overload of operator[]. Returns the dimension at index idx.
- Parameters:
idx –
- Returns:
const size_t&
-
inline size_t get_size() const#
Get the number of elements of the tensor.
- Returns:
size_t
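For example:

Shape s({2, 512});        // [batch, channels]
size_t c = s[1];          // 512
size_t n = s.get_size();  // 2 * 512 = 1024 elements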
Private Members
-
std::vector<size_t> dimensions#
-
namespace intel_npu_acceleration_library#
Functions
-
bool _isNPUAvailable(ov::Core &core)#
Return true if the NPU is available on the system, otherwise return false.
- Parameters:
core – ov::Core object
- Returns:
true – the NPU AI accelerator is available
- Returns:
false – the NPU AI accelerator is not available
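A typical guard before targeting the NPU (sketch):

ov::Core core;
if (intel_npu_acceleration_library::_isNPUAvailable(core)) {
    // NPU present: safe to compile for the "NPU" device
} else {
    // fall back to another device, e.g. "CPU"
}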
-
void vector_to_fp16(const int8_t *src, float scale, half_ptr dst, size_t size)#
Convert an int8 vector to fp16 given a scalar scale.
- Parameters:
src – pointer to the source int8 buffer
scale – float scale
dst – pointer to the destination float16 buffer
size – size of the src and dst buffers
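For instance, with a scalar scale of 0.05 (illustrative values; fp16 storage backed by uint16_t):

std::vector<int8_t> src = {-100, 0, 100};
std::vector<uint16_t> dst(src.size());
intel_npu_acceleration_library::vector_to_fp16(
    src.data(), 0.05f, reinterpret_cast<half_ptr>(dst.data()), src.size());
// dst now holds the fp16 encodings of -5.0, 0.0 and 5.0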
-
void array_to_fp16_worker(const int8_t *input, float *scale, half_ptr output, size_t input_channels, size_t output_channels)#
Convert an int8 array to fp16 given a per-output-channel scale vector.
- Parameters:
input – pointer to the source int8 buffer of shape [output_channels, input_channels]
scale – pointer to a float scale vector of shape [output_channels]
output – pointer to the destination float16 buffer of shape [output_channels, input_channels]
input_channels – number of input channels
output_channels – number of output channels
-
void to_fp16(const int8_t *input, float *scale, half_ptr output, size_t input_channels, size_t output_channels, unsigned int num_threads)#
Convert an int8 array to fp16 given a per-output-channel scale vector.
- Parameters:
input – pointer to the source int8 buffer of shape [output_channels, input_channels]
scale – pointer to a float scale vector of shape [output_channels]
output – pointer to the destination float16 buffer of shape [output_channels, input_channels]
input_channels – number of input channels
output_channels – number of output channels
num_threads – number of parallel threads to use
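A dequantization sketch for an [output_channels, input_channels] weight matrix using four worker threads (buffers and sizes hypothetical):

const size_t inC = 1024, outC = 1024;
std::vector<int8_t> w_int8(outC * inC);    // quantized weights
std::vector<float> scale(outC);            // one scale per output channel
std::vector<uint16_t> w_fp16(outC * inC);  // fp16 destination
// ... fill w_int8 and scale ...
intel_npu_acceleration_library::to_fp16(
    w_int8.data(), scale.data(),
    reinterpret_cast<half_ptr>(w_fp16.data()), inC, outC, 4);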
- file common.h
- #include "openvino/openvino.hpp"
- #include "openvino/opsets/opset1.hpp"
- #include "openvino/opsets/opset13.hpp"
- #include "openvino/opsets/opset4.hpp"
- #include "openvino/opsets/opset6.hpp"
- #include "openvino/opsets/opset7.hpp"
- #include "openvino/opsets/opset8.hpp"
- file conversion.h
- #include <immintrin.h>
- #include <iostream>
- #include <thread>
- #include <vector>
- #include "intel_npu_acceleration_library/common.h"
- file inference.h
- #include <atomic>
- #include <condition_variable>
- #include <cstdint>
- #include <cstdlib>
- #include <cstring>
- #include <fstream>
- #include <iostream>
- #include <limits>
- #include <memory>
- #include <mutex>
- #include <string>
- #include <thread>
- #include <vector>
- #include "intel_npu_acceleration_library/common.h"
- #include "intel_npu_acceleration_library/parameters.h"
- file nn_factory.h
- #include "intel_npu_acceleration_library/inference.h"
Typedefs
-
typedef ov::Output<ov::Node> OVNode#
- file parameters.h
- #include <memory>
- #include "intel_npu_acceleration_library/common.h"
- #include "intel_npu_acceleration_library/conversion.h"
- dir include
- dir intel_npu_acceleration_library