C++ API Reference#
-
class ModelFactory : public intel_npu_acceleration_library::OVInferenceModel#
- #include <nn_factory.h>
The ModelFactory class implements a generic interface for NPU network generation and inference. It supports only single-input, single-output operations, with inputs of shape [batch, input_channels] and outputs of shape [batch, output_channels].
Public Functions
-
inline ModelFactory(std::string device, bool profile = false)#
Construct a new Model Factory object.
- Parameters:
device – target device
profile – enable/disable profiling
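A minimal end-to-end sketch of the intended usage, assuming the library headers are on the include path and an NPU driver is installed (buffer names are hypothetical, and zero-initialized uint16_t buffers stand in for fp16 data):

    #include <vector>
    #include "intel_npu_acceleration_library/nn_factory.h"

    int main() {
        const size_t batch = 1, inC = 128, outC = 256;
        intel_npu_acceleration_library::ModelFactory factory("NPU");

        // Network input of shape [batch, input_channels]
        auto input = factory.parameter({batch, inC}, ov::element::f16);

        // Weight constant of shape [outC, inC]; matmul transposes it by default (trB = true)
        std::vector<uint16_t> wdata(outC * inC, 0);
        auto weights = factory.constant(ov::element::f16, {outC, inC}, wdata.data());

        auto out = factory.matmul(input, weights);
        factory.result(out);  // mark the network output
        factory.compile();    // compile for the target device

        // Bind input/output buffers and run one inference
        std::vector<uint16_t> x(batch * inC, 0), y(batch * outC, 0);
        factory.setInputTensor(x.data(), 0);
        factory.setOutputTensor(y.data(), 0);
        factory.run();
        return 0;
    }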
-
inline ov::op::Op *parameter(std::vector<size_t> shape, ov::element::Type_t dtype)#
Create a new N-Dimensional network parameter.
- Parameters:
shape – parameter shape
dtype – parameter datatype
- Returns:
ov::op::Op*
-
inline ov::op::Op *constant(ov::element::Type_t dtype, std::vector<size_t> shape, const void *dst)#
Create a new constant object.
- Parameters:
dtype – element type of the tensor constant
shape – shape of the tensor constant
dst – data pointer of the tensor constant
- Returns:
ov::op::Op*
-
template<class T, class = typename std::enable_if<std::is_fundamental<T>::value>::type>
inline ov::op::Op *constant(ov::element::Type_t dtype, std::vector<size_t> shape, T value)#
Create a new constant object.
- Parameters:
dtype – element type of the tensor constant
shape – shape of the tensor constant
value – value for initializing the tensor constant
- Returns:
ov::op::Op*
-
inline ov::op::Op *matmul(ov::op::Op *input, ov::op::Op *&weights, bool trA = false, bool trB = true)#
Create a new matmul operation.
- Parameters:
input – matmul lhs input
weights – matmul rhs input, a.k.a. weights
trA – transpose the lhs input
trB – transpose the rhs input
- Returns:
ov::op::Op*
-
inline ov::op::Op *linear(ov::op::Op *input, ov::op::Op *weights, ov::op::Op *bias)#
Create a new linear operation.
- Parameters:
input – matmul lhs input
weights – matmul rhs input, a.k.a. weights
bias – matmul bias input
- Returns:
ov::op::Op*
-
inline ov::op::Op *convolution(ov::op::Op *input, ov::op::Op *&weights, std::vector<size_t> strides, std::vector<size_t> pads_begin, std::vector<size_t> pads_ends, std::vector<size_t> dilations, size_t groups = 1)#
Create a new convolution operation.
- Parameters:
input – convolution input
weights – convolution weights
strides – convolution strides
pads_begin – convolution padding begin
pads_ends – convolution padding end
dilations – convolution dilations
groups – convolution groups
- Returns:
ov::op::Op*
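For illustration, a sketch of a 3x3 same-padding convolution node; the shapes, the helper name, and the [output_channels, input_channels, kernel_h, kernel_w] weight layout are assumptions following the usual OpenVINO convention:

    // Builds conv(x, W) with stride 1, padding 1, dilation 1, a single group
    ov::op::Op *conv3x3(intel_npu_acceleration_library::ModelFactory &factory,
                        ov::op::Op *x, const void *weight_data) {
        auto w = factory.constant(ov::element::f16, {16, 8, 3, 3}, weight_data);
        return factory.convolution(x, w, {1, 1}, {1, 1}, {1, 1}, {1, 1});
    }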
-
inline ov::op::Op *average_pooling(ov::op::Op *input, std::vector<size_t> strides, std::vector<size_t> pads_begin, std::vector<size_t> pads_ends, std::vector<size_t> kernel, bool exclude_pad = false, ov::op::RoundingType rounding_type = ov::op::RoundingType::FLOOR, ov::op::PadType auto_pad = ov::op::PadType::EXPLICIT)#
Create a new average pooling operation.
- Parameters:
input – pooling input
strides – pooling strides
pads_begin – pooling padding begin
pads_ends – pooling padding end
kernel – pooling kernel
exclude_pad – exclude padding from the average calculation
rounding_type – rounding type
auto_pad – padding type
- Returns:
ov::op::Op*
-
inline ov::op::Op *adaptive_average_pool(ov::op::Op *input, ov::op::Op *output_shape)#
Create a new adaptive average pooling operation.
- Parameters:
input – pooling input
output_shape – output shape
- Returns:
ov::op::Op*
-
inline ov::op::Op *max_pooling(ov::op::Op *input, std::vector<size_t> strides, std::vector<size_t> pads_begin, std::vector<size_t> pads_ends, std::vector<size_t> kernel, ov::op::RoundingType rounding_type = ov::op::RoundingType::FLOOR, ov::op::PadType auto_pad = ov::op::PadType::EXPLICIT)#
Create a new max pooling operation.
- Parameters:
input – pooling input
strides – pooling strides
pads_begin – pooling padding begin
pads_ends – pooling padding end
kernel – pooling kernel
rounding_type – rounding type
auto_pad – padding type
- Returns:
ov::op::Op*
-
inline ov::op::Op *adaptive_max_pool(ov::op::Op *input, ov::op::Op *output_shape)#
Create a new adaptive max pooling operation.
- Parameters:
input – pooling input
output_shape – output shape
- Returns:
ov::op::Op*
-
inline ov::op::Op *gather(ov::op::Op *input, ov::op::Op *indices, ov::op::Op *axis, const size_t batch_dims = 0)#
Create a new gather operation.
- Parameters:
input – tensor from which slices are gathered
indices – tensor with indexes to gather
axis – tensor with the dimension index from which to gather data
batch_dims – number of batch dimensions in the data and indices tensors
- Returns:
ov::op::Op*
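A sketch of an embedding-style row lookup built with gather (helper name and shapes are hypothetical):

    // Gathers the rows of `table` selected by `ids` along axis 0
    ov::op::Op *lookup(intel_npu_acceleration_library::ModelFactory &factory,
                       ov::op::Op *table, ov::op::Op *ids) {
        int64_t axis_value = 0;
        auto axis = factory.constant(ov::element::i64, {1}, &axis_value);
        return factory.gather(table, ids, axis);
    }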
-
inline ov::op::Op *reshape(ov::op::Op *input, ov::op::Op *shape)#
Create a new reshape operation.
- Parameters:
input – tensor to be reshaped.
shape – new shape tensor; -1 is allowed for one dimension, which will be calculated automatically.
- Returns:
ov::op::Op*
-
inline ov::op::Op *slice(ov::op::Op *input, ov::op::Op *begin, ov::op::Op *end, ov::op::Op *strides, const std::vector<int64_t> begin_mask, const std::vector<int64_t> end_mask)#
Create a new strided slice operation.
- Parameters:
input – tensor to be sliced.
begin – tensor with begin indices for each dimension.
end – tensor with end indices for each dimension.
strides – tensor with strides for each dimension.
begin_mask – mask for begin indices
end_mask – mask for end indices
- Returns:
ov::op::Op*
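For instance, a sketch that keeps the first 64 columns of a [batch, 128] tensor; it assumes OpenVINO StridedSlice mask semantics, where a mask bit of 1 ignores the corresponding index and takes the full range:

    ov::op::Op *first_half(intel_npu_acceleration_library::ModelFactory &factory,
                           ov::op::Op *x) {
        std::vector<int64_t> b = {0, 0}, e = {0, 64}, s = {1, 1};
        auto begin = factory.constant(ov::element::i64, {2}, b.data());
        auto end = factory.constant(ov::element::i64, {2}, e.data());
        auto strides = factory.constant(ov::element::i64, {2}, s.data());
        // Batch axis masked out (full range); column axis sliced to [0, 64)
        return factory.slice(x, begin, end, strides, {1, 0}, {1, 0});
    }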
-
inline ov::op::Op *transpose(ov::op::Op *input, ov::op::Op *input_order)#
Create a new transpose operation.
- Parameters:
input – tensor to be transposed.
input_order – permutation tensor, the new order of dimensions.
- Returns:
ov::op::Op*
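Reshape and transpose compose naturally; a sketch with hypothetical dimensions:

    // [batch, 64] -> [batch, 8, 8], then swap the last two axes
    ov::op::Op *to_grid(intel_npu_acceleration_library::ModelFactory &factory,
                        ov::op::Op *x) {
        std::vector<int64_t> shape_v = {-1, 8, 8};  // -1: dimension inferred automatically
        std::vector<int64_t> order_v = {0, 2, 1};
        auto shape = factory.constant(ov::element::i64, {3}, shape_v.data());
        auto order = factory.constant(ov::element::i64, {3}, order_v.data());
        return factory.transpose(factory.reshape(x, shape), order);
    }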
-
inline ov::op::Op *squeeze(ov::op::Op *input)#
Create a new squeeze operation.
- Parameters:
input – tensor to be squeezed.
- Returns:
ov::op::Op*
-
inline ov::op::Op *unsqueeze(ov::op::Op *input, ov::op::Op *axis)#
Create a new unsqueeze operation.
- Parameters:
input – tensor to be unsqueezed.
axis – tensor with axes to unsqueeze
- Returns:
ov::op::Op*
-
inline ov::op::Op *concat(ov::op::Op *x1, ov::op::Op *x2, int64_t axis)#
Create a new concatenation operation.
- Parameters:
x1 – first concat input node
x2 – second concat input node
axis – axis along which to concatenate the input tensors
- Returns:
ov::op::Op*
-
inline ov::op::Op *reduce_max(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#
Create a new reduce max operation.
- Parameters:
input – operation’s input node
reduction_axes – the axis positions to be reduced
keep_dims – if true, retain the reduced dimensions with size 1
- Returns:
ov::op::Op*
-
inline ov::op::Op *reduce_mean(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#
Create a new reduce mean operation.
- Parameters:
input – operation’s input node
reduction_axes – the axis positions to be reduced
keep_dims – if true, retain the reduced dimensions with size 1
- Returns:
ov::op::Op*
-
inline ov::op::Op *reduce_min(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#
Create a new reduce min operation.
- Parameters:
input – operation’s input node
reduction_axes – the axis positions to be reduced
keep_dims – if true, retain the reduced dimensions with size 1
- Returns:
ov::op::Op*
-
inline ov::op::Op *reduce_prod(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#
Create a new reduce product operation.
- Parameters:
input – operation’s input node
reduction_axes – the axis positions to be reduced
keep_dims – if true, retain the reduced dimensions with size 1
- Returns:
ov::op::Op*
-
inline ov::op::Op *reduce_sum(ov::op::Op *input, ov::op::Op *reduction_axes, bool keep_dims)#
Create a new reduce sum operation.
- Parameters:
input – operation’s input node
reduction_axes – the axis positions to be reduced
keep_dims – if true, retain the reduced dimensions with size 1
- Returns:
ov::op::Op*
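All five reduce operations share the same calling pattern; a sketch for reduce_sum over the last axis (helper name hypothetical):

    ov::op::Op *row_sum(intel_npu_acceleration_library::ModelFactory &factory,
                        ov::op::Op *x) {
        int64_t axis_value = -1;  // the last axis
        auto axes = factory.constant(ov::element::i64, {1}, &axis_value);
        return factory.reduce_sum(x, axes, /*keep_dims=*/true);
    }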
-
inline ov::op::Op *abs(ov::op::Op *input)#
Create a new absolute activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *acos(ov::op::Op *input)#
Create a new arccos activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *asin(ov::op::Op *input)#
Create a new arcsin activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *atan(ov::op::Op *input)#
Create a new arctan activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *ceiling(ov::op::Op *input)#
Create a new ceiling operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *clamp(ov::op::Op *input, float min, float max)#
Create a new clamp operation.
- Parameters:
input – operation’s input node
min – lower bound of the <min;max> range
max – the upper bound of the <min;max> range
- Returns:
ov::op::Op*
-
inline ov::op::Op *cos(ov::op::Op *input)#
Create a new cosine activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *cosh(ov::op::Op *input)#
Create a new cosh activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *elu(ov::op::Op *input, float alpha)#
Create a new elu operation.
- Parameters:
input – operation’s input node
alpha – multiplier for negative values
- Returns:
ov::op::Op*
-
inline ov::op::Op *erf(ov::op::Op *input)#
Create a new erf activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *exp(ov::op::Op *input)#
Create a new exp activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *floor(ov::op::Op *input)#
Create a new floor activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *grn(ov::op::Op *input, float bias)#
Create a new grn operation.
- Parameters:
input – operation’s input node
bias – bias added to the variance
- Returns:
ov::op::Op*
-
inline ov::op::Op *gelu(ov::op::Op *input, ov::op::GeluApproximationMode mode)#
Create a new gelu operation.
- Parameters:
input – operation’s input node
mode – gelu approximation mode
- Returns:
ov::op::Op*
-
inline ov::op::Op *log(ov::op::Op *input)#
Create a new natural log operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *negative(ov::op::Op *input)#
Create a new negative operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *relu(ov::op::Op *input)#
Create a new relu operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *sigmoid(ov::op::Op *input)#
Create a new sigmoid operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *sign(ov::op::Op *input)#
Create a new sign operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *sin(ov::op::Op *input)#
Create a new sine activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *sinh(ov::op::Op *input)#
Create a new sinh activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *sqrt(ov::op::Op *input)#
Create a new sqrt activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *tan(ov::op::Op *input)#
Create a new tan activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *tanh(ov::op::Op *input)#
Create a new tanh activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *acosh(ov::op::Op *input)#
Create a new arccosh activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *asinh(ov::op::Op *input)#
Create a new arcsinh activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *atanh(ov::op::Op *input)#
Create a new arctanh activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *hswish(ov::op::Op *input)#
Create a new hswish operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *mish(ov::op::Op *input)#
Create a new mish operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *softplus(ov::op::Op *input)#
Create a new softplus operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *hsigmoid(ov::op::Op *input)#
Create a new hsigmoid operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *round(ov::op::Op *input)#
Create a new round activation operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *softsign(ov::op::Op *input)#
Create a new softsign operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *swish(ov::op::Op *input)#
Create a new swish operation.
- Parameters:
input – operation’s input node
- Returns:
ov::op::Op*
-
inline ov::op::Op *softmax(ov::op::Op *input, int64_t axis = -1)#
Create a new softmax operation.
- Parameters:
input – operation’s input node
axis – axis along which the softmax is computed (default -1, the last axis)
- Returns:
ov::op::Op*
-
inline ov::op::Op *convert_to(ov::op::Op *input, ov::element::Type_t dtype)#
Create a new conversion to dtype operation.
- Parameters:
input – operation’s input node
dtype – target element type
- Returns:
ov::op::Op*
-
inline ov::op::Op *eltwise_add(ov::op::Op *x1, ov::op::Op *&x2)#
Create a new elementwise add operation.
- Parameters:
x1 – eltwise lhs input
x2 – eltwise rhs input
- Returns:
ov::op::Op*
-
inline ov::op::Op *eltwise_mul(ov::op::Op *x1, ov::op::Op *&x2)#
Create a new elementwise multiply operation.
- Parameters:
x1 – eltwise lhs input
x2 – eltwise rhs input
- Returns:
ov::op::Op*
-
inline ov::op::Op *eltwise_div(ov::op::Op *x1, ov::op::Op *&x2)#
Create a new elementwise division operation.
- Parameters:
x1 – eltwise lhs input
x2 – eltwise rhs input
- Returns:
ov::op::Op*
-
inline ov::op::Op *scaled_dot_product_attention(ov::op::Op *query, ov::op::Op *key, ov::op::Op *value, ov::op::Op *attn_mask, bool is_causal)#
Create a new ScaledDotProductAttention operation.
- Parameters:
query – sdpa query input
key – sdpa key input
value – sdpa value input
attn_mask – sdpa attn_mask input
is_causal – set the attention mask to causal. If it is set, attn_mask is ignored
- Returns:
ov::op::Op*
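A sketch of a causal attention node; since attn_mask is documented as ignored when is_causal is set, a null mask is passed here, which is an assumption not confirmed by the source:

    ov::op::Op *causal_attention(intel_npu_acceleration_library::ModelFactory &factory,
                                 ov::op::Op *q, ov::op::Op *k, ov::op::Op *v) {
        // Assumption: a null attn_mask is tolerated when is_causal == true
        return factory.scaled_dot_product_attention(q, k, v, nullptr, true);
    }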
-
inline ov::op::Op *normL2(ov::op::Op *data, ov::op::Op *axes, float eps)#
Create a new L2 normalization operation.
- Parameters:
data – operation’s input node
axes – node indicating axes along which reduction is calculated
eps – the epsilon added to L2 norm
- Returns:
ov::op::Op*
-
inline ov::op::Op *power(ov::op::Op *x1, ov::op::Op *x2, ov::op::AutoBroadcastType auto_broadcast)#
Create a new power operation.
- Parameters:
x1 – operation’s input node
x2 – operation’s input node of the exponent
auto_broadcast – auto broadcast specification
- Returns:
ov::op::Op*
-
inline ov::op::Op *log_softmax(ov::op::Op *input, int64_t axis)#
Create a new log softmax operation.
- Parameters:
input – operation’s input node
axis – the axis position on which to calculate the LogSoftmax
- Returns:
ov::op::Op*
-
inline void result(ov::op::Op *op)#
Set an operation as the network output.
- Parameters:
op – the last operation in the network. Must have a [batch, output_channels] shape
-
inline void compile()#
Compile the model.
-
class OVInferenceModel#
- #include <inference.h>
The OVInferenceModel class implements the basics of NN inference on the NPU.
Subclassed by intel_npu_acceleration_library::ModelFactory
Public Functions
-
inline OVInferenceModel(std::string device, bool profile = false)#
Construct a new OVInferenceModel object.
- Parameters:
device – target device
profile – enable/disable profiling
-
inline virtual ~OVInferenceModel()#
-
inline void saveCompiledModel(const std::string &path)#
Save the compiled model to a local path.
- Parameters:
path – destination file path
-
inline void saveModel(const std::string &path)#
Save the OpenVINO model representation to a local path.
- Parameters:
path – destination file path
-
inline void run()#
Run an inference.
- Returns:
void
-
inline ov::Tensor getInputTensors(size_t idx)#
Get model input tensor.
- Parameters:
idx – input tensor index
- Returns:
ov::Tensor
-
inline ov::Tensor getOutputTensors(size_t idx)#
Get model output tensor.
- Parameters:
idx – output tensor index
- Returns:
ov::Tensor
-
inline void setInputTensor(void *_X, size_t idx)#
Set the input activations.
- Parameters:
_X – pointer to the float16 input activation buffer
idx – input tensor index
-
inline void setOutputTensor(void *_X, size_t idx)#
Set the output activations.
- Parameters:
_X – pointer to the float16 output activation buffer
idx – output tensor index
-
inline void setActivations(half_ptr _X, half_ptr _Out)#
Set the input and output activations.
- Parameters:
_X – pointer to the float16 input activation
_Out – pointer to the float16 output activation
-
Set the network parameters.
- Parameters:
_weights – vector of network parameters
Public Members
-
ov::Tensor X#
Model input tensor.
-
ov::Tensor Out#
Model output tensor.
-
std::thread wt_thread#
Async weight prefetch thread.
Protected Functions
-
inline void compile_model(std::string device)#
Compile a generated OV model to a specific device.
- Parameters:
device – target compilation device
-
inline virtual void create_ov_model()#
Create an OV model object. This method must be overridden in child classes.
-
class Parameter#
- #include <parameters.h>
The Parameter class represents a generic NN parameter.
Subclassed by intel_npu_acceleration_library::ParameterWithConversion
Public Functions
-
inline Parameter(Shape shape)#
Construct a new Parameter object.
- Parameters:
shape – parameter shape
-
inline Parameter(half_ptr _data, Shape shape)#
Construct a new Parameter object from fp16 data pointer.
- Parameters:
_data – fp16 parameter data pointer
shape – parameter shape
-
inline Parameter(int8_t *_data, Shape shape)#
Construct a new Parameter object from int8 data pointer.
- Parameters:
_data – int8 parameter data pointer
shape – parameter shape
-
inline Parameter(uint8_t *_data, Shape shape)#
Construct a new Parameter object from uint8 data pointer.
- Parameters:
_data – uint8 parameter data pointer
shape – parameter shape
-
inline size_t get_size() const#
Get the size of the parameter.
- Returns:
size_t
-
class Parameters#
- #include <parameters.h>
The Parameters class represents a list of NN parameters for an NPU kernel.
Public Functions
-
inline Parameters &add_parameter(half_ptr data, Shape shape)#
Add a new float16 parameter.
- Parameters:
data – fp16 parameter data pointer
shape – parameter shape
- Returns:
reference to this Parameters object, to allow call chaining
-
inline Parameters &add_parameter(int8_t *data, half_ptr scale, Shape shape)#
Add a new int8 parameter, also providing the scale.
- Parameters:
data – int8 parameter data pointer
scale – fp16 per output channel scale pointer
shape – parameter shape
- Returns:
reference to this Parameters object, to allow call chaining
-
inline Parameters &add_parameter(uint8_t *data, half_ptr scale, Shape shape)#
Add a new int4 parameter, also providing the scale.
- Parameters:
data – uint8 parameter data pointer
scale – fp16 per output channel scale pointer
shape – parameter shape
- Returns:
reference to this Parameters object, to allow call chaining
-
inline Parameters &add_parameter(int8_t *data, float *scale, Shape shape)#
Add a new int8 parameter with explicit CPU conversion.
- Parameters:
data – int8 parameter data pointer
scale – float per output channel scale pointer
shape – parameter shape
- Returns:
reference to this Parameters object, to allow call chaining
-
inline auto &get_parameters()#
Get the parameters.
- Returns:
reference to the internal vector of parameters
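Because each add_parameter overload returns a reference to the Parameters object, calls can be chained; a sketch with hypothetical buffer names, assuming the buffers are allocated and filled by the caller:

    #include "intel_npu_acceleration_library/parameters.h"

    using namespace intel_npu_acceleration_library;

    void register_weights(half_ptr fc1_w, int8_t *fc2_w, half_ptr fc2_scale) {
        Parameters params;
        params.add_parameter(fc1_w, Shape({256, 128}))               // fp16 weight
              .add_parameter(fc2_w, fc2_scale, Shape({128, 256}));   // int8 weight + scale
    }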
-
class ParameterWithConversion : public intel_npu_acceleration_library::Parameter#
- #include <parameters.h>
The ParameterWithConversion class represents a generic quantized NN parameter where the conversion to fp16 is performed explicitly on the CPU. The conversion equation is Y_float = Scale * float(data).
Public Functions
-
inline ParameterWithConversion(int8_t *data, float *scale, Shape shape)#
Construct a new ParameterWithConversion object from int8 data, float scale and shape.
- Parameters:
data – int8 data buffer
scale – float per output channel scale
shape – parameter shape
-
class Shape#
- #include <parameters.h>
A class representing a generic tensor shape.
Public Functions
-
inline Shape(std::initializer_list<size_t> dims)#
Construct a new Shape object.
- Parameters:
dims – a list of integers representing each dimension size
-
inline Shape(std::vector<size_t> &dims)#
Construct a new Shape object.
- Parameters:
dims – a list of integers representing each dimension size
-
inline const size_t &operator[](int idx)#
Overload of the operator []. Return the dimension at index idx.
- Parameters:
idx – dimension index
- Returns:
const size_t&
-
inline size_t get_size() const#
Get the number of elements of the tensor.
- Returns:
size_t
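A short sketch of the Shape interface:

    #include "intel_npu_acceleration_library/parameters.h"

    void shape_demo() {
        intel_npu_acceleration_library::Shape s({2, 3, 4});
        size_t total = s.get_size();  // 2 * 3 * 4 = 24 elements
        size_t first = s[0];          // first dimension: 2
        (void)total;
        (void)first;
    }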
Private Members
-
std::vector<size_t> dimensions#
-
namespace intel_npu_acceleration_library#
Functions
-
bool _isNPUAvailable(ov::Core &core)#
Return true if the NPU is available on the system, otherwise return false.
- Parameters:
core – ov::Core object
- Returns:
true NPU AI accelerator is available
- Returns:
false NPU AI accelerator is not available
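A typical probe before building a model (the fallback policy is up to the caller):

    #include "intel_npu_acceleration_library/common.h"

    int main() {
        ov::Core core;
        const bool has_npu = intel_npu_acceleration_library::_isNPUAvailable(core);
        // Fall back to another OpenVINO device (e.g. "CPU") when no NPU is found
        return has_npu ? 0 : 1;
    }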
-
uint32_t driver_version(ov::Core &core)#
Return the NPU driver version.
- Parameters:
core – ov::Core object
-
ov::element::Type_t dtype_from_string(const std::string &dtype)#
Convert a string representation to the corresponding OpenVINO element type.
- Parameters:
dtype – string name of the data type
-
void compressToI4(const int8_t *src, uint8_t *dst, size_t size)#
Compress an int8 vector to I4 format.
- Parameters:
src – pointer to the source int8 buffer
dst – pointer to the destination uint8 buffer
size – size of the src and dst buffers
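A packing sketch; it assumes, consistent with two 4-bit values per byte, that size counts the int8 elements of src and that dst holds size/2 bytes (this interpretation is an assumption, not confirmed by the source):

    #include <cstdint>
    #include "intel_npu_acceleration_library/conversion.h"

    void pack_demo() {
        int8_t src[8] = {0, 1, -1, 7, -8, 3, -3, 2};  // values must fit in 4 bits
        uint8_t dst[4];                               // assumed: two packed int4 values per byte
        intel_npu_acceleration_library::compressToI4(src, dst, 8);
    }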
-
void vector_to_fp16(const int8_t *src, float scale, half_ptr dst, size_t size)#
Convert an int8 vector to fp16 given a scalar scale.
- Parameters:
src – pointer to the source int8 buffer
scale – float scale
dst – pointer to the destination float16 buffer
size – size of the src and dst buffers
-
void array_to_fp16_worker(const int8_t *input, float *scale, half_ptr output, size_t input_channels, size_t output_channels)#
Convert an int8 array to fp16 given a per output channel scale vector.
- Parameters:
input – pointer to the source int8 buffer of shape [output_channels, input_channels]
scale – pointer to a float scale vector of shape [output_channels]
output – pointer to the destination float16 buffer of shape [output_channels, input_channels]
input_channels – number of input channels
output_channels – number of output channels
-
void to_fp16(const int8_t *input, float *scale, half_ptr output, size_t input_channels, size_t output_channels, unsigned int num_threads)#
Convert an int8 array to fp16 given a per output channel scale vector.
- Parameters:
input – pointer to the source int8 buffer of shape [output_channels, input_channels]
scale – pointer to a float scale vector of shape [output_channels]
output – pointer to the destination float16 buffer of shape [output_channels, input_channels]
input_channels – number of input channels
output_channels – number of output channels
num_threads – number of parallel threads to use
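A dequantization sketch for an int8 [output_channels, input_channels] weight matrix; half_ptr is the library's fp16 pointer typedef, and the uint16_t buffer standing in for fp16 storage is an assumption about its definition:

    #include <cstdint>
    #include <vector>
    #include "intel_npu_acceleration_library/conversion.h"

    using namespace intel_npu_acceleration_library;

    void dequant_demo() {
        const size_t inC = 64, outC = 32;
        std::vector<int8_t> w(outC * inC, 1);   // quantized weights
        std::vector<float> scale(outC, 0.05f);  // one scale per output channel
        std::vector<uint16_t> out(outC * inC);  // fp16 destination storage
        to_fp16(w.data(), scale.data(), reinterpret_cast<half_ptr>(out.data()),
                inC, outC, /*num_threads=*/4);
    }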
-
void *create_remote_tensor(const ov::element::Type dtype, const ov::Shape &shape, void *buffer)#
Create a remote tensor.
- Parameters:
dtype – tensor data type
shape – tensor shape
buffer – tensor buffer
- Returns:
pointer to the created remote ov::Tensor
- file common.h
- #include "openvino/openvino.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset13.hpp"
#include "openvino/opsets/opset4.hpp"
#include "openvino/opsets/opset5.hpp"
#include "openvino/opsets/opset6.hpp"
#include "openvino/opsets/opset7.hpp"
#include "openvino/opsets/opset8.hpp"
#include "openvino/opsets/opset9.hpp"
#include "openvino/runtime/intel_npu/properties.hpp"
- file conversion.h
- #include <immintrin.h>
#include <iostream>
#include <thread>
#include <vector>
#include "intel_npu_acceleration_library/common.h"
- file inference.h
- #include <atomic>
#include <condition_variable>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <vector>
#include "intel_npu_acceleration_library/common.h"
#include "intel_npu_acceleration_library/parameters.h"
- file nn_factory.h
- #include "intel_npu_acceleration_library/inference.h"
Typedefs
-
typedef ov::Output<ov::Node> OVNode#
- file parameters.h
- #include <memory>
#include "intel_npu_acceleration_library/common.h"
#include "intel_npu_acceleration_library/conversion.h"
- dir include
- dir intel_npu_acceleration_library