Index A | B | C | D | F | G | H | I | L | M | N | O | P | Q | R | S | W A adapt_weight() (in module intel_npu_acceleration_library.backend.base) add_to_map() (intel_npu_acceleration_library.backend.base.BaseNPUBackendWithPrefetch method) apply_horizontal_fusion() (in module intel_npu_acceleration_library.compiler) AutogradMatMul (class in intel_npu_acceleration_library.nn.autograd) B backward() (intel_npu_acceleration_library.nn.autograd.AutogradMatMul static method) BaseNPUBackend (class in intel_npu_acceleration_library.backend.base) BaseNPUBackendWithPrefetch (class in intel_npu_acceleration_library.backend.base) bias (intel_npu_acceleration_library.nn.Conv2d property) C clear_cache() (in module intel_npu_acceleration_library.backend) (in module intel_npu_acceleration_library.backend.runtime) compile() (in module intel_npu_acceleration_library) (in module intel_npu_acceleration_library.compiler) (intel_npu_acceleration_library.backend.factory.NNFactory method) (intel_npu_acceleration_library.backend.NNFactory method) compute_output_dim() (intel_npu_acceleration_library.nn.Conv2d method) Conv2d (class in intel_npu_acceleration_library.nn) create_parameters() (intel_npu_acceleration_library.backend.base.BaseNPUBackendWithPrefetch method) D delattr_recursively() (in module intel_npu_acceleration_library.optimizations) F forward() (intel_npu_acceleration_library.nn.autograd.AutogradMatMul static method) (intel_npu_acceleration_library.nn.Conv2d method) (intel_npu_acceleration_library.nn.Linear method) (intel_npu_acceleration_library.nn.linear.Linear method) (intel_npu_acceleration_library.nn.linear.QuantizedLinear method) (intel_npu_acceleration_library.nn.LlamaAttention method) (intel_npu_acceleration_library.nn.llm.FusedLlamaMLP method) (intel_npu_acceleration_library.nn.llm.LlamaAttention method) (intel_npu_acceleration_library.nn.llm.PhiMLP method) (intel_npu_acceleration_library.nn.PhiMLP method) (intel_npu_acceleration_library.nn.QuantizedLinear method) fromTensor() (intel_npu_acceleration_library.nn.Linear static method) (intel_npu_acceleration_library.nn.linear.Linear static method) fromTorch() (intel_npu_acceleration_library.nn.Conv2d static method) (intel_npu_acceleration_library.nn.Linear static method) (intel_npu_acceleration_library.nn.linear.Linear static method) (intel_npu_acceleration_library.nn.LlamaAttention static method) (intel_npu_acceleration_library.nn.llm.FusedLlamaMLP static method) (intel_npu_acceleration_library.nn.llm.LlamaAttention static method) (intel_npu_acceleration_library.nn.llm.PhiMLP static method) (intel_npu_acceleration_library.nn.PhiMLP static method) fuse_linear_layers() (in module intel_npu_acceleration_library.optimizations) FusedLlamaMLP (class in intel_npu_acceleration_library.nn.llm) G generate_with_static_shape() (in module intel_npu_acceleration_library.nn.llm) get_driver_version() (in module intel_npu_acceleration_library.backend) H horizontal_fusion_linear() (in module intel_npu_acceleration_library.optimizations) I intel_npu_acceleration_library module intel_npu_acceleration_library (C++ type) intel_npu_acceleration_library.backend module intel_npu_acceleration_library.backend.base module intel_npu_acceleration_library.backend.factory module intel_npu_acceleration_library.backend.linear module intel_npu_acceleration_library.backend.matmul module intel_npu_acceleration_library.backend.mlp module intel_npu_acceleration_library.backend.qlinear module intel_npu_acceleration_library.backend.qmatmul module intel_npu_acceleration_library.backend.runtime module intel_npu_acceleration_library.compiler module intel_npu_acceleration_library.nn module intel_npu_acceleration_library.nn.autograd module intel_npu_acceleration_library.nn.linear module intel_npu_acceleration_library.nn.llm module intel_npu_acceleration_library.optimizations module intel_npu_acceleration_library.quantization module intel_npu_acceleration_library::_isNPUAvailable (C++ function) intel_npu_acceleration_library::array_to_fp16_worker (C++ function) intel_npu_acceleration_library::ModelFactory (C++ class) intel_npu_acceleration_library::ModelFactory::compile (C++ function) intel_npu_acceleration_library::ModelFactory::convert_to_fp16 (C++ function) intel_npu_acceleration_library::ModelFactory::eltwise_add (C++ function) intel_npu_acceleration_library::ModelFactory::eltwise_div (C++ function) intel_npu_acceleration_library::ModelFactory::eltwise_mul (C++ function) intel_npu_acceleration_library::ModelFactory::gelu (C++ function) intel_npu_acceleration_library::ModelFactory::matmul (C++ function) intel_npu_acceleration_library::ModelFactory::ModelFactory (C++ function) intel_npu_acceleration_library::ModelFactory::operations (C++ member) intel_npu_acceleration_library::ModelFactory::parameter (C++ function) intel_npu_acceleration_library::ModelFactory::parameters (C++ member) intel_npu_acceleration_library::ModelFactory::softmax (C++ function) intel_npu_acceleration_library::ModelFactory::swish (C++ function) intel_npu_acceleration_library::npu_compiler_type (C++ member) intel_npu_acceleration_library::npu_parameters (C++ member) intel_npu_acceleration_library::OVInferenceModel (C++ class) intel_npu_acceleration_library::OVInferenceModel::batch (C++ member) intel_npu_acceleration_library::OVInferenceModel::compile_model (C++ function) intel_npu_acceleration_library::OVInferenceModel::compiled_model (C++ member) intel_npu_acceleration_library::OVInferenceModel::core (C++ member) intel_npu_acceleration_library::OVInferenceModel::create_ov_model (C++ function) intel_npu_acceleration_library::OVInferenceModel::device (C++ member) intel_npu_acceleration_library::OVInferenceModel::inC (C++ member) intel_npu_acceleration_library::OVInferenceModel::infer_request (C++ member) intel_npu_acceleration_library::OVInferenceModel::model (C++ member) intel_npu_acceleration_library::OVInferenceModel::mutex_ (C++ member) intel_npu_acceleration_library::OVInferenceModel::Out (C++ member) intel_npu_acceleration_library::OVInferenceModel::outC (C++ member) intel_npu_acceleration_library::OVInferenceModel::OVInferenceModel (C++ function) intel_npu_acceleration_library::OVInferenceModel::profile (C++ member) intel_npu_acceleration_library::OVInferenceModel::run (C++ function) intel_npu_acceleration_library::OVInferenceModel::saveCompiledModel (C++ function) intel_npu_acceleration_library::OVInferenceModel::saveModel (C++ function) intel_npu_acceleration_library::OVInferenceModel::setActivations (C++ function) intel_npu_acceleration_library::OVInferenceModel::setWeights (C++ function) intel_npu_acceleration_library::OVInferenceModel::wt_thread (C++ member) intel_npu_acceleration_library::OVInferenceModel::X (C++ member) intel_npu_acceleration_library::OVInferenceModel::~OVInferenceModel (C++ function) intel_npu_acceleration_library::Parameter (C++ class) intel_npu_acceleration_library::Parameter::data (C++ member) intel_npu_acceleration_library::Parameter::get_size (C++ function) intel_npu_acceleration_library::Parameter::Parameter (C++ function), [1], [2] intel_npu_acceleration_library::Parameter::quantized (C++ member) intel_npu_acceleration_library::Parameter::set_data (C++ function) intel_npu_acceleration_library::Parameter::shape (C++ member) intel_npu_acceleration_library::Parameter::~Parameter (C++ function) intel_npu_acceleration_library::Parameters (C++ class) intel_npu_acceleration_library::Parameters::add_parameter (C++ function), [1], [2] intel_npu_acceleration_library::Parameters::get_parameters (C++ function) intel_npu_acceleration_library::Parameters::parameters (C++ member) intel_npu_acceleration_library::ParameterWithConversion (C++ class) intel_npu_acceleration_library::ParameterWithConversion::data (C++ member) intel_npu_acceleration_library::ParameterWithConversion::ParameterWithConversion (C++ function) intel_npu_acceleration_library::ParameterWithConversion::scale (C++ member) intel_npu_acceleration_library::ParameterWithConversion::set_data (C++ function) intel_npu_acceleration_library::Shape (C++ class) intel_npu_acceleration_library::Shape::dimensions (C++ member) intel_npu_acceleration_library::Shape::get_size (C++ function) intel_npu_acceleration_library::Shape::operator[] (C++ function) intel_npu_acceleration_library::Shape::Shape (C++ function) intel_npu_acceleration_library::to_fp16 (C++ function) intel_npu_acceleration_library::vector_to_fp16 (C++ function) L Linear (class in intel_npu_acceleration_library.backend) (class in intel_npu_acceleration_library.backend.linear) (class in intel_npu_acceleration_library.nn) (class in intel_npu_acceleration_library.nn.linear) linear() (intel_npu_acceleration_library.backend.factory.NNFactory method) (intel_npu_acceleration_library.backend.NNFactory method) LlamaAttention (class in intel_npu_acceleration_library.nn) (class in intel_npu_acceleration_library.nn.llm) load_wt_fn() (intel_npu_acceleration_library.backend.base.BaseNPUBackendWithPrefetch method) lower_linear() (in module intel_npu_acceleration_library.compiler) lshift_insert() (in module intel_npu_acceleration_library.nn.llm) M MatMul (class in intel_npu_acceleration_library.backend) (class in intel_npu_acceleration_library.backend.matmul) MLP (class in intel_npu_acceleration_library.backend) (class in intel_npu_acceleration_library.backend.mlp) module intel_npu_acceleration_library intel_npu_acceleration_library.backend intel_npu_acceleration_library.backend.base intel_npu_acceleration_library.backend.factory intel_npu_acceleration_library.backend.linear intel_npu_acceleration_library.backend.matmul intel_npu_acceleration_library.backend.mlp intel_npu_acceleration_library.backend.qlinear intel_npu_acceleration_library.backend.qmatmul intel_npu_acceleration_library.backend.runtime intel_npu_acceleration_library.compiler intel_npu_acceleration_library.nn intel_npu_acceleration_library.nn.autograd intel_npu_acceleration_library.nn.linear intel_npu_acceleration_library.nn.llm intel_npu_acceleration_library.optimizations intel_npu_acceleration_library.quantization module_optimization() (in module intel_npu_acceleration_library.compiler) N NNFactory (class in intel_npu_acceleration_library.backend) (class in intel_npu_acceleration_library.backend.factory) npu() (in module intel_npu_acceleration_library.compiler) npu_available() (in module intel_npu_acceleration_library.backend) O optimize_llama_attention() (in module intel_npu_acceleration_library.compiler) OVNode (C++ type) P parameter() (intel_npu_acceleration_library.backend.factory.NNFactory method) (intel_npu_acceleration_library.backend.NNFactory method) PhiMLP (class in intel_npu_acceleration_library.nn) (class in intel_npu_acceleration_library.nn.llm) prefetchWeights() (intel_npu_acceleration_library.backend.base.BaseNPUBackendWithPrefetch method) Q QLinear (class in intel_npu_acceleration_library.backend) (class in intel_npu_acceleration_library.backend.qlinear) QMatMul (class in intel_npu_acceleration_library.backend) (class in intel_npu_acceleration_library.backend.qmatmul) quantize_tensor() (in module intel_npu_acceleration_library.quantization) QuantizedLinear (class in intel_npu_acceleration_library.nn) (class in intel_npu_acceleration_library.nn.linear) R run() (intel_npu_acceleration_library.backend.factory.NNFactory method) (intel_npu_acceleration_library.backend.Linear method) (intel_npu_acceleration_library.backend.linear.Linear method) (intel_npu_acceleration_library.backend.MatMul method) (intel_npu_acceleration_library.backend.matmul.MatMul method) (intel_npu_acceleration_library.backend.NNFactory method) (intel_npu_acceleration_library.backend.QLinear method) (intel_npu_acceleration_library.backend.qlinear.QLinear method) (intel_npu_acceleration_library.backend.QMatMul method) (intel_npu_acceleration_library.backend.qmatmul.QMatMul method) run_factory() (in module intel_npu_acceleration_library.backend) (in module intel_npu_acceleration_library.backend.runtime) run_matmul() (in module intel_npu_acceleration_library.backend) (in module intel_npu_acceleration_library.backend.runtime) S save() (intel_npu_acceleration_library.backend.base.BaseNPUBackend method) saveCompiledModel() (intel_npu_acceleration_library.backend.base.BaseNPUBackend method) set_contiguous() (in module intel_npu_acceleration_library.backend.runtime) setWeights() (intel_npu_acceleration_library.backend.base.BaseNPUBackendWithPrefetch method) W warm_up_decoder_model() (in module intel_npu_acceleration_library.nn.llm) weight (intel_npu_acceleration_library.nn.Conv2d property)