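// Filter and Other Operations
/*******************************************************************************
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 ******************************************************************************/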
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>
#include "qpl/qpl.h"
#include "examples_utils.hpp" // for argument parsing function
/**
 * @brief This example requires a command line argument to set the execution path. Valid values are `software_path`
 * and `hardware_path`.
 * In QPL, @ref qpl_path_software (`Software Path`) means that computations will be done with CPU.
 * Accelerator can be used instead of CPU. In this case, @ref qpl_path_hardware (`Hardware Path`) must be specified.
 * If there is no difference where calculations should be done, @ref qpl_path_auto (`Auto Path`) can be used to allow
 * the library to choose the path to execute. The Auto Path usage is not demonstrated by this example.
 *
 * @warning ---! Important !---
 * `Hardware Path` doesn't support all features declared for `Software Path`
 */
constexpr const uint32_t source_size   = 1000;
constexpr const uint64_t poly          = 0x04C11DB700000000;
constexpr const uint64_t reference_crc = 6467333940108591104;

/**
 * @brief Runs a single `qpl_op_crc64` job over a generated buffer and compares the result with `reference_crc`.
 *
 * @param argc  number of command line arguments, forwarded to `parse_execution_path`
 * @param argv  command line arguments, forwarded to `parse_execution_path`
 * @return 0 when the calculated CRC equals the reference, 1 on any failure
 */
auto main(int argc, char** argv) -> int {
    std::cout << "Intel(R) Query Processing Library version is " << qpl_get_library_version() << ".\n";

    // Default to Software Path
    qpl_path_t execution_path = qpl_path_software;

    // Get path from input argument
    const int parse_ret = parse_execution_path(argc, argv, &execution_path);
    if (parse_ret != 0) { return 1; }

    // Source and output containers
    std::vector<uint8_t>       source(source_size, 4);
    std::unique_ptr<uint8_t[]> job_buffer;
    uint32_t                   size = 0;

    // Filling source containers (values wrap around because the element type is uint8_t)
    std::iota(std::begin(source), std::end(source), 0);

    // Job initialization: the library reports how many bytes the job structure needs
    qpl_status status = qpl_get_job_size(execution_path, &size);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during job size getting.\n";
        return 1;
    }

    job_buffer   = std::make_unique<uint8_t[]>(size);
    qpl_job* job = reinterpret_cast<qpl_job*>(job_buffer.get());

    status = qpl_init_job(execution_path, job);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during job initializing.\n";
        return 1;
    }

    // Performing an operation
    job->op           = qpl_op_crc64;
    job->next_in_ptr  = source.data();
    job->available_in = source_size;
    job->crc64_poly   = poly;

    status = qpl_execute_job(job);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during CRC calculation.\n";
        return 1;
    }

    // Read the result before the job is finalized
    const auto crc_value = job->crc64;

    // Freeing resources
    status = qpl_fini_job(job);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during job finalization.\n";
        return 1;
    }

    // Compare with reference
    if (crc_value != reference_crc) {
        std::cout << "CRC value was calculated incorrectly.\n";
        return 1;
    }

    std::cout << "CRC64 was performed successfully. Calculated CRC: " << crc_value << "\n";

    return 0;
}
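// CRC64 with Device Selection From Another Socket Example
/*******************************************************************************
 * Copyright (C) 2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 ******************************************************************************/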
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>
#include "qpl/qpl.h"
#include "examples_utils.hpp" // for argument parsing function
#if defined(__linux__)
#include <fstream>
#include <sched.h>
#include <string>
/**
 * @brief This example requires a command line argument to set the execution path. Valid values are `software_path`
 * and `hardware_path`.
 * In QPL, @ref qpl_path_software (`Software Path`) means that computations will be done with CPU.
 * Accelerator can be used instead of CPU. In this case, @ref qpl_path_hardware (`Hardware Path`) must be specified.
 * If there is no difference where calculations should be done, @ref qpl_path_auto (`Auto Path`) can be used to allow
 * the library to choose the path to execute. The Auto Path usage is not demonstrated by this example.
 *
 * @note This example requires configuring accelerators to support multiple NUMA nodes,
 * run `accel-config load-config -efc 2n1d1e1w-s.conf` to enable.
 * Incorrect configuration will result in `503 QPL_STS_INIT_WORK_QUEUES_NOT_AVAILABLE` error.
 * For multinode execution,
 * use `numactl --cpunodebind 0-1 --membind 0-1 ./ll_cpp_crc64_numa_example hardware_path`
 *
 * @warning ---! Important !---
 * `Hardware Path` doesn't support all features declared for `Software Path`
 * If only 1 socket is available, NUMA node selection will be set to automatic and the same node is used.
 */
#if defined(__linux__)
/**
 * @brief Reads the non-negative decimal number that follows the first ':' of a `/proc/cpuinfo` line.
 *
 * Never throws: a line without ':' or without digits after it is reported through the return value.
 *
 * @param line   one line of `/proc/cpuinfo`
 * @param value  receives the parsed number on success, untouched otherwise
 * @return true when a number was parsed, false otherwise
 */
static bool parse_cpuinfo_value(const std::string& line, int* value) {
    const std::string::size_type colon = line.find(':');
    if (colon == std::string::npos) { return false; }

    std::string::size_type pos = colon + 1;
    while (pos < line.size() && (line[pos] == ' ' || line[pos] == '\t')) { ++pos; }

    if (pos >= line.size() || line[pos] < '0' || line[pos] > '9') { return false; }

    // At most 9 digits are consumed so the accumulator cannot overflow an int
    int parsed = 0;
    for (int digits = 0; digits < 9 && pos < line.size() && line[pos] >= '0' && line[pos] <= '9'; ++digits, ++pos) {
        parsed = parsed * 10 + (line[pos] - '0');
    }

    *value = parsed;
    return true;
}
#endif

/**
 * @brief This function finds the total sockets number on the system and the current socket ID,
 * results are returned using pointers.
 *
 * The information is read from `/proc/cpuinfo`: the highest `physical id` plus one is reported as
 * the socket count, and the `physical id` belonging to the `processor` entry equal to @p cpu_id
 * is reported as the current socket. Keys are matched at the start of the line only, so value
 * text that merely contains the word "processor" (for example a model name) is ignored.
 *
 * @param cpu_id        - current CPU ID as int
 * @param total_sockets - total sockets number as int pointer (-1 when it could not be determined)
 * @param socket_id     - current socket ID as int pointer (-1 when it could not be determined)
 *
 * @return error code as int: 0 on success, 1 when `/proc/cpuinfo` cannot be opened, -1 on unsupported OS
 */
int get_socket_info(int cpu_id, int* total_sockets, int* socket_id) {
#if defined(__linux__)
    *total_sockets = -1;
    *socket_id     = -1;

    std::ifstream cpu_info("/proc/cpuinfo");
    if (!cpu_info.is_open()) {
        std::cout << "An error /proc/cpuinfo cannot be opened.\n";
        return 1;
    }

    bool        is_current_processor = false;
    std::string line;

    while (std::getline(cpu_info, line)) {
        int value = 0;

        // rfind(key, 0) == 0 is true only when the line begins with the key
        if (line.rfind("processor", 0) == 0) {
            // For processor line, check if is current processor
            is_current_processor = parse_cpuinfo_value(line, &value) && (value == cpu_id);
        } else if (line.rfind("physical id", 0) == 0) {
            // For physical id line, parse the value once and reuse it
            if (parse_cpuinfo_value(line, &value)) {
                if (*total_sockets < value + 1) { *total_sockets = value + 1; }
                if (is_current_processor) { *socket_id = value; }
            }
        }
    }

    return 0;
#else
    (void)cpu_id;
    (void)total_sockets;
    (void)socket_id;
    std::cout << "Unsupported OS for qpl_path_hardware.\n\n";
    return -1;
#endif
}
#if defined(__linux__)
/**
 * @brief Reads a non-negative decimal number from @p text starting at `*pos`.
 *
 * Never throws. On success `*pos` is advanced past the consumed digits.
 *
 * @param text   text to read from
 * @param pos    in: index of the first digit; out: index just past the digits consumed
 * @param value  receives the parsed number on success, untouched otherwise
 * @return true when at least one digit was read, false otherwise
 */
static bool read_sysfs_number(const std::string& text, std::string::size_type* pos, int* value) {
    std::string::size_type cur = *pos;
    if (cur >= text.size() || text[cur] < '0' || text[cur] > '9') { return false; }

    // At most 9 digits are consumed so the accumulator cannot overflow an int
    int parsed = 0;
    for (int digits = 0; digits < 9 && cur < text.size() && text[cur] >= '0' && text[cur] <= '9'; ++digits, ++cur) {
        parsed = parsed * 10 + (text[cur] - '0');
    }

    *pos   = cur;
    *value = parsed;
    return true;
}

/**
 * @brief Checks whether @p cpu_id is a member of a comma separated list of CPU IDs and
 * inclusive `first-last` ranges, e.g. `0-15,32-47`.
 *
 * @return true when the CPU is listed; false when it is not, or the list is empty or malformed
 */
static bool cpulist_has_cpu(const std::string& cpulist, int cpu_id) {
    std::string::size_type pos = 0;

    while (pos < cpulist.size()) {
        int first = 0;
        if (!read_sysfs_number(cpulist, &pos, &first)) { return false; }

        int last = first;
        if (pos < cpulist.size() && cpulist[pos] == '-') {
            ++pos;
            if (!read_sysfs_number(cpulist, &pos, &last)) { return false; }
        }

        if (cpu_id >= first && cpu_id <= last) { return true; }

        // Continue with the entry after the next comma, if any
        const std::string::size_type comma = cpulist.find(',', pos);
        if (comma == std::string::npos) { break; }
        pos = comma + 1;
    }

    return false;
}
#endif

/**
 * @brief This function finds the total NUMA node number on the system and the current node ID,
 * results are returned using pointers.
 *
 * The node count is derived from `/sys/devices/system/node/online`: for a range such as `0-3`
 * the number after the dash plus one is used; a value without a dash (single node, e.g. `0`)
 * is handled as well. The current node is the first node whose `cpulist` contains @p cpu_id.
 *
 * @param cpu_id      - current CPU ID as int
 * @param total_nodes - total NUMA nodes number as int pointer (-1 when it could not be determined)
 * @param numa_id     - current NUMA node ID as int pointer (-1 when no node lists the CPU)
 *
 * @return error code as int: 0 on success, 1 when the `online` file cannot be opened,
 *         2 when a node's `cpulist` cannot be opened, -1 on unsupported OS
 */
int get_numa_info(int cpu_id, int* total_nodes, int* numa_id) {
#if defined(__linux__)
    // Both outputs get a defined value on every return path
    *total_nodes = -1;
    *numa_id     = -1;

    // Get number of available NUMA nodes
    std::ifstream numa_nodes("/sys/devices/system/node/online");
    if (!numa_nodes.is_open()) {
        std::cout << "An error /sys/devices/system/node/online cannot be opened.\n";
        return 1;
    }

    std::string line;
    std::getline(numa_nodes, line);

    const std::string::size_type dash         = line.find('-');
    std::string::size_type       pos          = (dash == std::string::npos) ? 0 : dash + 1;
    int                          highest_node = 0;
    if (read_sysfs_number(line, &pos, &highest_node)) { *total_nodes = highest_node + 1; }

    // Calculate current NUMA node
    for (int i = 0; i < *total_nodes; ++i) {
        const std::string cpulist_path = "/sys/devices/system/node/node" + std::to_string(i) + "/cpulist";

        std::ifstream numa_node(cpulist_path);
        if (!numa_node.is_open()) {
            std::cout << "An error " + cpulist_path + " cannot be opened.\n";
            return 2;
        }

        // An empty file leaves `line` empty, which cpulist_has_cpu treats as "not a member"
        std::getline(numa_node, line);
        if (cpulist_has_cpu(line, cpu_id)) {
            *numa_id = i;
            break;
        }
    }

    return 0;
#else
    (void)cpu_id;
    (void)total_nodes;
    (void)numa_id;
    std::cout << "Unsupported OS for qpl_path_hardware.\n\n";
    return -1;
#endif
}
* @brief This function finds an alternative NUMA node that is different than the current NUMA node
* @note This function is optional and is not a core part of this example. Alternative method of obtaining
* a NUMA ID to assign Intel® Query Processing Library (Intel® QPL) task to use.
* @warning If only 1 socket available, NUMA node selection will be set to automatic and uses same node.
* @param execution_path - execution path as qpl_path_t
* @param inv_socket - pointer to store the ID of the different socket
* @param inv_numa_id - pointer to store the ID of the different NUMA node
* @return error code as int
int get_diff_socket_numa_node_id(qpl_path_t execution_path, int* inv_socket, int* inv_numa_id) {
#if defined(__linux__)
// Check execution path
if (execution_path == qpl_path_software) {
std::cout << "Software path detected, no accelerators available for NUMA assignment.\n";
return -1;
// Get currently used CPU
const int cpu_id = sched_getcpu();
// Get number of available sockets and current socket
int total_sockets = -1;
int socket_id = -1;
if (get_socket_info(cpu_id, &total_sockets, &socket_id)) { return -1; }
if (total_sockets < 2) {
std::cout << "Warning: Single socket architecture, running on same socket.\n";
return -1;
// Get number of available numa nodes and current node
int total_nodes = -1;
int numa_id = -1;
if (get_numa_info(cpu_id, &total_nodes, &numa_id)) { return -1; }
// Print stats
std::cout << "Total:\n";
std::cout << "\t"
<< "Socket(s):" << total_sockets << "\n";
std::cout << "\t"
<< "NUMA(s):" << total_nodes << "\n";
std::cout << "Current:\n";
std::cout << "\t"
<< "Core ID:" << cpu_id << "\n";
std::cout << "\t"
<< "Socket ID:" << socket_id << "\n";
std::cout << "\t"
<< "NUMA ID:" << numa_id << "\n";
// Calculate different NUMA node
const int numa_per_socket = total_nodes / total_sockets;
*inv_socket = !socket_id; // Get the ID for a different socket
*inv_numa_id = numa_per_socket * *inv_socket;
// Return success
return 0;
std::cout << "Unsupported OS for qpl_path_hardware.\n\n";
return -1;
constexpr const uint32_t source_size   = 1000;
constexpr const uint64_t poly          = 0x04C11DB700000000;
constexpr const uint64_t reference_crc = 6467333940108591104;

/**
 * @brief Runs a `qpl_op_crc64` job with `job->numa_id` set to the node returned by
 * `get_diff_socket_numa_node_id`, then compares the result with `reference_crc`.
 *
 * @param argc  number of command line arguments, forwarded to `parse_execution_path`
 * @param argv  command line arguments, forwarded to `parse_execution_path`
 * @return 0 when the calculated CRC equals the reference, 1 on any failure
 */
auto main(int argc, char** argv) -> int {
    std::cout << "Intel(R) Query Processing Library version is " << qpl_get_library_version() << ".\n\n";

    // Default to Software Path
    qpl_path_t execution_path = qpl_path_software;

    // Get path from input argument
    const int parse_ret = parse_execution_path(argc, argv, &execution_path);
    if (parse_ret != 0) { return 1; }

    // Source and output containers
    std::vector<uint8_t>       source(source_size, 4);
    std::unique_ptr<uint8_t[]> job_buffer;
    uint32_t                   size = 0;

    // Filling source containers
    std::iota(std::begin(source), std::end(source), 0);

    // Job initialization
    qpl_status status = qpl_get_job_size(execution_path, &size);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during job size getting.\n";
        return 1;
    }

    job_buffer   = std::make_unique<uint8_t[]>(size);
    qpl_job* job = reinterpret_cast<qpl_job*>(job_buffer.get());

    status = qpl_init_job(execution_path, job);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during job initializing.\n";
        return 1;
    }

    // Performing an operation
    job->op           = qpl_op_crc64;
    job->next_in_ptr  = source.data();
    job->available_in = source_size;
    job->crc64_poly   = poly;

    // Setting NUMA node for device selection
    int inv_socket = -1;
    int numa_node  = -1;

    // The return value is intentionally not checked: on failure both values stay at -1 and
    // the example still runs (see the single-socket warning in the example documentation).
    (void)get_diff_socket_numa_node_id(execution_path, &inv_socket, &numa_node);

    std::cout << "Running on:\n";
    std::cout << "\tSocket ID:" << inv_socket << "\n";
    std::cout << "\tNUMA ID:" << numa_node << "\n\n";
    std::cout << "This example would be run using accelerator devices from NUMA node " << numa_node << "\n\n";

    job->numa_id = numa_node;

    status = qpl_execute_job(job);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during CRC calculation.\n";
        return 1;
    }

    // Read the result before the job is finalized
    const auto crc_value = job->crc64;

    // Freeing resources
    status = qpl_fini_job(job);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during job finalization.\n";
        return 1;
    }

    // Compare with reference
    if (crc_value != reference_crc) {
        std::cout << "CRC value was calculated incorrectly.\n";
        return 1;
    }

    std::cout << "CRC64 was performed successfully. Calculated CRC: " << crc_value << "\n";

    return 0;
}
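/*******************************************************************************
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 ******************************************************************************/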
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>
#include "qpl/qpl.h"
#include "examples_utils.hpp" // for argument parsing function
/**
 * @brief This example requires a command line argument to set the execution path. Valid values are `software_path`
 * and `hardware_path`.
 * In QPL, @ref qpl_path_software (`Software Path`) means that computations will be done with CPU.
 * Accelerator can be used instead of CPU. In this case, @ref qpl_path_hardware (`Hardware Path`) must be specified.
 * If there is no difference where calculations should be done, @ref qpl_path_auto (`Auto Path`) can be used to allow
 * the library to choose the path to execute. The Auto Path usage is not demonstrated by this example.
 *
 * @warning ---! Important !---
 * `Hardware Path` doesn't support all features declared for `Software Path`
 */
constexpr const uint32_t source_size         = 5;
constexpr const uint32_t mask_byte_length    = 1;
constexpr const uint32_t input_vector_width  = 8;
constexpr const uint32_t output_vector_width = 1;
constexpr const uint8_t  mask                = 0b10111001;
constexpr const uint32_t mask_size           = 8;

/**
 * @brief Runs a single `qpl_op_expand` job and compares the output with a hard-coded reference.
 *
 * Read least-significant bit first, `mask` is 1,0,0,1,1,1,0,1; the reference vector holds the next
 * source element at each position with a set bit and 0 at each position with a clear bit.
 *
 * @param argc  number of command line arguments, forwarded to `parse_execution_path`
 * @param argv  command line arguments, forwarded to `parse_execution_path`
 * @return 0 when the output matches the reference, 1 on any failure
 */
auto main(int argc, char** argv) -> int {
    std::cout << "Intel(R) Query Processing Library version is " << qpl_get_library_version() << ".\n";

    // Default to Software Path
    qpl_path_t execution_path = qpl_path_software;

    // Get path from input argument
    const int parse_ret = parse_execution_path(argc, argv, &execution_path);
    if (parse_ret != 0) { return 1; }

    // Source and output containers
    std::vector<uint8_t>       source = {1, 2, 3, 4, 5};
    std::vector<uint8_t>       destination(source_size * 4, 0);
    std::vector<uint8_t>       reference = {1, 0, 0, 2, 3, 4, 0, 5};
    std::unique_ptr<uint8_t[]> job_buffer;
    uint32_t                   size = 0;

    // Job initialization
    qpl_status status = qpl_get_job_size(execution_path, &size);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during job size getting.\n";
        return 1;
    }

    job_buffer   = std::make_unique<uint8_t[]>(size);
    qpl_job* job = reinterpret_cast<qpl_job*>(job_buffer.get());

    status = qpl_init_job(execution_path, job);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during job initializing.\n";
        return 1;
    }

    // Performing an operation
    job->next_in_ptr        = source.data();
    job->available_in       = static_cast<uint32_t>(source.size());
    job->next_out_ptr       = destination.data();
    job->available_out      = static_cast<uint32_t>(destination.size());
    job->op                 = qpl_op_expand;
    job->src1_bit_width     = input_vector_width;
    job->src2_bit_width     = output_vector_width;
    job->available_src2     = mask_byte_length;
    job->num_input_elements = mask_size;
    job->out_bit_width      = qpl_ow_8;
    // The job field is a non-const pointer, hence the const_cast on the constant mask
    job->next_src2_ptr = const_cast<uint8_t*>(&mask);

    status = qpl_execute_job(job);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during performing expand.\n";
        return 1;
    }

    const auto expand_size = job->total_out;

    // Freeing resources
    status = qpl_fini_job(job);
    if (status != QPL_STS_OK) {
        std::cout << "An error " << status << " acquired during job finalization.\n";
        return 1;
    }

    // More output than reference elements is already a mismatch; this also keeps the
    // comparison loop below from indexing past the end of `reference`.
    if (expand_size > reference.size()) {
        std::cout << "Expand was done incorrectly.\n";
        return 1;
    }

    // Compare with reference
    for (size_t i = 0; i < expand_size; i++) {
        if (destination[i] != reference[i]) {
            std::cout << "Expand was done incorrectly.\n";
            return 1;
        }
    }

    std::cout << "Expand was performed successfully.\n";

    return 0;
}
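// Expand with Force Array Output Modification
/*******************************************************************************
 * Copyright (C) 2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 ******************************************************************************/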
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>
#include "qpl/qpl.h"
#include "examples_utils.hpp" // for argument parsing function
* @brief This example requires a command line argument to set the execution path. Valid values are `software_path`
* and `hardware_path`.
* In QPL, @ref qpl_path_software (`Software Path`) means that computations will be done with CPU.
* Accelerator can be used instead of CPU. In this case, @ref qpl_path_hardware (`Hardware Path`) must be specified.
* If there is no difference where calculations should be done, @ref qpl_path_auto (`Auto Path`) can be used to allow
* the library to chose the path to execute. The Auto Path usage is not demonstrated by this example.
* This example demonstrates the usage of the `Force Array Output Modification` feature. The feature allows the user to
* force the output of filter operations to be an array of a size specified by the user. Without this feature, the output
* of filter operations where the output size is 1 bit will be returned as a bit vector. The feature is enabled by setting
* the `QPL_FLAG_FORCE_ARRAY_OUTPUT` flag in the job structure. The feature is supported only in `Hardware Path` on the
* Intel® In-Memory Analytics Accelerator (Intel® IAA) 2.0 devices.
* @warning The use of `Force Array Output Modification` requires the use of the `Output Bit Width Modification` feature.
* The `Output Bit Width Modification` feature allows the user to specify the bit width of the output of the filter operation.
* The feature is enabled by setting the `out_bit_width` field in the job structure.
* @warning ---! Important !---
* `Hardware Path` doesn't support all features declared for `Software Path`
constexpr const uint32_t source_size = 5U;
constexpr const uint32_t mask_byte_length = 1U;
constexpr const uint32_t input_vector_width = 1U;
constexpr const uint32_t output_vector_width = 1U;
constexpr const uint8_t mask = 0b0000000'0U;
constexpr const uint32_t mask_size = 1U;
auto main(int argc, char** argv) -> int {
std::cout << "Intel(R) Query Processing Library version is " << qpl_get_library_version() << ".\n";
// Default to Hardware Path
qpl_path_t execution_path = qpl_path_hardware;
// Get path from input argument
const int parse_ret = parse_execution_path(argc, argv, &execution_path);
if (parse_ret != 0) { return 1; }
// Source and output containers
std::vector<uint8_t> source = {0b0000'0001U};
std::vector<uint8_t> destination = {0U};
std::vector<uint8_t> reference = {0U};
std::unique_ptr<uint8_t[]> job_buffer;
uint32_t size = 0U;
// Check if on software path
if (execution_path == qpl_path_software) {
std::cout << "Force Array Output Modification is not supported on qpl_path_software.\n";
return 0;
// Job initialization
qpl_status status = qpl_get_job_size(execution_path, &size);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job size getting.\n";
return 1;
job_buffer = std::make_unique<uint8_t[]>(size);
qpl_job* job = reinterpret_cast<qpl_job*>(job_buffer.get());
status = qpl_init_job(execution_path, job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job initializing.\n";
return 1;
// Performing an operation
job->next_in_ptr =;
job->available_in = static_cast<uint32_t>(source.size());
job->next_out_ptr =;
job->available_out = static_cast<uint32_t>(destination.size());
job->op = qpl_op_expand;
job->src1_bit_width = input_vector_width;
job->src2_bit_width = output_vector_width;
job->available_src2 = mask_byte_length;
job->num_input_elements = mask_size;
job->out_bit_width = qpl_ow_8;
job->next_src2_ptr = const_cast<uint8_t*>(&mask);
// Enable Force Array Output Modification
status = qpl_execute_job(job);
<< "Force Array Output Modification is not supported. This feature is only available on Intel® In-Memory Analytics Accelerator (Intel® IAA) 2.0 and Hardware Path.\n";
return 0;
} else if (status == QPL_STS_OUT_FORMAT_ERR) {
<< "Using Force Array Output Modification flag requires setting output bit width with `job->out_bit_width`\n";
return 1;
} else if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during performing expand.\n";
return 1;
const auto expand_size = job->total_out;
// Freeing resources
status = qpl_fini_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job finalization.\n";
return 1;
if (expand_size != 1) {
std::cout << "Error occurred, expected expand size 1, but got " << expand_size << "\n";
return 1;
} else {
// Compare with reference
if (destination[0] == reference[0]) {
std::cout << "Expand with Force Array Output Modification was performed successfully.\n";
} else {
<< "Error occurred in Expand with Force Array Output Modification, expand result is not equal to reference."
<< "\n";
return 1;
return 0;
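/*******************************************************************************
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 ******************************************************************************/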
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>
#include "qpl/qpl.h"
#include "examples_utils.hpp" // for argument parsing function
constexpr const uint32_t source_size = 1000;
constexpr const uint32_t input_vector_width = 8;
constexpr const uint32_t lower_index = 80;
constexpr const uint32_t upper_index = 123;
* @brief This example requires a command line argument to set the execution path. Valid values are `software_path`
* and `hardware_path`.
* In QPL, @ref qpl_path_software (`Software Path`) means that computations will be done with CPU.
* Accelerator can be used instead of CPU. In this case, @ref qpl_path_hardware (`Hardware Path`) must be specified.
* If there is no difference where calculations should be done, @ref qpl_path_auto (`Auto Path`) can be used to allow
* the library to chose the path to execute. The Auto Path usage is not demonstrated by this example.
* @warning ---! Important !---
* `Hardware Path` doesn't support all features declared for `Software Path`
auto main(int argc, char** argv) -> int {
std::cout << "Intel(R) Query Processing Library version is " << qpl_get_library_version() << ".\n";
// Default to Software Path
qpl_path_t execution_path = qpl_path_software;
// Get path from input argument
const int parse_ret = parse_execution_path(argc, argv, &execution_path);
if (parse_ret != 0) { return 1; }
// Source and output containers
std::vector<uint8_t> source(source_size, 0);
std::vector<uint8_t> destination(source_size, 4);
std::unique_ptr<uint8_t[]> job_buffer;
uint32_t size = 0;
// Filling source container
std::iota(std::begin(source), std::end(source), 0);
// Job initialization
qpl_status status = qpl_get_job_size(execution_path, &size);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job size getting.\n";
return 1;
job_buffer = std::make_unique<uint8_t[]>(size);
qpl_job* job = reinterpret_cast<qpl_job*>(job_buffer.get());
status = qpl_init_job(execution_path, job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job initializing.\n";
return 1;
// Performing an operation
job->next_in_ptr =;
job->available_in = source_size;
job->next_out_ptr =;
job->available_out = static_cast<uint32_t>(destination.size());
job->op = qpl_op_extract;
job->src1_bit_width = input_vector_width;
job->param_low = lower_index;
job->param_high = upper_index;
job->num_input_elements = source_size;
job->out_bit_width = qpl_ow_nom;
status = qpl_execute_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during performing extract.\n";
return 1;
const auto extract_size = job->total_out;
// Freeing resources
status = qpl_fini_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job finalization.\n";
return 1;
// Compare with reference
for (size_t i = 0; i < extract_size; i++) {
if (destination[i] != source[i + lower_index]) {
std::cout << "Extract was done incorrectly.\n";
return 1;
std::cout << "Extract was performed successfully.\n";
return 0;
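// Scan for Elements Equal to Specific Value
/*******************************************************************************
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 ******************************************************************************/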
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>
#include "qpl/qpl.h"
#include "examples_utils.hpp" // for argument parsing function
constexpr const uint32_t source_size = 1000;
constexpr const uint32_t input_bit_width = 8;
constexpr const uint32_t output_vector_width = 32;
constexpr const uint32_t value_to_find = 48;
constexpr const uint32_t byte_bit_length = 8;
* @brief This example requires a command line argument to set the execution path. Valid values are `software_path`
* and `hardware_path`.
* In QPL, @ref qpl_path_software (`Software Path`) means that computations will be done with CPU.
* Accelerator can be used instead of CPU. In this case, @ref qpl_path_hardware (`Hardware Path`) must be specified.
* If there is no difference where calculations should be done, @ref qpl_path_auto (`Auto Path`) can be used to allow
* the library to chose the path to execute. The Auto Path usage is not demonstrated by this example.
* @warning ---! Important !---
* `Hardware Path` doesn't support all features declared for `Software Path`
auto main(int argc, char** argv) -> int {
std::cout << "Intel(R) Query Processing Library version is " << qpl_get_library_version() << ".\n";
// Default to Software Path
qpl_path_t execution_path = qpl_path_software;
// Get path from input argument
const int parse_ret = parse_execution_path(argc, argv, &execution_path);
if (parse_ret != 0) { return 1; }
// Source and output containers
std::vector<uint8_t> source(source_size, 0);
std::vector<uint8_t> destination(source_size, 4);
std::unique_ptr<uint8_t[]> job_buffer;
uint32_t size = 0;
const auto* indices = reinterpret_cast<const uint32_t*>(;
// Filling source containers
std::iota(std::begin(source), std::end(source), 0);
// Job initialization
qpl_status status = qpl_get_job_size(execution_path, &size);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job size getting.\n";
return 1;
job_buffer = std::make_unique<uint8_t[]>(size);
qpl_job* job = reinterpret_cast<qpl_job*>(job_buffer.get());
status = qpl_init_job(execution_path, job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job initializing.\n";
return 1;
// Performing an operation
job->next_in_ptr =;
job->available_in = source_size;
job->next_out_ptr =;
job->available_out = static_cast<uint32_t>(destination.size());
job->op = qpl_op_scan_eq;
job->src1_bit_width = input_bit_width;
job->num_input_elements = source_size;
job->out_bit_width = qpl_ow_32;
job->param_low = value_to_find;
status = qpl_execute_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during performing scan.\n";
return 1;
const auto indices_size_in_bytes = job->total_out;
const auto indices_size_in_elements = indices_size_in_bytes * byte_bit_length / output_vector_width;
// Freeing resources
status = qpl_fini_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job finalization.\n";
return 1;
// Compare with reference
for (uint32_t i = 0; i < indices_size_in_elements; i++) {
if (source[indices[i]] != value_to_find) {
std::cout << "Scan was done incorrectly.\n";
return 1;
std::cout << "Scan was performed successfully.\n";
return 0;
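// Scan for Elements in Specific Range
/*******************************************************************************
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 ******************************************************************************/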
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>
#include "qpl/qpl.h"
#include "examples_utils.hpp" // for argument parsing function
constexpr const uint32_t source_size = 1000;
constexpr const uint32_t input_vector_width = 8;
constexpr const uint32_t output_vector_width = 32;
constexpr const uint32_t lower_boundary = 48;
constexpr const uint32_t upper_boundary = 58;
constexpr const uint32_t byte_bit_length = 8;
* @brief This example requires a command line argument to set the execution path. Valid values are `software_path`
* and `hardware_path`.
* In QPL, @ref qpl_path_software (`Software Path`) means that computations will be done with CPU.
* Accelerator can be used instead of CPU. In this case, @ref qpl_path_hardware (`Hardware Path`) must be specified.
* If there is no difference where calculations should be done, @ref qpl_path_auto (`Auto Path`) can be used to allow
* the library to chose the path to execute. The Auto Path usage is not demonstrated by this example.
* @warning ---! Important !---
* `Hardware Path` doesn't support all features declared for `Software Path`
auto main(int argc, char** argv) -> int {
std::cout << "Intel(R) Query Processing Library version is " << qpl_get_library_version() << ".\n";
// Default to Software Path
qpl_path_t execution_path = qpl_path_software;
// Get path from input argument
const int parse_ret = parse_execution_path(argc, argv, &execution_path);
if (parse_ret != 0) { return 1; }
// Source and output containers
std::vector<uint8_t> source(source_size, 0);
std::vector<uint8_t> destination(source_size * 4, 4);
std::unique_ptr<uint8_t[]> job_buffer;
uint32_t size = 0;
const auto* indices = reinterpret_cast<const uint32_t*>(;
// Filling source containers
std::iota(std::begin(source), std::end(source), 0);
// Job initialization
qpl_status status = qpl_get_job_size(execution_path, &size);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job size getting.\n";
return 1;
job_buffer = std::make_unique<uint8_t[]>(size);
qpl_job* job = reinterpret_cast<qpl_job*>(job_buffer.get());
status = qpl_init_job(execution_path, job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job initializing.\n";
return 1;
// Performing an operation
job->next_in_ptr =;
job->available_in = static_cast<uint32_t>(source.size());
job->next_out_ptr =;
job->available_out = static_cast<uint32_t>(destination.size());
job->op = qpl_op_scan_range;
job->src1_bit_width = input_vector_width;
job->num_input_elements = static_cast<uint32_t>(source.size());
job->out_bit_width = qpl_ow_32;
job->param_low = lower_boundary;
job->param_high = upper_boundary;
status = qpl_execute_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during performing scan range.\n";
return 1;
const auto indices_size_in_bytes = job->total_out;
const auto indices_size_in_elements = indices_size_in_bytes * byte_bit_length / output_vector_width;
// Freeing resources
status = qpl_fini_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job finalization.\n";
return 1;
// Compare with reference
for (uint32_t i = 0; i < indices_size_in_elements; i++) {
const auto element = source[indices[i]];
if (element < lower_boundary || element > upper_boundary) {
std::cout << "Scan range was done incorrectly.\n";
return 1;
std::cout << "Scan range was performed successfully.\n";
return 0;
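/*******************************************************************************
 * Copyright (C) 2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 ******************************************************************************/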
#include <iostream>
#include <memory>
#include <numeric>
#include <vector>
#include "qpl/qpl.h"
#include "examples_utils.hpp" // for argument parsing function
* @brief This example requires a command line argument to set the execution path. Valid values are `software_path`
* and `hardware_path`.
* In QPL, @ref qpl_path_software (`Software Path`) means that computations will be done with CPU.
* Accelerator can be used instead of CPU. In this case, @ref qpl_path_hardware (`Hardware Path`) must be specified.
* If there is no difference where calculations should be done, @ref qpl_path_auto (`Auto Path`) can be used to allow
* the library to chose the path to execute. The Auto Path usage is not demonstrated by this example.
* @warning ---! Important !---
* `Hardware Path` doesn't support all features declared for `Software Path`
constexpr const uint32_t source_size = 1000;
constexpr const uint32_t boundary = 48;
constexpr const uint32_t scan_input_vector_width = 8;
constexpr const uint32_t select_output_vector_width = 1;
constexpr const uint32_t byte_bit_length = 8;
auto main(int argc, char** argv) -> int {
std::cout << "Intel(R) Query Processing Library version is " << qpl_get_library_version() << ".\n";
// Default to Software Path
qpl_path_t execution_path = qpl_path_software;
// Get path from input argument
const int parse_ret = parse_execution_path(argc, argv, &execution_path);
if (parse_ret != 0) { return 1; }
// Source and output containers
std::vector<uint8_t> source(source_size, 0);
std::vector<uint8_t> mask_after_scan(source_size / 8, 4);
std::vector<uint8_t> destination(source_size, 4);
std::unique_ptr<uint8_t[]> job_buffer;
uint32_t size = 0;
// Filling source containers
std::iota(std::begin(source), std::end(source), 0);
// Job initialization
qpl_status status = qpl_get_job_size(execution_path, &size);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job size getting.\n";
return 1;
job_buffer = std::make_unique<uint8_t[]>(size);
qpl_job* job = reinterpret_cast<qpl_job*>(job_buffer.get());
status = qpl_init_job(execution_path, job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job initializing.\n";
return 1;
// Performing a scan operation
job->next_in_ptr =;
job->available_in = source_size;
job->next_out_ptr =;
job->available_out = static_cast<uint32_t>(mask_after_scan.size());
job->op = qpl_op_scan_eq;
job->src1_bit_width = scan_input_vector_width;
job->num_input_elements = source_size;
job->out_bit_width = qpl_ow_nom;
job->param_low = boundary;
status = qpl_execute_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during performing scan.\n";
return 1;
const auto scan_byte_size = job->total_out;
const auto mask_length = scan_byte_size;
// Performing a select operation
job->next_in_ptr =;
job->available_in = source_size;
job->next_out_ptr =;
job->available_out = static_cast<uint32_t>(destination.size());
job->op = qpl_op_select;
job->src1_bit_width = scan_input_vector_width;
job->num_input_elements = source_size;
job->out_bit_width = qpl_ow_nom;
job->next_src2_ptr =;
job->available_src2 = mask_length;
job->src2_bit_width = select_output_vector_width;
status = qpl_execute_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during performing select.\n";
return 1;
const auto select_byte_size = job->total_out;
// Freeing resources
status = qpl_fini_job(job);
if (status != QPL_STS_OK) {
std::cout << "An error " << status << " acquired during job finalization.\n";
return 1;
// Compare with reference
for (uint32_t i = 0; i < select_byte_size; i++) {
if (destination[i] != boundary) {
std::cout << "Select was done incorrectly.\n";
return 1;
std::cout << "Select was performed successfully.\n";
return 0;