// In this chapter we will create a custom primitive implemented with an OpenCL kernel and
// execute it in a network.
#include <../api/CPP/cldnn_defs.h>
#include <../api/CPP/engine.hpp>
#include <../api/CPP/input_layout.hpp>
#include <../api/CPP/memory.hpp>
#include <../api/CPP/data.hpp>
#include <../api/CPP/topology.hpp>
#include <../api/CPP/network.hpp>
#include <../api/CPP/custom_gpu_primitive.hpp>
#include <iostream>
#include <iomanip>
#include <chrono>
#include "helper_functions.h"
// Tutorial chapter 7: describes a custom GPU primitive backed by a hand-written
// OpenCL kernel that adds two inputs element by element, runs it in a network,
// and prints both inputs and the result to std::cout.
//
// my_engine - engine used to construct the network that executes the primitive.
//
// NOTE(review): several names used below (input_prim1, input_prim2, my_layout,
// my_topology) have no declaration inside this function body as it appears
// here, and the argument list that follows compilation_options has no opening
// call. Lines look to be missing from this listing - restore them from the
// original tutorial source before building.
void chapter_7(
engine& my_engine)
{
std::cout << std::endl << "-- Chapter 7 --" << std::endl;
// Fill both input buffers with data produced by get_simple_data<float>.
// NOTE(review): set_values / get_simple_data are not defined in this file
// (presumably they come from helper_functions.h); the generated values are not
// visible here.
set_values(input_prim1, get_simple_data<float>(input_prim1));
set_values(input_prim2, get_simple_data<float>(input_prim2));
// OpenCL C source of the custom kernel, kept in a raw string literal. Each
// work-item takes its index from get_global_id(0) and writes
// input0[idx] + input1[idx] to output[idx].
std::string custom_primitive_kernel_code =
R"__krnl(
__kernel void add_kernel(const __global float* input0, const __global float* input1, __global float* output)
{
const unsigned idx = get_global_id(0);
output[idx] = input0[idx] + input1[idx];
}
)__krnl";
// Must match the name of the __kernel function in the source above.
std::string entry_point = "add_kernel";
// Kernel argument bindings, listed in the same order as the kernel's three
// pointer parameters: two inputs followed by one output.
// NOTE(review): the second field presumably indexes into the primitive's
// input/output lists - confirm against the cldnn_arg definition.
std::vector<cldnn_arg> parameters = { { arg_input, 0 }, { arg_input, 1 }, { arg_output, 0 } };
// The output uses the same layout as my_layout.
layout output_layout = my_layout;
// Option string handed over together with the kernel source; -cl-mad-enable is
// a standard OpenCL build option.
std::string compilation_options = "-cl-mad-enable";
// NOTE(review): the lines below are only an argument list (primitive id, input
// ids, kernel sources, entry point, argument bindings, build options, output
// layout) closed by "))". The opening of the enclosing call is not present in
// this listing - presumably the primitive is constructed and added to
// my_topology here; verify against the original file.
"my_custom_primitive",
{ "input1", "input2" },
{ custom_primitive_kernel_code },
entry_point,
parameters,
compilation_options,
output_layout));
// Build a network from the topology on the supplied engine.
network my_network(my_engine, my_topology);
// Attach the two filled buffers to the inputs named "input1" and "input2".
my_network.set_input_data("input1", input_prim1);
my_network.set_input_data("input2", input_prim2);
// Run the network and keep the returned outputs for the lookup further down.
auto outputs = my_network.execute();
// Print every float of the first input, each padded to width 3 and followed by ", ".
std::cout << "input1:" << std::endl;
for (const auto value : input_prim1.pointer<float>())
{
std::cout << std::setw(3) << value << ", ";
}
std::cout << std::endl;
// Same formatting for the second input.
std::cout << "input2:" << std::endl;
for (const auto value : input_prim2.pointer<float>())
{
std::cout << std::setw(3) << value << ", ";
}
std::cout << std::endl;
// Look up the result under the custom primitive's id and print the floats of
// its memory with the same formatting as the inputs.
std::cout << "output:" << std::endl;
for (
const auto value :
outputs.at(
"my_custom_primitive").get_memory().pointer<
float>())
{
std::cout << std::setw(3) << value << ", ";
}
std::cout << std::endl;
}