clDNN
Introduction to custom primitives.

Introduction

In this chapter we will create a custom primitive implemented with an OpenCL kernel and
execute it in a network.

/*
// Copyright (c) 2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
#include <../api/CPP/cldnn_defs.h>
#include <../api/CPP/engine.hpp>
#include <../api/CPP/input_layout.hpp>
#include <../api/CPP/memory.hpp>
#include <../api/CPP/data.hpp>
#include <../api/CPP/topology.hpp>
#include <../api/CPP/network.hpp>
#include <../api/CPP/custom_gpu_primitive.hpp>
#include <iostream>
#include <iomanip>
#include <chrono>
#include "helper_functions.h"
using namespace cldnn;
void chapter_7(engine& my_engine)
{
std::cout << std::endl << "-- Chapter 7 --" << std::endl;
// We are going to implement a custom primitive that will execute a simple
// addition kernel.
// The Kernel will be implemented in OpenCL and will simply add 2 input buffers
// Define a memory layout we'll use for input/output
layout my_layout({ data_types::f32, format::bfyx,{ 1, 1, 3, 3 } });
// Create input memory primitives
memory input_prim1 = memory::allocate(my_engine, my_layout);
memory input_prim2 = memory::allocate(my_engine, my_layout);
set_values(input_prim1, get_simple_data<float>(input_prim1));
set_values(input_prim2, get_simple_data<float>(input_prim2));
// OpenCL kernel for the custom primitive
std::string custom_primitive_kernel_code =
R"__krnl(
__kernel void add_kernel(const __global float* input0, const __global float* input1, __global float* output)
{
const unsigned idx = get_global_id(0);
output[idx] = input0[idx] + input1[idx];
}
)__krnl";
// The name of the OpenCL Entry point function
std::string entry_point = "add_kernel";
// Parameter binding for the custom primitive
std::vector<cldnn_arg> parameters = { { arg_input, 0 }, { arg_input, 1 }, { arg_output, 0 } };
// Output layout for the custom primitive
layout output_layout = my_layout;
// Compiler options to be handed to the OpenCL runtime
std::string compilation_options = "-cl-mad-enable";
// Now we can create a topology holding the inputs and add the custom primitive to it
topology my_topology;
my_topology.add(input_layout("input1", input_prim1.get_layout()));
my_topology.add(input_layout("input2", input_prim2.get_layout()));
"my_custom_primitive",
{ "input1", "input2" },
{ custom_primitive_kernel_code },
entry_point,
parameters,
compilation_options,
output_layout));
// We can now build the network
network my_network(my_engine, my_topology);
// Set the inputs
my_network.set_input_data("input1", input_prim1);
my_network.set_input_data("input2", input_prim2);
// And Execute
auto outputs = my_network.execute();
// Finally, we print out the input and output data
std::cout << "input1:" << std::endl;
for (const auto value : input_prim1.pointer<float>())
{
std::cout << std::setw(3) << value << ", ";
}
std::cout << std::endl;
std::cout << "input2:" << std::endl;
for (const auto value : input_prim2.pointer<float>())
{
std::cout << std::setw(3) << value << ", ";
}
std::cout << std::endl;
std::cout << "output:" << std::endl;
for (const auto value : outputs.at("my_custom_primitive").get_memory().pointer<float>())
{
std::cout << std::setw(3) << value << ", ";
}
std::cout << std::endl;
}