clDNN/chapter__7_8cpp_source.html

 /*
 // Copyright (c) 2017 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 */

 #include <../api/CPP/cldnn_defs.h>
 #include <../api/CPP/engine.hpp>
 #include <../api/CPP/input_layout.hpp>
 #include <../api/CPP/memory.hpp>
 #include <../api/CPP/data.hpp>
 #include <../api/CPP/topology.hpp>
 #include <../api/CPP/network.hpp>
 #include <../api/CPP/custom_gpu_primitive.hpp>
 #include <iostream>
 #include <iomanip>
 #include <chrono>

 #include "helper_functions.h"

 using namespace cldnn;


 // Chapter 7: running a user-written OpenCL kernel through a custom GPU primitive.
 //
 // Two f32 / bfyx buffers with dimensions { 1, 1, 3, 3 } are allocated on
 // my_engine and filled via the set_values / get_simple_data helpers. A topology
 // with two input layouts and one custom_gpu_primitive (an element-wise add
 // kernel) is built into a network, executed, and both inputs plus the output
 // are printed to std::cout.
 //
 // my_engine: engine used to allocate the input buffers and to build the network.
 void chapter_7(engine& my_engine)
 {
     std::cout << std::endl;
     std::cout << "-- Chapter 7 --" << std::endl;

     // --- Description of the custom kernel ---------------------------------

     // OpenCL source: every work item adds one element of the two input
     // buffers and stores the sum in the output buffer.
     const std::string kernel_source =
         R"__krnl(
             __kernel void add_kernel(const __global float* input0, const __global float* input1, __global float* output)
             {
                 const unsigned idx = get_global_id(0);
                 output[idx] = input0[idx] + input1[idx];
             }
         )__krnl";

     // Name of the __kernel function inside kernel_source that gets invoked.
     const std::string kernel_entry = "add_kernel";

     // Kernel argument binding, in declaration order: input 0, input 1, output 0.
     const std::vector<cldnn_arg> kernel_args = {
         { arg_input, 0 },
         { arg_input, 1 },
         { arg_output, 0 }
     };

     // Build options string handed over together with the kernel source.
     const std::string build_flags = "-cl-mad-enable";

     // --- Buffers ----------------------------------------------------------

     // One layout is shared by both inputs and by the primitive's output.
     const layout tensor_layout({ data_types::f32, format::bfyx,{ 1, 1, 3, 3 } });
     const layout result_layout = tensor_layout;

     memory first_input = memory::allocate(my_engine, tensor_layout);
     memory second_input = memory::allocate(my_engine, tensor_layout);

     // Fill both buffers with the data produced by the get_simple_data helper.
     set_values(first_input, get_simple_data<float>(first_input));
     set_values(second_input, get_simple_data<float>(second_input));

     // --- Topology and network --------------------------------------------

     // The custom primitive consumes the two input layouts registered below.
     topology add_topology;
     add_topology.add(input_layout("input1", first_input.get_layout()));
     add_topology.add(input_layout("input2", second_input.get_layout()));
     add_topology.add(custom_gpu_primitive(
         "my_custom_primitive",
         { "input1", "input2" },
         { kernel_source },
         kernel_entry,
         kernel_args,
         build_flags,
         result_layout));

     network add_network(my_engine, add_topology);

     // Attach the actual buffers to the named inputs, then run the network.
     add_network.set_input_data("input1", first_input);
     add_network.set_input_data("input2", second_input);

     auto results = add_network.execute();

     // --- Report -----------------------------------------------------------

     // Prints a caption line, then each float of the buffer (field width 3,
     // followed by ", "), then a terminating newline.
     const auto print_buffer = [](const char* caption, const memory& buffer)
     {
         std::cout << caption << std::endl;
         for (const float element : buffer.pointer<float>())
         {
             std::cout << std::setw(3) << element << ", ";
         }
         std::cout << std::endl;
     };

     print_buffer("input1:", first_input);
     print_buffer("input2:", second_input);
     print_buffer("output:", results.at("my_custom_primitive").get_memory());
 }
cldnn::build_option_type::outputs
User-selected list of program outputs.

cldnn::input_layout
Provides the input layout for data to be passed to the network later.
Definition: input_layout.hpp:39

cldnn::topology
Network topology to be defined by user.
Definition: topology.hpp:33

cldnn
Definition: activation.hpp:22

cldnn::topology::add
void add(PType const &desc)
Adds a primitive to topology.
Definition: topology.hpp:75

cldnn::memory::allocate
static memory allocate(const engine &engine, const layout &layout)
Allocate memory on engine using specified layout.
Definition: memory.hpp:50

cldnn::format::bfyx
The most common format for activations in clDNN.
Definition: tensor.hpp:81

cldnn::custom_gpu_primitive
This primitive executes a custom kernel provided by the application.
Definition: custom_gpu_primitive.hpp:35

cldnn::network
Executable network allocated from program.
Definition: network.hpp:59

cldnn::memory::get_layout
const layout & get_layout() const
Associated layout.
Definition: memory.hpp:114

cldnn::engine
Represents a clDNN engine object.
Definition: engine.hpp:110

cldnn::memory
Represents a buffer with a particular layout.
Definition: memory.hpp:42

cldnn::layout
Describes memory layout.
Definition: layout.hpp:223