clDNN
chapter_7.cpp
1 /*
2 // Copyright (c) 2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16 
17 #include <../api/CPP/cldnn_defs.h>
18 #include <../api/CPP/engine.hpp>
19 #include <../api/CPP/input_layout.hpp>
20 #include <../api/CPP/memory.hpp>
21 #include <../api/CPP/data.hpp>
22 #include <../api/CPP/topology.hpp>
23 #include <../api/CPP/network.hpp>
24 #include <../api/CPP/custom_gpu_primitive.hpp>
25 #include <iostream>
26 #include <iomanip>
27 #include <chrono>
28 
29 #include "helper_functions.h"
30 
40 using namespace cldnn;
41 
42 
43 void chapter_7(engine& my_engine)
44 {
45  std::cout << std::endl << "-- Chapter 7 --" << std::endl;
46 
47  // We are going to implement a custom primitive that will execute a simple
48  // addition kernel.
49  // The Kernel will be implemented in OpenCL and will simply add 2 input buffers
50 
51  // Define a memory layout we'll use for input/output
52  layout my_layout({ data_types::f32, format::bfyx,{ 1, 1, 3, 3 } });
53 
54  // Create input memory primitives
55  memory input_prim1 = memory::allocate(my_engine, my_layout);
56  memory input_prim2 = memory::allocate(my_engine, my_layout);
57 
58  set_values(input_prim1, get_simple_data<float>(input_prim1));
59  set_values(input_prim2, get_simple_data<float>(input_prim2));
60 
61  // OpenCL kernel for the custom primitive
62  std::string custom_primitive_kernel_code =
63  R"__krnl(
64  __kernel void add_kernel(const __global float* input0, const __global float* input1, __global float* output)
65  {
66  const unsigned idx = get_global_id(0);
67  output[idx] = input0[idx] + input1[idx];
68  }
69  )__krnl";
70 
71  // The name of the OpenCL Entry point function
72  std::string entry_point = "add_kernel";
73 
74  // Parameter binding for the custom primitive
75  std::vector<cldnn_arg> parameters = { { arg_input, 0 }, { arg_input, 1 }, { arg_output, 0 } };
76 
77  // Output layout for the custom primitive
78  layout output_layout = my_layout;
79 
80  // Compiler options to be handed to the OpenCL runtime
81  std::string compilation_options = "-cl-mad-enable";
82 
83  // Now we can create a topology holding the inputs and add the custom primitive to it
84  topology my_topology;
85  my_topology.add(input_layout("input1", input_prim1.get_layout()));
86  my_topology.add(input_layout("input2", input_prim2.get_layout()));
87  my_topology.add(custom_gpu_primitive(
88  "my_custom_primitive",
89  { "input1", "input2" },
90  { custom_primitive_kernel_code },
91  entry_point,
92  parameters,
93  compilation_options,
94  output_layout));
95 
96  // We can now build the network
97  network my_network(my_engine, my_topology);
98 
99  // Set the inputs
100  my_network.set_input_data("input1", input_prim1);
101  my_network.set_input_data("input2", input_prim2);
102 
103  // And Execute
104  auto outputs = my_network.execute();
105 
106  // Finally, we print out the input and output data
107  std::cout << "input1:" << std::endl;
108  for (const auto value : input_prim1.pointer<float>())
109  {
110  std::cout << std::setw(3) << value << ", ";
111  }
112  std::cout << std::endl;
113 
114  std::cout << "input2:" << std::endl;
115  for (const auto value : input_prim2.pointer<float>())
116  {
117  std::cout << std::setw(3) << value << ", ";
118  }
119  std::cout << std::endl;
120 
121  std::cout << "output:" << std::endl;
122  for (const auto value : outputs.at("my_custom_primitive").get_memory().pointer<float>())
123  {
124  std::cout << std::setw(3) << value << ", ";
125  }
126  std::cout << std::endl;
127 }
User selected list of program outputs.
Provides the input layout for data to be passed to the network later.
Network topology to be defined by user.
Definition: topology.hpp:33
void add(PType const &desc)
Adds a primitive to topology.
Definition: topology.hpp:75
static memory allocate(const engine &engine, const layout &layout)
Allocate memory on engine using specified layout.
Definition: memory.hpp:50
The most common format for activations in clDNN.
Definition: tensor.hpp:81
This primitive executes a custom kernel provided by the application.
Executable network allocated from program.
Definition: network.hpp:59
const layout & get_layout() const
Associated layout.
Definition: memory.hpp:114
Represents clDNN engine object.
Definition: engine.hpp:110
Represents buffer with particular layout.
Definition: memory.hpp:42
Describes memory layout.
Definition: layout.hpp:223