clDNN
chapter_8.cpp
1 /*
2 // Copyright (c) 2017 Intel Corporation
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 */
16 
17 #include <../api/CPP/cldnn_defs.h>
18 #include <../api/CPP/engine.hpp>
19 #include <../api/CPP/input_layout.hpp>
20 #include <../api/CPP/memory.hpp>
21 #include <../api/CPP/data.hpp>
22 #include <../api/CPP/topology.hpp>
23 #include <../api/CPP/network.hpp>
24 #include <../api/CPP/activation.hpp>
25 #include <../api/CPP/crop.hpp>
26 #include <../api/CPP/upsampling.hpp>
27 #include <iostream>
28 #include <chrono>
29 
30 #include "helper_functions.h"
31 
51 using namespace cldnn;
52 
53 //Helper function for printing primitive ids and profiling info
54 void print_info(std::map<primitive_id, primitive_id>& all_primitives, std::map<primitive_id, event>& executed_primitives)
55 {
56  std::cout << std::endl << "Org_primitive_id, Primitive_id_after_optimization" << std::endl;
57  for (auto& p : all_primitives)
58  {
59  std::cout << p.first << ", " << p.second << std::endl;
60  }
61 
62  // Now, we want to check what is the time of execution of each primitive:
63  std::vector<cldnn::instrumentation::profiling_info> profiling_table;
64  for (auto& p : executed_primitives)
65  {
66  profiling_table.push_back({ p.first, p.second.get_profiling_info() });
67  }
68 
69  // We have table of profiling metrics.
70  for (auto& p : profiling_table)
71  {
72  std::cout << p.name << ":" << std::endl;
73  for (auto& q : p.intervals)
74  {
75  std::cout << "\t" << q.name << ": " << std::chrono::duration_cast<std::chrono::duration<double, std::chrono::milliseconds::period>>(q.value->value()).count()
76  << " milliseconds" << std::endl;
77  }
78  }
79 }
80 
81 void chapter_8(engine& engine)
82 {
83  std::cout << std::endl << "-- Chapter 8 --" << std::endl;
84 
85  // We are going to implement a network with activation and two crops that will be optimized on graph level:
86  // _ CROP_1(1x3x2x2,offset(0x0x0x0)) --> RELU
87  // |
88  // INPUT(1x4x1x1)--UPSAMPLING(1x4x2x2)----RELU
89  // |_
90  // CROP_2(1x1x2x2,offset(0x3x0x0)) --> RELU
91  //
92 
93  // Create input memory for convolution layer
94  memory input_prim = memory::allocate(engine, { data_types::f32, format::bfyx,{ tensor(spatial(1, 1), feature(4), batch(1)) } });
95 
96  set_values(input_prim, get_simple_data<float>(input_prim));
97 
98  // Create a topology and add primitives
100  topology.add(input_layout("input", input_prim.get_layout()));
101  topology.add(upsampling("upsampling", "input", 2, 4, upsampling_sample_type::nearest));
102  topology.add(activation("relu", "upsampling", activation_relu));
103  topology.add(crop("crop1", "relu", tensor(batch(1), spatial(2, 2), feature(3)), { tensor(feature(0), spatial(0,0),batch(0)) }));
104  topology.add(crop("crop2", "relu", tensor(batch(1), spatial(2, 2), feature(1)), { tensor(feature(3), spatial(0,0),batch(0)) }));
105  topology.add(activation("relu1", "crop1", activation_relu));
106  topology.add(activation("relu2", "crop2", activation_relu));
107 
108  // Build network without optimize data build option
109  network network_1(engine, topology);
110 
111  // Set input.
112  network_1.set_input_data("input", input_prim);
113  // Ready to go.
114  auto outputs_1 = network_1.execute();
115 
116  // Get primitives that were executed and their events needed for profiling
117  // Please note that since optimize data is not set, then all primitives from created topology
118  // that are not constant (such as data primitives) will be on this list
119  auto executed_primitives_1 = network_1.get_executed_primitives();
120 
121  // Get all primitives names that are part of built network with their orginal names that were provided by user
122  // Please note that since optimize data is not set, this list will match topology.get_primitives()
123  // and all primitives names will be not changed, or optimized
124  auto all_primitives_1 = network_1.get_all_primitives();
125 
126  //Print list of primitives with orginal ids, and profiling info
127  std::cout << std::endl << "Primitives list and profiling info for network without optimize data build option." << std::endl;
128  print_info(all_primitives_1, executed_primitives_1);
129 
130  // Now lets build and execute the same network but with optimize data build option
131  build_options build_opt;
132  build_opt.set_option(build_option::optimize_data(true));
133  network network_2(engine, topology, build_opt);
134  network_2.set_input_data("input", input_prim);
135  auto outputs = network_2.execute();
136 
137  // Get primitives that were executed and their events needed for profiling
138  // Please note that first relu and two crop primtives are not on the list, since they were optimized during graph optimization.
139  // The list takes into account only primitives that were really executed.
140  auto executed_primitives_2 = network_2.get_executed_primitives();
141 
142  // Get all primitives names that are part of built network with their orginal names that were provided by user
143  // Please note that since optimize data is set, this list may no longer match topology.get_primitives(), and that is the case here.
144  // Some of the primitives that were fused are removed from the list like the first activation primitive.
145  // The primitives that were optimized (will not be executed) are now marked as "_optimized_" - please see crop primitives.
146  // There can be also cases when primitive name will no longer match the primitive provided by the user, this will happen only
147  // when primitive is set as output.
148  auto all_primitives_2 = network_2.get_all_primitives();
149 
150  //Print list of primitives with orginal ids, and profiling info
151  //Expected output from all_primitives_2 in this case will be:
152  //Org_primitive_id, Primitive_id_after_optimization
153  // crop1, _optimized_
154  // crop2, _optimized_
155  // input, input
156  // relu1, relu1
157  // relu2, relu2
158  // upsampling, upsampling
159  //
160  //As mentioned before, "relu" is not on the list as upsampling will perform built-in relu. Crop primitives are marked as _optimized_.
161  //Profiling data from executed_primitives_2 should contain 4 primitives - input, relu1, relu2 and upsampling
162  std::cout << std::endl << "Primitives list and profiling info for network with optimize data build option." << std::endl;
163  print_info(all_primitives_2, executed_primitives_2);
164 
165 }
Represents program build options list.
Definition: program.hpp:399
Activation using rectified linear unit or parameterized rectified linear unit.
Definition: activation.hpp:39
N-dimensional vector. Mostly used to represent memory size.
Definition: tensor.hpp:256
void set_option(std::shared_ptr< const build_option > opt)
Adds or replaces an option in the options list.
Definition: program.hpp:403
Performs crop operation on input.
Definition: crop.hpp:47
upsampling nearest neighbor.
User selected list of program outputs.
Provides input layout for a data to be passed later to network.
Network topology to be defined by user.
Definition: topology.hpp:33
void add(PType const &desc)
Adds a primitive to topology.
Definition: topology.hpp:75
static memory allocate(const engine &engine, const layout &layout)
Allocate memory on engine using specified layout.
Definition: memory.hpp:50
the most common format for activations in clDNN.
Definition: tensor.hpp:81
Performs nearest neighbor/bilinear upsampling. Also supports built-in Relu activation available by set...
Definition: upsampling.hpp:42
Executable network allocated from program.
Definition: network.hpp:59
const layout & get_layout() const
Associated layout.
Definition: memory.hpp:114
Represents clDNN engine object.
Definition: engine.hpp:110
Represents buffer with particular layout.
Definition: memory.hpp:42
static std::shared_ptr< const build_option > optimize_data(bool enable=false)
Enable implicit reordering for user inputs (default: false).