oneAPI Deep Neural Network Library (oneDNN)  1.95.0
Performance library for Deep Learning
cross_engine_reorder.cpp

Annotated version: Reorder between CPU and GPU engines

/*******************************************************************************
* Copyright 2019-2020 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <iostream>
#include <stdexcept>
#include <vector>
// [Prologue]
#include "dnnl.hpp"
#include "example_utils.hpp"
#include "example_utils.hpp"
using namespace dnnl;
using namespace std;
// [Prologue]
// Writes a deterministic +1/-1 test pattern into `mem`.
//
// A host-side buffer of product(adims) floats is built and then handed to
// write_to_dnnl_memory(), which transfers it into the memory object.
// Elements whose linear index is a multiple of 7 are set to -1.0f; every
// other element is set to 1.0f.
void fill(memory &mem, const memory::dims &adims) {
    std::vector<float> values(product(adims));

    size_t idx = 0;
    for (float &v : values) {
        // Index 0, 7, 14, ... -> negative; everything else -> positive.
        if (idx % 7 == 0) {
            v = -1.0f;
        } else {
            v = 1.0f;
        }
        ++idx;
    }

    write_to_dnnl_memory(values.data(), mem);
}
// Counts the strictly negative elements stored in `mem`.
//
// The contents of `mem` are read into a host-side buffer of product(adims)
// floats via read_from_dnnl_memory(), and every element of that buffer is
// inspected. Returns the number of elements that compare less than 0.0f.
int find_negative(memory &mem, const memory::dims &adims) {
    std::vector<float> host(product(adims));
    read_from_dnnl_memory(host.data(), mem);

    int negatives = 0;
    for (const float value : host) {
        if (value < 0.0f) {
            ++negatives;
        }
    }
    return negatives;
}
void cross_engine_reorder_tutorial() {
// [Initialize engine]
auto cpu_engine = engine(validate_engine_kind(engine::kind::cpu), 0);
auto gpu_engine = engine(validate_engine_kind(engine::kind::gpu), 0);
// [Initialize engine]
// [Initialize stream]
auto stream_gpu = stream(gpu_engine, stream::flags::in_order);
// [Initialize stream]
// [reorder cpu2gpu]
const auto tz = memory::dims {2, 16, 1, 1};
auto m_cpu
cpu_engine);
auto m_gpu
gpu_engine);
fill(m_cpu, tz);
auto r1 = reorder(m_cpu, m_gpu);
// [reorder cpu2gpu]
// [Create a ReLU primitive]
// ReLU op descriptor (uses a GPU memory as source memory.
// no engine- or implementation-specific information)
// ReLU primitive descriptor, which corresponds to a particular
// implementation in the library. Specify engine type for the ReLU
// primitive. Use a GPU engine here.
auto relu_pd = eltwise_forward::primitive_desc(relu_d, gpu_engine);
// ReLU primitive
auto relu = eltwise_forward(relu_pd);
// [Create a ReLU primitive]
// [reorder gpu2cpu]
auto r2 = reorder(m_gpu, m_cpu);
// [reorder gpu2cpu]
// [Execute primitives]
// wrap source data from CPU to GPU
r1.execute(stream_gpu, m_cpu, m_gpu);
// Execute ReLU on a GPU stream
relu.execute(stream_gpu, {{DNNL_ARG_SRC, m_gpu}, {DNNL_ARG_DST, m_gpu}});
// Get result data from GPU to CPU
r2.execute(stream_gpu, m_gpu, m_cpu);
stream_gpu.wait();
// [Execute primitives]
// [Check the results]
if (find_negative(m_cpu, tz) != 0)
throw std::logic_error(
"Unexpected output, find a negative value after the ReLU "
"execution.");
// [Check the results]
}
// Program entry point. Delegates to handle_example_errors(), passing the
// engine kinds this example uses (CPU and GPU) together with the tutorial
// function, and returns whatever status that helper reports.
// The command-line arguments are not used.
int main(int argc, char **argv) {
    const int status = handle_example_errors(
            {engine::kind::cpu, engine::kind::gpu},
            cross_engine_reorder_tutorial);
    return status;
}
dnnl::reorder
Reorder primitive.
Definition: dnnl.hpp:3325
dnnl::stream
An execution stream.
Definition: dnnl.hpp:1086
dnnl::engine
An execution engine.
Definition: dnnl.hpp:865
dnnl::eltwise_forward
Elementwise unary operation forward propagation primitive.
Definition: dnnl.hpp:5649
dnnl::memory::data_type::f32
@ f32
32-bit/single-precision floating point.
dnnl::eltwise_forward::desc
Descriptor for an elementwise forward propagation primitive.
Definition: dnnl.hpp:5651
DNNL_ARG_DST
#define DNNL_ARG_DST
A special mnemonic for destination argument for primitives that have a single destination.
Definition: dnnl_types.h:1897
dnnl::eltwise_forward::primitive_desc
Primitive descriptor for an elementwise forward propagation primitive.
Definition: dnnl.hpp:5679
dnnl.hpp
dnnl::memory::get_desc
desc get_desc() const
Returns the associated memory descriptor.
Definition: dnnl.hpp:2139
DNNL_ARG_SRC
#define DNNL_ARG_SRC
A special mnemonic for source argument for primitives that have a single source.
Definition: dnnl_types.h:1873
dnnl::engine::kind::gpu
@ gpu
GPU engine.
dnnl::prop_kind::forward
@ forward
Forward data propagation, alias for dnnl::prop_kind::forward_training.
dnnl::algorithm::eltwise_relu
@ eltwise_relu
Elementwise: rectified linear unit (ReLU)
dnnl::memory
Memory object.
Definition: dnnl.hpp:1241
dnnl::memory::dims
std::vector< dim > dims
Vector of dimensions.
Definition: dnnl.hpp:1246
dnnl
oneDNN namespace
Definition: dnnl.hpp:86
dnnl::memory::format_tag::nchw
@ nchw
4D CNN activations tensor; an alias for dnnl::memory::format_tag::abcd
dnnl::stream::flags::in_order
@ in_order
In-order execution.
dnnl::engine::kind::cpu
@ cpu
CPU engine.