in src/model/operation/convolution.cc [96:225]
/// Forward pass of 2-D convolution on CPU.
///
/// @param x  input, expected shape (batch, channels, height, width) — checked
///           against the handle below.
/// @param W  filter weights, expected shape
///           (num_filters, channels, kernel_h, kernel_w) — checked below.
///           Non-const because its raw block is handed to DNNL as mutable data.
/// @param b  bias vector; its raw block is handed to DNNL as mutable data.
/// @param ch handle carrying the precomputed convolution geometry
///           (dims, strides, paddings).
/// @return output tensor of shape
///         (batch, num_filters, conv_height, conv_width).
///
/// Requires a build with USE_DNNL; without it this operation is not
/// implemented and aborts via CHECK (the naive Im2col fallback is kept below,
/// commented out, for reference).
Tensor CpuConvForward(const Tensor &x, Tensor &W, Tensor &b,
                      const ConvHandle &ch) {
  CHECK_EQ(x.device()->lang(), kCpp);
  CHECK(x.shape(1) == ch.channels && x.shape(2) == ch.height &&
        x.shape(3) == ch.width)
      << "input sample shape should not change";
  CHECK(W.shape(0) == ch.num_filters && W.shape(1) == ch.channels &&
        W.shape(2) == ch.kernel_h && W.shape(3) == ch.kernel_w)
      << "weights shape should not change";
#ifdef USE_DNNL
  DataType dtype = x.data_type();
  auto dev = x.device();
  Shape shape{ch.batchsize, ch.num_filters, ch.conv_height, ch.conv_width};
  Tensor output(shape, dev, dtype);
  // NOTE(review): W, b and ch are captured by reference; this assumes the
  // Exec'd lambda runs before they go out of scope in the caller — confirm
  // against the device's execution model.
  output.device()->Exec(
      [output, x, &W, &b, &ch](Context *ctx) mutable {
        using namespace dnnl;
        using tag = memory::format_tag;
        auto eng = ctx->dnnl_engine;
        auto s = ctx->dnnl_stream;
        auto dtype = dnnl::memory::data_type::f32;
        // dnnl design pattern
        // xxx_user_xxx_memory(and its format tag) is defined by user, which may
        // need to be reordered
        auto conv_user_src_memory = memory({{ch.x_dims}, dtype, tag::nchw}, eng,
                                           x.block()->mutable_data());
        auto conv_user_weights_memory = memory({{ch.w_dims}, dtype, tag::goihw},
                                               eng, W.block()->mutable_data());
        auto conv_user_bias_memory = memory({{ch.b_dims}, dtype, tag::x}, eng,
                                            b.block()->mutable_data());
        // xxx_xxx_memory_md is created for creating conv_desc, and format tag
        // is defined as any, so DNNL may pick its preferred layout.
        auto conv_src_md = memory::desc({ch.x_dims}, dtype, tag::any);
        auto conv_bias_md = memory::desc({ch.b_dims}, dtype, tag::any);
        auto conv_weights_md = memory::desc({ch.w_dims}, dtype, tag::any);
        auto conv_dst_md = memory::desc({ch.o_dims}, dtype,
                                        tag::nchw);  // could not set to any
        auto conv_desc = convolution_forward::desc(
            prop_kind::forward, algorithm::convolution_direct, conv_src_md,
            conv_weights_md, conv_bias_md, conv_dst_md, ch.s_dims, ch.p_dims,
            ch.p_dims);
        auto conv_pd = convolution_forward::primitive_desc(conv_desc, eng);
        // auto conv_pd = *ch.conv_pd; // 1ms to 70 ms slower
        // memory placeholder for reorder
        auto conv_src_memory = conv_user_src_memory;
        auto conv_weights_memory = conv_user_weights_memory;
        // output memory
        auto conv_dst_memory =
            memory(conv_pd.dst_desc(), eng, output.block()->mutable_data());
        // Tensors backing the reordered copies — testing showed no significant
        // performance improvement from caching these.
        Tensor x_reo;
        x_reo.ResetLike(x);
        Tensor W_reo;
        W_reo.ResetLike(W);
        // Reorder src/weights into DNNL's preferred layout only when it
        // differs from the user-provided (nchw / goihw) layout.
        if (conv_pd.src_desc() != conv_user_src_memory.get_desc()) {
          conv_src_memory =
              memory(conv_pd.src_desc(), eng, x_reo.block()->mutable_data());
          reorder(conv_user_src_memory, conv_src_memory)
              .execute(s, {{DNNL_ARG_FROM, conv_user_src_memory},
                           {DNNL_ARG_TO, conv_src_memory}});
        }
        if (conv_pd.weights_desc() != conv_user_weights_memory.get_desc()) {
          conv_weights_memory = memory(conv_pd.weights_desc(), eng,
                                       W_reo.block()->mutable_data());
          reorder(conv_user_weights_memory, conv_weights_memory)
              .execute(s, {{DNNL_ARG_FROM, conv_user_weights_memory},
                           {DNNL_ARG_TO, conv_weights_memory}});
        }
        // execute forward convolution
        convolution_forward(conv_pd).execute(
            s, {{DNNL_ARG_SRC, conv_src_memory},
                {DNNL_ARG_WEIGHTS, conv_weights_memory},
                {DNNL_ARG_BIAS, conv_user_bias_memory},
                {DNNL_ARG_DST, conv_dst_memory}});
        // synchronize stream so the reorder buffers stay alive until done
        s.wait();
      },
      {x.block(), W.block(), b.block()}, {output.block()}, "CpuConvForward");
  return output;
#else  // cpp naive fallback disabled (error due to Im2col importing)
  /*
  Shape w_shape = W.shape();
  Shape b_shape;
  if (ch.bias_term) b_shape = b.shape();
  W.Reshape(Shape{ch.num_filters, ch.col_height});
  if (ch.bias_term) b.Reshape(Shape{ch.num_filters});
  DataType dtype = x.data_type();
  auto dev = x.device();
  Shape shape{ch.batchsize, ch.num_filters, ch.conv_height, ch.conv_width};
  Tensor output(shape, dev, dtype);
  Tensor col_data(Shape{ch.col_height, ch.col_width});  // broadcasted image
  float *data_col = new float[ch.col_height * ch.col_width];
  auto in_data = x.data<float>();
  for (size_t num = 0; num < ch.batchsize; num++) {
    Im2col(in_data + num * ch.imagesize, ch.channels, ch.height, ch.width,
           ch.kernel_h, ch.kernel_w, ch.pad_h, ch.pad_w, ch.stride_h,
           ch.stride_w, data_col);
    col_data.CopyDataFromHostPtr(data_col, ch.col_height * ch.col_width);
    Tensor each = Mult(W, col_data);
    if (ch.bias_term) {
      AddColumn(b, &each);
    }
    CopyDataToFrom(&output, each, each.Size(), num * each.Size());
  };
  W.Reshape(w_shape);
  if (ch.bias_term) b.Reshape(b_shape);
  return output;
  */
  // Fail loudly instead of falling off the end of a non-void function
  // (which is undefined behavior).
  CHECK(false) << "CpuConvForward requires a build with USE_DNNL";
  return Tensor();  // unreachable; keeps all control paths returning a value
#endif  // USE_DNNL
}