in src/operator/cudnn_convolution-inl.h [165:286]
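// Backward pass of the cuDNN convolution: gradients w.r.t. bias, weight,
// and data are each produced by a separate cuDNN call, issued once per
// convolution group.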
virtual void Backward(const OpContext &ctx,
                      const std::vector<TBlob> &out_grad,
                      const std::vector<TBlob> &in_data,
                      const std::vector<TBlob> &out_data,
                      const std::vector<OpReqType> &req,
                      const std::vector<TBlob> &in_grad,
                      const std::vector<TBlob> &aux_args) {
  using namespace mshadow;
  using namespace mshadow::expr;
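  // Expect data and weight as inputs, plus bias unless no_bias is set.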
  size_t expected = param_.no_bias == 0 ? 3 : 2;
  DType *grad_ptr = NULL;
  DType *wmat_ptr = NULL;
  DType *gwmat_ptr = NULL;
  DType *data_ptr = NULL;
  DType *gdata_ptr = NULL;
  CHECK_EQ(out_grad.size(), 1U);
  CHECK(in_data.size() == expected && in_grad.size() == expected);
  Stream<gpu> *s = ctx.get_stream<gpu>();
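  // 2-D convolutions operate on 4-D tensors; anything else (3-D) on 5-D
  // tensors. Only the raw device pointers are kept for the cuDNN calls below.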
  if (param_.kernel.ndim() == 2) {
    Tensor<gpu, 4, DType> grad = out_grad[conv::kOut].get<gpu, 4, DType>(s);
    Tensor<gpu, 4, DType> wmat = in_data[conv::kWeight].get<gpu, 4, DType>(s);
    Tensor<gpu, 4, DType> gwmat = in_grad[conv::kWeight].get<gpu, 4, DType>(s);
    Tensor<gpu, 4, DType> data = in_data[conv::kData].get<gpu, 4, DType>(s);
    Tensor<gpu, 4, DType> gdata = in_grad[conv::kData].get<gpu, 4, DType>(s);
    grad_ptr = grad.dptr_;
    wmat_ptr = wmat.dptr_;
    gwmat_ptr = gwmat.dptr_;
    data_ptr = data.dptr_;
    gdata_ptr = gdata.dptr_;
  } else {
    Tensor<gpu, 5, DType> grad = out_grad[conv::kOut].get<gpu, 5, DType>(s);
    Tensor<gpu, 5, DType> wmat = in_data[conv::kWeight].get<gpu, 5, DType>(s);
    Tensor<gpu, 5, DType> gwmat = in_grad[conv::kWeight].get<gpu, 5, DType>(s);
    Tensor<gpu, 5, DType> data = in_data[conv::kData].get<gpu, 5, DType>(s);
    Tensor<gpu, 5, DType> gdata = in_grad[conv::kData].get<gpu, 5, DType>(s);
    grad_ptr = grad.dptr_;
    wmat_ptr = wmat.dptr_;
    gwmat_ptr = gwmat.dptr_;
    data_ptr = data.dptr_;
    gdata_ptr = gdata.dptr_;
  }
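  // Scratch buffer shared by the backward-filter and backward-data algorithms.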
  Tensor<gpu, 1, DType> workspace =
      ctx.requested[conv::kTempSpace].get_space_typed<gpu, 1, DType>(
          mshadow::Shape1(backward_workspace_), s);
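  // Issue one set of cuDNN calls per convolution group. cuDNN blends results
  // as out = alpha * result + beta * out, so beta_add (1.0) accumulates into
  // the existing gradient when the request type is kAddTo.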
  for (uint32_t g = 0; g < param_.num_group; ++g) {
    typename DataType<DType>::ScaleType alpha = 1.0f;
    typename DataType<DType>::ScaleType beta = 0.0f;
    typename DataType<DType>::ScaleType beta_add = 1.0f;
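    // Gradient w.r.t. bias, when a bias term exists and its gradient is requested.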
    if (!param_.no_bias && (req[conv::kBias] != kNullOp)) {
      Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
      CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
                                              &alpha,
                                              out_desc_,
                                              grad_ptr + out_offset_ * g,
                                              req[conv::kBias] == kAddTo ? &beta_add : &beta,
                                              bias_desc_,
                                              gbias.dptr_ + bias_offset_ * g));
    }
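    // Gradient w.r.t. the filter weights. cuDNN 4 exposes the algorithm-aware
    // entry point under the _v3 suffix; cuDNN 5 renamed it to drop the suffix.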
    if (req[conv::kWeight] != kNullOp) {
#if CUDNN_MAJOR <= 4
      CUDNN_CALL(cudnnConvolutionBackwardFilter_v3(s->dnn_handle_,
                                                   &alpha,
                                                   in_desc_,
                                                   data_ptr + data_offset_ * g,
                                                   out_desc_,
                                                   grad_ptr + out_offset_ * g,
                                                   backward_conv_desc_,
                                                   back_algo_w_,
                                                   workspace.dptr_,
                                                   backward_workspace_byte_,
                                                   req[conv::kWeight] == kAddTo ? &beta_add : &beta,
                                                   filter_desc_,
                                                   gwmat_ptr + weight_offset_ * g));
#elif CUDNN_MAJOR >= 5
      CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_,
                                                &alpha,
                                                in_desc_,
                                                data_ptr + data_offset_ * g,
                                                out_desc_,
                                                grad_ptr + out_offset_ * g,
                                                backward_conv_desc_,
                                                back_algo_w_,
                                                workspace.dptr_,
                                                backward_workspace_byte_,
                                                req[conv::kWeight] == kAddTo ? &beta_add : &beta,
                                                filter_desc_,
                                                gwmat_ptr + weight_offset_ * g));
#endif
    }
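    // Gradient w.r.t. the input data, likewise split across cuDNN 4/5 entry points.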
    if (req[conv::kData] != kNullOp) {
#if CUDNN_MAJOR <= 4
      CUDNN_CALL(cudnnConvolutionBackwardData_v3(s->dnn_handle_,
                                                 &alpha,
                                                 filter_desc_,
                                                 wmat_ptr + weight_offset_ * g,
                                                 out_desc_,
                                                 grad_ptr + out_offset_ * g,
                                                 backward_conv_desc_,
                                                 back_algo_,
                                                 workspace.dptr_,
                                                 backward_workspace_byte_,
                                                 req[conv::kData] == kAddTo ? &beta_add : &beta,
                                                 in_desc_,
                                                 gdata_ptr + data_offset_ * g));
#elif CUDNN_MAJOR >= 5
      CUDNN_CALL(cudnnConvolutionBackwardData(s->dnn_handle_,
                                              &alpha,
                                              filter_desc_,
                                              wmat_ptr + weight_offset_ * g,
                                              out_desc_,
                                              grad_ptr + out_offset_ * g,
                                              backward_conv_desc_,
                                              back_algo_,
                                              workspace.dptr_,
                                              backward_workspace_byte_,
                                              req[conv::kData] == kAddTo ? &beta_add : &beta,
                                              in_desc_,
                                              gdata_ptr + data_offset_ * g));
#endif
    }
  }
}