in src/operator/cudnn_deconvolution-inl.h [179:297]
/*!
 * \brief Compute gradients for deconvolution via cuDNN.
 *
 * Deconvolution's backward pass is expressed with cuDNN's *convolution*
 * primitives with the roles of data/output swapped:
 *   - d(bias)   : cudnnConvolutionBackwardBias on the output gradient
 *   - d(weight) : cudnnConvolutionBackwardFilter(grad, data)
 *   - d(data)   : cudnnConvolutionForward(grad, weight)
 *
 * \param ctx      operator context providing the GPU stream and temp space
 * \param out_grad gradient w.r.t. the single output (deconv::kOut)
 * \param in_data  forward inputs: data, weight[, bias]
 * \param out_data forward outputs (unused here)
 * \param req      write/add/null request per input gradient
 * \param in_grad  gradients to fill: data, weight[, bias]
 * \param aux_args unused auxiliary states
 */
virtual void Backward(const OpContext &ctx,
                      const std::vector<TBlob> &out_grad,
                      const std::vector<TBlob> &in_data,
                      const std::vector<TBlob> &out_data,
                      const std::vector<OpReqType> &req,
                      const std::vector<TBlob> &in_grad,
                      const std::vector<TBlob> &aux_args) {
  using namespace mshadow;
  using namespace mshadow::expr;
  // With a bias term there are 3 inputs (data, weight, bias), otherwise 2.
  size_t expected = param_.no_bias == 0 ? 3 : 2;
  DType *grad_ptr = NULL;
  DType *wmat_ptr = NULL;
  DType *gwmat_ptr = NULL;
  DType *data_ptr = NULL;
  DType *gdata_ptr = NULL;
  CHECK_EQ(out_grad.size(), 1U);
  CHECK(in_data.size() == expected && in_grad.size() == expected);
  Stream<gpu> *s = ctx.get_stream<gpu>();
  // Extract raw device pointers; tensor rank depends on 2D vs 3D kernels.
  // The Tensor objects are only needed transiently to obtain dptr_, so the
  // pointers are hoisted out of the branch scopes for use below.
  if (param_.kernel.ndim() == 2) {
    Tensor<gpu, 4, DType> grad = out_grad[deconv::kOut].get<gpu, 4, DType>(s);
    Tensor<gpu, 4, DType> wmat = in_data[deconv::kWeight].get<gpu, 4, DType>(s);
    Tensor<gpu, 4, DType> gwmat = in_grad[deconv::kWeight].get<gpu, 4, DType>(s);
    Tensor<gpu, 4, DType> data = in_data[deconv::kData].get<gpu, 4, DType>(s);
    Tensor<gpu, 4, DType> gdata = in_grad[deconv::kData].get<gpu, 4, DType>(s);
    grad_ptr = grad.dptr_;
    wmat_ptr = wmat.dptr_;
    gwmat_ptr = gwmat.dptr_;
    data_ptr = data.dptr_;
    gdata_ptr = gdata.dptr_;
  } else {
    Tensor<gpu, 5, DType> grad = out_grad[deconv::kOut].get<gpu, 5, DType>(s);
    Tensor<gpu, 5, DType> wmat = in_data[deconv::kWeight].get<gpu, 5, DType>(s);
    Tensor<gpu, 5, DType> gwmat = in_grad[deconv::kWeight].get<gpu, 5, DType>(s);
    Tensor<gpu, 5, DType> data = in_data[deconv::kData].get<gpu, 5, DType>(s);
    Tensor<gpu, 5, DType> gdata = in_grad[deconv::kData].get<gpu, 5, DType>(s);
    grad_ptr = grad.dptr_;
    wmat_ptr = wmat.dptr_;
    gwmat_ptr = gwmat.dptr_;
    data_ptr = data.dptr_;
    gdata_ptr = gdata.dptr_;
  }
  // In-place writes would corrupt inputs still needed by the cuDNN calls.
  CHECK_NE(req[deconv::kWeight], kWriteInplace);
  if (!param_.no_bias) {
    CHECK_NE(req[deconv::kBias], kWriteInplace);
  }
  CHECK_NE(req[deconv::kData], kWriteInplace);
  Tensor<gpu, 1, DType> workspace =
      ctx.requested[deconv::kTempSpace].get_space_typed<gpu, 1, DType>(
          mshadow::Shape1(backward_workspace_), s);
  // Process each group independently; the *_offset_ members give the
  // per-group stride into each buffer.
  for (uint32_t g = 0; g < param_.num_group; ++g) {
    typename DataType<DType>::ScaleType alpha = 1.0f;
    // beta == 1 accumulates into the destination (kAddTo); beta == 0 overwrites.
    typename DataType<DType>::ScaleType bias_beta = 0.0f;
    if (!param_.no_bias && req[deconv::kBias] == kAddTo) {
      bias_beta = 1.0f;
    }
    typename DataType<DType>::ScaleType data_beta =
        req[deconv::kData] == kAddTo ? 1.0f : 0.0f;
    typename DataType<DType>::ScaleType weight_beta =
        req[deconv::kWeight] == kAddTo ? 1.0f : 0.0f;
    if (!param_.no_bias && (req[deconv::kBias] != kNullOp)) {
      // Bias gradient: reduce the output gradient over all non-channel dims.
      Tensor<gpu, 1, DType> gbias = in_grad[deconv::kBias].get<gpu, 1, DType>(s);
      CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
                                              &alpha,
                                              out_desc_,
                                              grad_ptr + out_offset_ * g,
                                              &bias_beta,
                                              bias_desc_,
                                              gbias.dptr_ + bias_offset_ * g));
    }
    if (req[deconv::kWeight] != kNullOp) {
      // Weight gradient. cuDNN 4 and earlier expose the algorithm-selecting
      // variant under the _v3 suffix; cuDNN 5+ renamed it.
#if CUDNN_MAJOR <= 4
      CUDNN_CALL(cudnnConvolutionBackwardFilter_v3(
          s->dnn_handle_,
          &alpha,
          out_desc_,
          grad_ptr + out_offset_ * g,
          in_desc_,
          data_ptr + data_offset_ * g,
          backward_conv_desc_,
          back_algo_w_,
          workspace.dptr_,
          backward_workspace_byte_,
          &weight_beta,
          filter_desc_,
          // Fixed: was `gwmat.dptr_`, but `gwmat` is out of scope here; use
          // the hoisted raw pointer like the cuDNN >= 5 branch does.
          gwmat_ptr + weight_offset_ * g));
#elif CUDNN_MAJOR >= 5
      CUDNN_CALL(cudnnConvolutionBackwardFilter(
          s->dnn_handle_,
          &alpha,
          out_desc_,
          grad_ptr + out_offset_ * g,
          in_desc_,
          data_ptr + data_offset_ * g,
          backward_conv_desc_,
          back_algo_w_,
          workspace.dptr_,
          backward_workspace_byte_,
          &weight_beta,
          filter_desc_,
          gwmat_ptr + weight_offset_ * g));
#endif
    }
    if (req[deconv::kData] != kNullOp) {
      // Data gradient of a deconvolution is a plain forward convolution of
      // the output gradient with the (unflipped) weights.
      CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_,
                                         &alpha,
                                         out_desc_,
                                         grad_ptr + out_offset_ * g,
                                         filter_desc_,
                                         wmat_ptr + weight_offset_ * g,
                                         backward_conv_desc_,
                                         algo_,
                                         workspace.dptr_,
                                         forward_workspace_byte_,
                                         &data_beta,
                                         in_desc_,
                                         gdata_ptr + data_offset_ * g));
    }
  }
}