I ran into the same problem, spent quite some time debugging, and made a repro case before I found this post.
Any ETA on a fix for this?
Below is the repro case:
// Allocates a device buffer of `sz` floats and fills it with a copy of the
// host array `vec`.
//
// vec: host pointer to at least `sz` floats (read only).
// sz:  number of elements to copy.
//
// Returns the device pointer; the caller owns it and is responsible for
// releasing it with cudaFree(). Errors are routed through checkCUDA.
float *make_device_vector(const float *vec, int sz)
{
    // Compute the byte count once so the allocation and the copy always agree.
    const size_t bytes = sz * sizeof(float);

    float *vec_d = nullptr;
    checkCUDA( cudaMalloc(&vec_d, bytes) );
    checkCUDA( cudaMemcpy(vec_d, vec, bytes, cudaMemcpyHostToDevice) );
    return vec_d;
}
// Copies `sz` floats from the device buffer `vec_d` to a temporary host
// buffer and prints them to std::cout on one line, each followed by a space,
// then ends the line.
//
// vec_d: device pointer to at least `sz` floats.
// sz:    number of elements to print.
void print_device_vector(float *vec_d, int sz)
{
    float *vec = new float[sz];
    checkCUDA( cudaMemcpy(vec, vec_d, sz * sizeof(float),
                          cudaMemcpyDeviceToHost) );
    for (int i = 0; i < sz; i++)
    {
        std::cout << vec[i] << " ";
    }
    std::cout << std::endl;
    // Allocated with new[], so it must be released with delete[].
    delete[] vec;
}
// ======
// Testing cudnnConvolutionBackwardData()
// 1x2 image with 2 channels in, 1 channel out
//
// Out(top)-gradient: (1 0)
// Filter: (1 2)
//
// Expected in(bottom)-gradient: (1 0 2 0)
//
// Output of this program is:
// 1 0
// 1 2
// 1 0 0 0
//
// I.e., the returned in-gradient is: (1 0 0 0) – Bug!?
//
// Repro driver: builds the descriptors for a 1x2 image with 2 input channels
// and 1 output channel, runs cudnnConvolutionBackwardData() once, and prints
// the out-gradient, the filter and the resulting in-gradient.
int main(int argc, char **argv) {
    cudnnDataType_t dataType = CUDNN_DATA_FLOAT;
    cudnnTensorFormat_t tensorFormat = CUDNN_TENSOR_NCHW;

    cudnnHandle_t cudnnHandle;
    checkCUDNN( cudnnCreate(&cudnnHandle) );

    // No padding, unit stride, unit upscale.
    cudnnConvolutionDescriptor_t conv_desc;
    checkCUDNN( cudnnCreateConvolutionDescriptor(&conv_desc) );
    checkCUDNN( cudnnSetConvolution2dDescriptor(conv_desc,
                                                0, 0, 1, 1, 1, 1, CUDNN_CONVOLUTION) );

    // Filter: 1 output channel, 2 input channels, 1x1 kernel.
    // NOTE(review): this argument list matches the older cuDNN signature
    // without a tensor-format parameter -- confirm against the installed version.
    cudnnFilterDescriptor_t filter_desc;
    checkCUDNN( cudnnCreateFilterDescriptor(&filter_desc) );
    checkCUDNN( cudnnSetFilter4dDescriptor(filter_desc,
                                           CUDNN_DATA_FLOAT, 1, 2, 1, 1) );

    // Out (top) gradient: n=1, c=1, h=1, w=2.
    cudnnTensorDescriptor_t out_diff_desc;
    checkCUDNN( cudnnCreateTensorDescriptor(&out_diff_desc) );
    checkCUDNN( cudnnSetTensor4dDescriptor(out_diff_desc,
                                           tensorFormat,
                                           dataType,
                                           1, 1, 1, 2) );

    // In (bottom) gradient: n=1, c=2, h=1, w=2.
    cudnnTensorDescriptor_t in_diff_desc;
    checkCUDNN( cudnnCreateTensorDescriptor(&in_diff_desc) );
    checkCUDNN( cudnnSetTensor4dDescriptor(in_diff_desc,
                                           tensorFormat,
                                           dataType,
                                           1, 2, 1, 2) );

    // Host-side inputs; these must be arrays so they can be copied to the device.
    float out_diff[] = {1, 0};
    float filter[] = {1, 2};

    float *out_diff_d = make_device_vector(out_diff, 2);
    float *filter_d = make_device_vector(filter, 2);
    float *in_diff_d;
    checkCUDA( cudaMalloc(&in_diff_d, 4 * sizeof(float)) );

    print_device_vector(out_diff_d, 2);
    print_device_vector(filter_d, 2);

    // beta = 0 requests that the result overwrite in_diff_d rather than be
    // blended with its (uninitialized) prior contents.
    float alpha = 1.0f;
    float beta = 0.0f;
    checkCUDNN( cudnnConvolutionBackwardData(cudnnHandle,
                                             &alpha,
                                             filter_desc, filter_d,
                                             out_diff_desc, out_diff_d,
                                             conv_desc,
                                             &beta, in_diff_desc, in_diff_d) );

    print_device_vector(in_diff_d, 4);

    // Release device buffers, descriptors and the handle.
    checkCUDA( cudaFree(in_diff_d) );
    checkCUDA( cudaFree(filter_d) );
    checkCUDA( cudaFree(out_diff_d) );
    checkCUDNN( cudnnDestroyTensorDescriptor(in_diff_desc) );
    checkCUDNN( cudnnDestroyTensorDescriptor(out_diff_desc) );
    checkCUDNN( cudnnDestroyFilterDescriptor(filter_desc) );
    checkCUDNN( cudnnDestroyConvolutionDescriptor(conv_desc) );
    checkCUDNN( cudnnDestroy(cudnnHandle) );
    return 0;
}