Problem with arbitrary-sized textures Error "invalid argument" with cudaBindTexture2D()

Hi,

in my application I have a large linear segment in device memory interpreted as multiple equally-sized 2D-layers. For fast sparse access I want to bind each layer to a texture.

This works perfectly fine if the dimensions for these layers are somewhat “common”, like 512x512 or 800x640. But for more arbitrary sizes like 1000x700 or 800x641 cudaBindTexture2D() exits with the error “invalid argument” when attempting to bind the second texture. The first texture binding seems to complete successfully (however, I did not check this by actually accessing this first texture).

Here is the source code for a minimal program that reproduces the error:

#include <stdio.h>

#include <cuda.h>

#include <cutil_inline.h>

texture<float, 2> tex0, tex1;

int main(int argc, char** argv)

{

  const size_t W = 1000;

  const size_t H = 700;

  const size_t SIZE = W * H;

  const size_t MEMSIZE = W * H * sizeof(float);

  const size_t PITCH = W * sizeof(float);

  const size_t NUM_TEXS = 2;

/* Allocate and initialize device data. */

  float* d_data;

  cutilSafeCall(cudaMalloc((void**)&d_data, NUM_TEXS * MEMSIZE));

  cutilSafeCall(cudaMemset(d_data, 0, NUM_TEXS * MEMSIZE));

/* Bind data to the two textures. */

  cudaChannelFormatDesc chD = cudaCreateChannelDesc<float>();

float* ptr = d_data;

  printf("Binding 1st %ux%u data segment to 1st texture...", W, H);

  cutilSafeCall(cudaBindTexture2D(NULL, &tex0, ptr, &chD, W, H, PITCH));

  printf(" done.\n");

/* Increase data pointer to the start of the next layer. */

  ptr += SIZE;

  printf("Binding 2nd %ux%u data segment to 2nd texture...", W, H);

  cutilSafeCall(cudaBindTexture2D(NULL, &tex1, ptr, &chD, W, H, PITCH)); /* This line throws an error. */

  printf(" done.\n");

cutilExit(argc, argv);

}

In case W and H are set to 800 and 640 the program terminates successfully:

Binding 1st 800x640 data segment to 1st texture... done.

Binding 2nd 800x640 data segment to 2nd texture... done.

Press ENTER to exit...

For WxH = 1000x700 it does not:

Binding 1st 1000x700 data segment to 1st texture... done.

Binding 2nd 1000x700 data segment to 2nd texture...cudaSafeCall() Runtime API error in file <d:/coding/CUDA/my projects/TexBi

ndTest//main.cu>, line 32 : invalid argument.

AFAIK the returned error for the second call of cudaBindTexture2D() is not an error actually returned by the first call, since cudaBindTexture2D() isn’t asynchronous. In particular inserting even multiple lines of cutilSafeCall(cudaThreadSynchronize()) between the two bindings does not change the behavior.

Some dimensions that work: 800x640, 800x700, 800x642, 1000x640

…and some that don’t: 1000x700, 800x641, 1000x641, 1000x642

I guess CUDA has problems with textures having a particular number of texels, but I’m not able to figure out what exactly and I couldn’t find anything about restrictions either.

Is this a bug or am I missing something?

Ok, I experimented with the above example program and found something. It appears that the line [font=“Courier New”]ptr += SIZE;[/font] invalidates the pointer to the data if [font=“Courier New”]SIZE[/font] is not a multiple of 64. Or more specifically if [font=“Courier New”]MEMSIZE[/font] is not a multiple of 256 bytes. This would mean that the data bound to textures must be aligned to 256 bytes, which sounds a bit strange to me.

A possible workaround in my case would then be to pad the data’s bytes accordingly to the next multiple of 256 for each data layer.

However, it would be great if someone could confirm this, because I don’t want to implement a workaround for something possibly plain I’m just missing… Thanks in advance.


EDIT:

Ok, I think I finally got it. Any data bound to textures must of course be aligned to 256 bytes. That’s why there is a member named [font=“Courier New”]textureAlignment[/font] in the [font=“Courier New”]cudaDeviceProp[/font] struct…

For the sake of completeness I’ll post a corrected version of the above program, which pads the data if necessary, in a few minutes.


EDIT 2:

I now found the way of correctly binding misaligned data to a texture and accessing it with [font=“Courier New”]tex2D()[/font]. The data does not necessarily have to be padded manually. The key is the write-back parameter [font=“Courier New”]offset[/font] of [font=“Courier New”]cudaBindTexture2D()[/font]. This value has to be passed to the kernel accessing the textures to calculate the corrected offset from the nearest lower aligned starting address of the texture to its first real element. Like I said above here is a corrected version of the sample program that does this (compile and run in emulation mode):

#include <stdio.h>

#include <cuda.h>

#include <cutil_inline.h>

texture<float, 2> tex0, tex1;

__global__

void readTexs(size_t offset, size_t w, size_t h)

{

  float val0 = tex2D(tex0, 100, 100);

offset /= sizeof(float);

  size_t xOffset = offset % w;

  size_t yOffset = offset / w;

  float val1 = tex2D(tex1, xOffset + 100, yOffset + 100);

printf("val0 = %f\nval1 = %f\n", val0, val1);

}

int main(int argc, char** argv)

{

  const size_t W = 1000;

  const size_t H = 700;

  const size_t SIZE = W * H;

  const size_t MEMSIZE = W * H * sizeof(float);

  const size_t PITCH = W * sizeof(float);

  const size_t NUM_TEXS = 2;

/* Allocate and initialize device data. */

  float* d_data;

  cutilSafeCall(cudaMalloc((void**)&d_data, NUM_TEXS * MEMSIZE));

  cutilSafeCall(cudaMemset(d_data, 0, NUM_TEXS * MEMSIZE));

  /* Mark a specific value in both layers at position (100,100). */

  float val0 = 1.0f;

  float val1 = 2.0f;

  unsigned int pos = 100 * W + 100;

  cutilSafeCall(cudaMemcpy(&d_data[pos],		&val0, sizeof(val0), cudaMemcpyHostToDevice));

  cutilSafeCall(cudaMemcpy(&d_data, &val1, sizeof(val1), cudaMemcpyHostToDevice));

/* Bind data to the two textures. */

  cudaChannelFormatDesc chD = cudaCreateChannelDesc<float>();

float* ptr = d_data;

  printf("Binding 1st %ux%u data segment to 1st texture...", W, H);

  cutilSafeCall(cudaBindTexture2D(NULL, &tex0, ptr, &chD, W, H, PITCH));

  printf(" done.\n");

/* Increase data pointer to the start of the next layer. */

  ptr += SIZE;

  printf("Binding 2nd %ux%u data segment to 2nd texture...", W, H);

  size_t offset;

  cutilSafeCall(cudaBindTexture2D(&offset, &tex1, ptr, &chD, W, H, PITCH));

  printf(" done (offset: %u).\n", offset);

/* Read the marked values. */

  readTexs<<<1,1>>>(offset, W, H);

cutilExit(argc, argv);

}