Bilinear interpolation problem

Hello all
I have a problem: I finished my program in C (running on the CPU), but it is too slow,
so I want to run it in CUDA, and I have no idea how to start.
The image has been saved in the array f.
Can someone suggest something?
My code:
// Bilinear interpolation of a 3-bytes-per-pixel image stored in f.
// i walks the byte offsets of one row in steps of 3, so i/3 is the pixel
// column used to index GU/GV (sample position) and GFU/GFV (blend fractions).
for (j = 0; j < Dmp->Height; j++)
{
    for (i = 0; i < Dmp->Width * 3; i += 3)
    {
        // Skip samples whose position has no right or lower neighbour.
        if (GU[j][i/3] >= 0 && GU[j][i/3] < Dmp->Width - 1 &&
            GV[j][i/3] >= 0 && GV[j][i/3] < Dmp->Height - 1)
        {
            // NOTE(review): the guard tests GU/GV, but the pixels below are
            // fetched at (i, j). On the last row (j == Dmp->Height - 1) the
            // j + 1 reads may fall outside f -- confirm the size of f or
            // index by (int)GU / (int)GV instead.

            // Blend weights of the four neighbours; they are the same for
            // all three channels, so compute them once per pixel.
            const double fu = GFU[j][i/3];
            const double fv = GFV[j][i/3];
            const double wTopLeft     = (1.0 - fu) * (1.0 - fv);
            const double wTopRight    = fu * (1.0 - fv);
            const double wBottomLeft  = (1.0 - fu) * fv;
            const double wBottomRight = fu * fv;

            // Neighbour addressing: +3 is the next pixel in the row and
            // 3 * w * (j + 1) is the same column in the next row (equal to
            // the former literal +1200 when w == 400).
            // Channel offsets within a pixel: +2 -> colR, +1 -> colG, +0 -> colB.

            ncol1 = f[i + 2 + 3 * w * j];
            ncol2 = f[i + 5 + 3 * w * j];
            ncol3 = f[i + 2 + 3 * w * (j + 1)];
            ncol4 = f[i + 5 + 3 * w * (j + 1)];
            colR = (byte)(wTopLeft * (double)ncol1 + wTopRight * (double)ncol2
                        + wBottomLeft * (double)ncol3 + wBottomRight * (double)ncol4);

            ncol1 = f[i + 1 + 3 * w * j];
            ncol2 = f[i + 4 + 3 * w * j];
            ncol3 = f[i + 1 + 3 * w * (j + 1)];
            ncol4 = f[i + 4 + 3 * w * (j + 1)];
            colG = (byte)(wTopLeft * (double)ncol1 + wTopRight * (double)ncol2
                        + wBottomLeft * (double)ncol3 + wBottomRight * (double)ncol4);

            // The blue neighbours in the next row sit at +0 and +3, matching
            // the red (+2/+5) and green (+1/+4) pattern above.
            ncol1 = f[i + 3 * w * j];
            ncol2 = f[i + 3 + 3 * w * j];
            ncol3 = f[i + 3 * w * (j + 1)];
            ncol4 = f[i + 3 + 3 * w * (j + 1)];
            colB = (byte)(wTopLeft * (double)ncol1 + wTopRight * (double)ncol2
                        + wBottomLeft * (double)ncol3 + wBottomRight * (double)ncol4);

            // NOTE(review): colR/colG/colB are not written anywhere in the
            // code shown; the store into the output image is presumably
            // elsewhere -- confirm.
        }
    }
}

			// Flat-array variant of the interpolation loop: i advances by 3 up to
			// 360000, and i/3 indexes GU2/GV2 (sample position) and GFU2/GFV2
			// (blend fractions).
			// NOTE(review): 360000 and 1200 are hard-coded; presumably a 400x300
			// image with 3 bytes per pixel -- confirm against Dmp->Width/Height.
			// NOTE(review): the closing brace of this for loop and any code that
			// consumes colR/colG/colB are not part of the visible fragment.
			for (i=0 ; i<360000 ; i+=3)
			{
				// Process only samples with GU2 in [0, Width-1) and GV2 in [0, Height-1).
				if(GU2[i/3]>=0 && GU2[i/3]<Dmp->Width-1 && GV2[i/3]>=0 && GV2[i/3]<Dmp->Height-1)
				{
					 // NOTE(review): these captions are reassigned on every qualifying
					 // iteration; looks like leftover debug output -- confirm.
					 Label42->Caption=f[3];
					 Label43->Caption=c[3];
					 // Base offset into f is (int)GU2*3 + (int)GV2*1200. Offsets +3 and
					 // +1200 presumably select the next pixel and the next row.
					 // Channel offset +2 feeds colR.
					 ncol1 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+2];
					 ncol2 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+5];
					 ncol3 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1202];
					 ncol4 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1205];
					 // Weighted sum of the four samples, using GFU2/GFV2 as the
					 // horizontal/vertical fractions, truncated to byte.
					 colR = (byte)((1.0-GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol1+(GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol2
								 +(1.0-GFU2[i/3])*(GFV2[i/3])*(double)ncol3+(GFU2[i/3])*(GFV2[i/3])*(double)ncol4);
					// Same four neighbours at channel offset +1, feeding colG.
					ncol1 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1];
					ncol2 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+4];
					ncol3 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1201];
					ncol4 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1204];
					colG = (byte)((1.0-GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol1+(GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol2
								 +(1.0-GFU2[i/3])*(GFV2[i/3])*(double)ncol3+(GFU2[i/3])*(GFV2[i/3])*(double)ncol4);
					// Same four neighbours at channel offset +0, feeding colB.
					ncol1 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200];
					ncol2 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+3];
					ncol3 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1200];
					ncol4 = f[(int)GU2[i/3]*3+(int)GV2[i/3]*1200+1203];
					colB = (byte)((1.0-GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol1+(GFU2[i/3])*(1.0-GFV2[i/3])*(double)ncol2
								 +(1.0-GFU2[i/3])*(GFV2[i/3])*(double)ncol3+(GFU2[i/3])*(GFV2[i/3])*(double)ncol4);

				}

Linear/bilinear/trilinear interpolations can be effectively performed in CUDA by using texture memory.

Do you have any resources I can learn from?
For example: websites, books, or tutorials?

Seems like your Google is broken ;)

http://www.mathworks.com/matlabcentral/fileexchange/23795-nvidia-cuda-based-bilinear--2d--interpolation

http://mkaczanowski.com/bilinear-interpolation-with-nvidia-cuda-c/

http://www.orangeowlsolutions.com/archives/300
http://www.orangeowlsolutions.com/archives/206
http://cuda-programming.blogspot.com/2013/02/texture-memory-in-cuda-what-is-texture.html
http://cuda-programming.blogspot.com/2013/04/texture-references-object-in-cuda.html