OpenCL: CL_INVALID_COMMAND_QUEUE when allocated resources are used more intensely.

What could cause this error besides increasing resource pressure per kernel? I checked kernels do not leak nor go out of bounds. But more than 30 kernels compute millions of data points and 720p mode works flawlessly while 1080p mode gives the error in title.

The problematic freezing kernel is:

__kernel void renderProjectilesToTexture(__global uchar4 * restrict  buf, __global int * restrict  pBox,
                                            __global float * restrict  pX, __global float * restrict  pY,
                                            __global float * restrict  parametersFloat, __global int * restrict  parametersInt,
                                            __global unsigned char * restrict  shipTeam, __global float * restrict  pRotation,
                                            __global unsigned char * restrict  pState, __global unsigned char * restrict  pLife,
											__global uchar * restrict shipSizeType, __global uchar4 * restrict projectilePixels,
											__global uchar4 * skyPixels, __global float * restrict bufLightR,
											__global uchar * pixelUsed,__global float * restrict bufLightG,
											__global float * restrict bufLightB,
											__global uchar * restrict projectileCritExplosion)
{
    int i=get_global_id(0); // pixel id
	
	// blocked computing with 16x16 patches of pixels
	int patchId = i/PROJECTILE_RENDER_PATCH_SIZE;

	// pixel's coordinates in its patch
	int localX = i%PROJECTILE_RENDER_PATCH_SIZE_SQRT;
	int localY = (i/PROJECTILE_RENDER_PATCH_SIZE_SQRT)%PROJECTILE_RENDER_PATCH_SIZE_SQRT;

	// first element of each patch will have this X and Y offsets
	int patchX = (patchId % (RENDER_WIDTH/PROJECTILE_RENDER_PATCH_SIZE_SQRT)) * PROJECTILE_RENDER_PATCH_SIZE_SQRT;
	int patchY = (patchId / (RENDER_WIDTH/PROJECTILE_RENDER_PATCH_SIZE_SQRT)) * PROJECTILE_RENDER_PATCH_SIZE_SQRT;

	int blockX=patchX + localX;
	int blockY=patchY + localY;

	// conversion from scanline to blocked
	i=blockX + blockY * RENDER_WIDTH;

    int mapMaxX=@@mapWidth@@;

    //int maxProjectilesPerBox=@@maxProjectilesPerBox@@;
    int nShipProjectiles=@@nShipProjectiles@@;
    int projectileSearchBoxSize=@@projectileSearchBoxSize@@;
	int totalMaxProjectiles=@@nShip@@*nShipProjectiles;

    float scale=parametersFloat[0];
    float translateX=parametersFloat[1];
    float translateY=parametersFloat[2];
                                        
    int x0=((i%RENDER_WIDTH)*scale + (translateX))-(RENDER_WIDTH/2)*scale;
    int y0=((i/RENDER_WIDTH)*scale + (translateY))-(RENDER_HEIGHT/2)*scale;
    float xf=x0;
    float yf=y0;
    int found=0;
    int foundProjectileId=-1;

    int boxX= x0/projectileSearchBoxSize;
    int boxY= y0/projectileSearchBoxSize;
    int boxId=boxX + boxY*(mapMaxX/projectileSearchBoxSize);
	float explosionSize = 0.0f;
	float lightR=0.0f;
	float lightG=0.0f;
	float lightB=0.0f;
	uchar4 foundProjectilePixel = (uchar4)(255,255,255,255);
    for(int w=-1;w<=1;w++)
    {
        int wi=w*(mapMaxX/projectileSearchBoxSize);
        for(int h=-1;h<=1;h++)
        {

            int boxIdCurrent=boxId+h+wi;
            if((boxIdCurrent<2) || (boxIdCurrent>=@@((mapHeight/projectileSearchBoxSize)*(mapWidth/projectileSearchBoxSize))@@-2))
                continue;
            int nProjectilesInBox=pBox[boxIdCurrent];

            // nProjectilesInBox is not overflowing (added later with v3)
            //if(nProjectilesInBox>=nShipProjectiles)
            //    nProjectilesInBox=nShipProjectiles-1;
            
            for(int j=0;j<nProjectilesInBox;j++)
            {
                // todo: check projectile data layout: strided - parallel
                int selectedProjectileId=pBox[boxIdCurrent+(j+1)*N_PROJECTILE_BOX_LAYER];
                // check if (xf,yf) is in projectile
                if( (selectedProjectileId>=0) && (selectedProjectileId<totalMaxProjectiles))
                {
					unsigned char tmpState = pState[selectedProjectileId];
					int tmpSize= 10+ PROJECTILE_SIZE + PROJECTILE_SIZE*shipSizeType[selectedProjectileId%N_SHIP_MAX];
					if((tmpState&PROJECTILE_DEAD)!=0)
						continue;
					if((tmpState&PROJECTILE_EXPLOSION)!=0)
						tmpSize= ((((@@projectileLife@@+@@projectileExplosionLife@@) - pLife[selectedProjectileId]))) ;
					
                    float dx=xf-pX[selectedProjectileId];
                    float dy=yf-pY[selectedProjectileId];
					float dr=(dx*dx + dy*dy);
					float tmpDr=(tmpSize*tmpSize);
					int shipIdOfSelectedProjectile = selectedProjectileId%N_SHIP_MAX;
					uchar stTmp=shipTeam[shipIdOfSelectedProjectile];
					float tmpEfct= 5.0f/(dr+3.0f);
					
					if((pState[selectedProjectileId]&PROJECTILE_EXPLOSION)==0)
					{ 
						if(dr<325.0f)
						{ 
								if(stTmp==0)
									lightR += tmpEfct*2.0f;
								else if(stTmp==1)
									lightG += tmpEfct*2.0f;
								else if(stTmp==2)
									lightB += tmpEfct*2.0f;
						}
					}
					else
					{ 
						if(dr<725.0f)
						{ 
							float explosionSizeTmp=(10.0f - dr/tmpDr);
							if(explosionSizeTmp<0.0f)
								explosionSizeTmp=0.01f;
							if(stTmp==0)
							{lightR += tmpEfct*explosionSizeTmp; lightG += tmpEfct*explosionSizeTmp;lightB += tmpEfct*explosionSizeTmp;}
							else if(stTmp==1)
							{ 	lightR += tmpEfct*explosionSizeTmp;lightG += tmpEfct*explosionSizeTmp;lightB += tmpEfct*explosionSizeTmp;}
							else if(stTmp==2)
							{ lightR += tmpEfct*explosionSizeTmp;lightG += tmpEfct*explosionSizeTmp;lightB += tmpEfct*explosionSizeTmp;}
						}
					}
					// if inside of bounding area of projectile
                    if(dr<tmpDr)
                    {
						if((pState[selectedProjectileId]&PROJECTILE_EXPLOSION)!=0)
						{ 								
							explosionSize=dr/tmpDr;
							found=1;
							if(foundProjectileId<selectedProjectileId)
							{   
								foundProjectileId=selectedProjectileId; 
							}

							break;
						}
						float bitmapX= dx;
						float bitmapY= dy;
						float sr = degreeToRadian( 270.0f - radianToDegree( pRotation[selectedProjectileId]));
						float cosSr=cos(sr);
						float sinSr=sin(sr);
						float bX2 = bitmapX * cosSr - bitmapY * sinSr;
						float bY2 = bitmapX * sinSr + bitmapY * cosSr;
						int bX = bX2+1; // projectile bitmap size / 2
						int bY = bY2+5; // projectile bitmap size / 2
						if((bX>=0) && (bX<3) && (bY>=0) && (bY<10))
						{ 
						    int projectileTeamOffset=shipTeam[shipIdOfSelectedProjectile];
							uchar4 sampledPixel=projectilePixels[bX + 3*bY + 30* projectileTeamOffset];
							if(sampledPixel.s3!=0)
							{ 
								found=1;
								if(foundProjectileId<selectedProjectileId)
								{   
									foundProjectileId=selectedProjectileId; 
								}

								foundProjectilePixel=sampledPixel;

							}
							
						}
                    }
                }
            }
        }
    }

    if(found>0)
    {
        //uchar st=shipTeam[foundProjectileId%N_SHIP_MAX];
		// projectile is in explosion animation
        
		if(((pState[foundProjectileId]&PROJECTILE_EXPLOSION)!=0))
		{
			// not critical explosion
			if(projectileCritExplosion[foundProjectileId]==0)
			{ 
				uchar red=255;
				uchar green=255-255*explosionSize;
				uchar blue=255-255*explosionSize;
				uchar alpha=255;
				// explosion being beneath of ship is 1 and 2 on 1d4. same height is 3. above is 4 on 1d4
				pixelUsed[i]=1;
				buf[i]=(uchar4)(blue,green,red,alpha);	
			}
			else
			{ 
				uchar red=255;
				uchar green=255-255*explosionSize;
				uchar blue=0; // criticl hit = yellow explosion
				uchar alpha=255;
				// explosion being beneath of ship is 1 and 2 on 1d4. same height is 3. above is 4 on 1d4
				pixelUsed[i]=1;
				buf[i]=(uchar4)(blue,green,red,alpha);		
			}
		}
		else
		{
			// no explosion, only projectile pixel
			if(foundProjectilePixel.s0!=0)
			{ 
				buf[i]=foundProjectilePixel;
				pixelUsed[i]=1;
			}
			else
			{ 
				pixelUsed[i]=0;
			}
		}
    }
	else
	{ 
		pixelUsed[i]=0;
		// render sky
		bufLightR[i]=lightR;
		bufLightG[i]=lightG;
		bufLightB[i]=lightB;
		float bitmapX=((xf - MAP_WIDTH/2) + ((xf )*5.0f - translateX*5.0f)/scale)*0.2f;
		float bitmapY=((yf - MAP_WIDTH/2) + ((yf )*5.0f - translateY*5.0f)/scale)*0.2f;
		float sr = degreeToRadian(270.0f);
		float cosSr=cos(sr);
		float sinSr=sin(sr);
		float bX2 = bitmapX * cosSr - bitmapY * sinSr;
		float bY2 = bitmapX * sinSr + bitmapY * cosSr;
		int bX = bX2+SKY_BITMAP_WIDTH/2;
		int bY = bY2+SKY_BITMAP_HEIGHT/2;
		if((bX>=0) && (bX<SKY_BITMAP_WIDTH) && (bY>=0) && (bY<SKY_BITMAP_HEIGHT))
		{ 
			uchar4 result=skyPixels[bX + bY * SKY_BITMAP_WIDTH];
			if(result.s3!=0)
				buf[i]=(uchar4)((result.s0*2.2f)/3.0f,(result.s1*1.2f)/3.0f,(result.s2*1.2f)/3.0f,255);
		}
	}

}

A tester of my benchmark project left a feedback about this kernel being problematic but not always, only 6 out of 10 trials(and only when resource usage is higher(on 1080p mode. 720p works without problem, according to testers)). Also it runs on CPU too, without leaks.

Binaries: https://github.com/tugrul512bit/KaloriferBenchmarkGPU/blob/master/EpicWarCL_v0_1_5_6_1080p_benchmark.rar

Could anyone can try 1080p mode with gtx1050 or gtx1060 using Nsight please?

Here is a single-queue version of it(just in case multiple queues are problem without explicit timing control): https://github.com/tugrul512bit/KaloriferBenchmarkGPU/blob/master/EpicWarCL_v0_1_5_6_1080p_benchmark_2.rar

This has extra out-of-bounds checking for boxes(on top of being single queued): https://github.com/tugrul512bit/KaloriferBenchmarkGPU/blob/master/EpicWarCL_v0_1_5_6_1080p_benchmark_3.rar

Hi,

I got 404 error about the link you gave.