Hello, i am new to cuda programming and i am writing a program for bessel function on cuda but when i am compiling and executing it. it is giving me segmentation fault everytime. i am unable to debug it. Please help!!
Here is my code (main file):
#include <stdio.h>
#include <bits/stdc++.h>
#include <cuComplex.h>
#include <thrust/complex.h>
#include "bessel.h"
const int N = 5;
const int blocksize = 5;
__global__ void add(cuDoubleComplex *a)
{
//a[threadIdx.x] =cuCadd(a[threadIdx.x], a[threadIdx.x]);
cuDoubleComplex c = a[threadIdx.x];
cuDoubleComplex b = bessel(c);
a[threadIdx.x]=b;
printf(" The value is %lf + %lfi \n",cuCreal(b),cuCimag(b));
}
int main()
{
double a[N] = {97.24, 80.60, 54.13, -60.05, 99.96};
double b[N] = {-67.42, -10.54, -5.83, 85.21, -98.33};
cuDoubleComplex *c,*d;
for(int i=0;i<N;i++)
{
c[i]=make_cuDoubleComplex(a[i],b[i]);
}
printf("Working fine");
cudaMalloc( (void**)&d, N * sizeof(cuDoubleComplex) );
cudaMemcpy( d, c, N * sizeof(cuDoubleComplex), cudaMemcpyHostToDevice );
printf("\nhere too");
dim3 dimBlock( blocksize, 1 );
dim3 dimGrid( 1, 1 );
add<<<dimGrid, dimBlock>>>(d);
cudaMemcpy( c, d, N * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost );
printf("here also\n");
cudaFree( d );
return EXIT_SUCCESS;
}
here is my other file(bessel function):
#include <cuda.h>
#include <cuComplex.h>
#include <thrust/complex.h>
#include <cuda_runtime.h>
#include <bits/stdc++.h>
#include <math.h>
using namespace std;
__device__ cuDoubleComplex cuClog(cuDoubleComplex a)
{
cuDoubleComplex b;
double x = cuCreal(a);
double y = cuCimag(a);
double r = sqrt((pow(x,2)+pow(y,2)));
double theta = atan(y/x);
b=make_cuDoubleComplex(log(r),theta);
return b;
}
__device__ cuDoubleComplex cuCexp(cuDoubleComplex x)
{
double factor = exp(x.x);
return make_cuDoubleComplex(factor * cos(x.y), factor * sin(x.y));
}
__device__ cuDoubleComplex cuCsqrt(cuDoubleComplex x)
{
double radius = cuCabs(x);
double cosA = x.x / radius;
cuDoubleComplex out;
out.x = sqrt(radius * (cosA + 1.0) / 2.0);
out.y = sqrt(radius * (1.0 - cosA) / 2.0);
// signbit should be false if x.y is negative
if (signbit(x.y))
out.y *= -1.0;
return out;
}
__device__ cuDoubleComplex cuCpow(const cuDoubleComplex& z, const int &n) {
return make_cuDoubleComplex(pow((double)cuCabs(z), n)*cos(n*(double)atan2(cuCimag(z),cuCreal(z))), (pow(cuCabs(z), n)*sin(n*(double)atan2(cuCimag(z),cuCreal(z)))));
}
__device__ cuDoubleComplex cumul(cuDoubleComplex a,float b)
{
cuDoubleComplex c;
c.x=cuCreal(a)*(double(b));
c.y=cuCimag(a)*(double(b));
return c;
}
__device__ cuDoubleComplex cudiv(cuDoubleComplex a,float b)
{
cuDoubleComplex d;
d.x=cuCreal(a)/(double(b));
d.x=cuCimag(a)/(double(b));
return d;
}
__device__ cuDoubleComplex bessel(cuDoubleComplex z)
{
float a[12] = {0.125e+00, 7.03125e-02, 7.32421875e-02,1.1215209960938e-01, 2.2710800170898e-01, 5.7250142097473e-01,
1.7277275025845e+00, 6.0740420012735e+00,
2.4380529699556e+01, 1.1001714026925e+02,
5.5133589612202e+02, 3.0380905109224e+03};
float a0;
float a1[10] = {0.125e+00, 0.2109375e+00,
1.0986328125e+00, 1.1775970458984e+01,
2.1461706161499e+002, 5.9511522710323e+03,
2.3347645606175e+05, 1.2312234987631e+07,
8.401390346421e+08, 7.2031420482627e+10};
float b[12] = {
-0.375e+00, -1.171875e-01,
-1.025390625e-01, -1.4419555664063e-01,
-2.7757644653320e-01, -6.7659258842468e-01,
-1.9935317337513e+00, -6.8839142681099e+00,
-2.7248827311269e+01, -1.2159789187654e+02,
-6.0384407670507e+02, -3.3022722944809e+03 };
cuDoubleComplex ca, cb, cbi0, cbi1, cbk0, cbk1, cdi0, cdi1, cdk0, cdk1, ci, cr, cs, ct, cw;
int k0;
float pi,w0;
cuDoubleComplex z1,z2,zr,zr2;
pi = 3.141592653589793e+00;
ci = make_cuDoubleComplex(0.0e+00, 1.0e+00);
a0 = cuCabs(z);
z2 = cuCmul(z, z);
z1 = z;
if ( a0 == 0.0e+00 ){
cbi0 = make_cuDoubleComplex(1.0e+00, 0.0e+00);
cbi1 = make_cuDoubleComplex( 0.0e+00, 0.0e+00);
cdi0 = make_cuDoubleComplex( 0.0e+00, 0.0e+00);
cdi1 = make_cuDoubleComplex( 0.5e+00, 0.0e+00);
cbk0 = make_cuDoubleComplex( 1.0e+30, 0.0e+00);
cbk1 = make_cuDoubleComplex( 1.0e+30, 0.0e+00);
cdk0 = make_cuDoubleComplex( -1.0e+30, -0.0e+00);
cdk1 = make_cuDoubleComplex( -1.0e+30, -0.0e+00);
}
printf("abc\n");
if (cuCreal(z) < 0.0e+00)
z1 = make_cuDoubleComplex(-cuCreal(z), -cuCimag(z));
if ( a0 <= 18.0e+00 ){
cbi0 = make_cuDoubleComplex(1.0e+00, 0.0e+00);
cr = make_cuDoubleComplex(1.0e+00, 0.0e+00);
for(int k=1;k<=50;k++){
printf("def\n");
cr=cumul(cr,a[k]);
cr=cuCmul(cr,z2);
cr=cudiv(cr,float(k*(k+1)));
cbi1=cuCadd(cbi1,cr);
if ( cuCabs(cuCdiv(cr,cbi1)) < 1.0e-15 )
break;
}
cbi1=cumul(cuCmul(z1,cbi1),0.5e+00);
}
else if ( a0 < 35.0e+00 )
k0 = 12;
else if ( a0 < 50.0e+00 )
k0 = 9;
else
k0 = 7;
ca = cuCdiv(cuCexp(z1), cuCsqrt(cumul(z1,pi*2.0e+00)));
cbi0 = make_cuDoubleComplex( 1.0e+00, 0.0e+00);
zr = cudiv(z1,1.0e+00);
for(int k = 1;k<=k0;k++)
{
cuDoubleComplex temp2=make_cuDoubleComplex(double(a[k]),0.0e+00);
cbi0 = cuCpow(cuCadd(cbi0,cumul(zr,a[k])),k);
}
cbi0 = cuCmul(ca, cbi0);
cbi1 = make_cuDoubleComplex(1.0e+00, 0.0e+00);
for(int k = 1;k <= k0;k++){
//cuDoubleComplex temp3=make_cuDoubleComplex(double(k),0.0e+00);
//cuDoubleComplex temp4=make_cuDoubleComplex(double(b[k]),0.0e+00);
cbi1 = cuCpow(cuCadd(cbi1,cumul(zr,b[k])),k);
}
cbi1 = cuCmul(ca, cbi1);
if ( a0 <= 9.0e+00 ) //then
{
//cuDoubleComplex temp5=make_cuDoubleComplex(0.5e+00,0.0e+00);
cs = make_cuDoubleComplex( 0.0e+00, 0.0e+00);
ct = cuClog(cumul(z1,0.5e+00));
ct = make_cuDoubleComplex(-cuCreal(ct),-cuCimag(ct));
cuDoubleComplex temp14=make_cuDoubleComplex(-0.5772156649015329e+00,0.0e+00);
ct = cuCsub(ct,temp14); //-0.5772156649015329e+00;
w0 = 0.0e+00;
cr = make_cuDoubleComplex( 1.0e+00, 0.0e+00);
for(int k=1;k<=50;k++)
{
w0 = w0 + 1.0e+00 / k;
//cuDoubleComplex temp6=make_cuDoubleComplex(0.25e+00,0.0e+00);
//cuDoubleComplex temp7=make_cuDoubleComplex(double(k*k),0.0e+00);
cr = cumul(cudiv(cr,float(k*k)),0.25e+00); //* z2;
cr = cuCmul(cr,z2);
cuDoubleComplex temp8=make_cuDoubleComplex(double(w0),0.0e+00);
cs = cuCadd(cs,cuCmul(cr,cuCadd(temp8,ct)));
if (cuCabs(cuCdiv(cuCsub(cs,cw),cs)) < 1.0e-15 ) //then
break;
//end if
cw = cs;
}
cbk0 = cuCadd(ct,cs);
}
else
{
//cuDoubleComplex temp9=make_cuDoubleComplex(0.5e+00,0.0e+00);
//cuDoubleComplex temp10=make_cuDoubleComplex(1.0e+00,0.0e+00);
cb = cudiv(z1,0.5e+00);
zr2 = cudiv(z2,1.0e+00);
cbk0 = make_cuDoubleComplex( 1.0e+00, 0.0e+00);
for(int k=1;k<=10;k++)
{
//cuDoubleComplex temp11=make_cuDoubleComplex(double(a1[k]),0.0e+00);
cbk0 = cuCpow(cuCadd(cbk0,cumul(zr2,a1[k])),k);
}
cbk0 = cuCdiv(cuCmul(cb,cbk0),cbi0);
}//end if
//cuDoubleComplex temp12=make_cuDoubleComplex(1.0e+00,0.0e+00);
cbk1 = cuCdiv(cuCsub(cudiv(z1,1.0e+00),cuCmul(cbi1,cbk0)),cbi0);
if ( cuCreal(z)< 0.0e+00 ) //then
{
if ( cuCimag(z) < 0.0e+00 ) //then
{
cbk0 = cuCadd(cbk0,cuCmul(cumul(ci,pi),cbi0));
cbk1 = cuCadd(cbk1,cuCmul(cumul(ci,pi),cbi1));
cbk1 = make_cuDoubleComplex(-cuCreal(cbk1),-cuCimag(cbk1)); //cbk1 = - cbk1 + ci * piC * cbi1;
}
else
{
cbk0 = cuCsub(cbk0,cuCmul(cumul(ci,pi),cbi0));
cbk1 = cuCsub(cbk1,cuCmul(cumul(ci,pi),cbi1));
cbk1 = make_cuDoubleComplex(-cuCreal(cbk1),-cuCimag(cbk1));
/*cbk0 = cbk0 - ci * piC * cbi0;
cbk1 = - cbk1 - ci * piC * cbi1;*/
}
//end if
cbi1 = make_cuDoubleComplex(-cuCreal(cbi1),-cuCimag(cbi1));
}//end if
cdi0 = cbi1;
cuDoubleComplex temp13=make_cuDoubleComplex(1.0e+00,0.0e+00);
cdi1 = cuCsub(cbi0,cuCmul(cudiv(z,1.0e+00),cbi1));
cdk0 = make_cuDoubleComplex(-cuCreal(cbk1),-cuCimag(cbk1));
cdk1 = cuCsub(cbk0,cuCmul(cuCdiv(temp13,z),cbk1));
cdk1 = make_cuDoubleComplex(-cuCreal(cdk1),-cuCimag(cdk1));
return cbk0;
}