error while loading shared libraries: libcudart.so.4: cannot open shared object file: No such file or directory
Hi,

I am trying to run the following program. It contains OpenMPI and CUDA code.

[code]
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <sys/time.h>
#include <mpi.h>

#define NREPEAT 10
#define NBYTES 10.e6

int main (int argc, char *argv[])
{
int rank, size, n, len, numbytes;
void *a_h, *a_d;
struct timeval time[2];
double bandwidth;
char name[MPI_MAX_PROCESSOR_NAME];
MPI_Status status;

MPI_Init (&argc, &argv);
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
MPI_Comm_size (MPI_COMM_WORLD, &size);

MPI_Get_processor_name(name, &len);
printf("Process %d is on %s\n", rank, name);

printf("Using regular memory \n");
a_h = malloc(NBYTES);

cudaMalloc( (void **) &a_d, NBYTES);

/* Test host -> device bandwidth. */
MPI_Barrier(MPI_COMM_WORLD);

gettimeofday(&time[0], NULL);
for (n=0; n<NREPEAT; n++)
{
cudaMemcpy(a_d, a_h, NBYTES, cudaMemcpyHostToDevice);
}
gettimeofday(&time[1], NULL);

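/* Elapsed time = seconds part + microseconds part; bandwidth is reported in MB/s (1 MB = 1e6 bytes). */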
bandwidth = time[1].tv_sec - time[0].tv_sec;
bandwidth += 1.e-6*(time[1].tv_usec - time[0].tv_usec);
bandwidth = NBYTES*NREPEAT/1.e6/bandwidth;

printf("Host->device bandwidth for process %d: %f MB/sec\n",rank,bandwidth);

/* Test MPI send/recv bandwidth. */
MPI_Barrier(MPI_COMM_WORLD);

gettimeofday(&time[0], NULL);
for (n=0; n<NREPEAT; n++)
{
if (rank == 0)
MPI_Send(a_h, NBYTES/sizeof(int), MPI_INT, 1, 0, MPI_COMM_WORLD);
else
MPI_Recv(a_h, NBYTES/sizeof(int), MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
}
gettimeofday(&time[1], NULL);

bandwidth = time[1].tv_sec - time[0].tv_sec;
bandwidth += 1.e-6*(time[1].tv_usec - time[0].tv_usec);
bandwidth = NBYTES*NREPEAT/1.e6/bandwidth;

if (rank == 0)
printf("MPI send/recv bandwidth: %f MB/sec\n", bandwidth);

cudaFree(a_d);
free(a_h);

MPI_Finalize();
return 0;
}
[/code]

To compile I am using:

mpicc mpibandwidth.c -o mpibandwidth -I /usr/local/cuda/include -L /usr/local/cuda/lib -lcudart

To execute I am using:

/usr/local/bin/mpirun --mca btl tcp,self --mca btl_tcp_if_include eth0 --hostfile slaves -np 5 mpibandwidth

I am getting this error when executing:
error while loading shared libraries: libcudart.so.4: cannot open shared object file: No such file or directory

My PATH and LD_LIBRARY_PATH variables are:

PATH = /usr/lib/qt-3.3/bin:/usr/local/ns-allinone/bin:/usr/local/ns-allinone/tcl8.4.18/unix:/usr/local/ns-allinone/tk8.4.18/unix:/usr/local/cuda/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:/usr/local/lib/:/usr/local/lib/openmpi:/usr/local/cuda/bin

LD_LIBRARY_PATH = :/usr/local/lib:/usr/local/lib/openmpi/:/usr/local/cuda/lib


libcudart.so.4 is present in /usr/local/cuda/lib, and that directory is in LD_LIBRARY_PATH.

Any idea what is missing?
Can someone help, please?

Thanks

#1
Posted 04/21/2012 04:40 AM   
Are you on a 32-bit system?
If you are on a 64-bit system, the link flag should be -L/usr/local/cuda/lib64.
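(That is, something like: mpicc mpibandwidth.c -o mpibandwidth -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -lcudart, assuming a 64-bit toolkit is installed under /usr/local/cuda/lib64.)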

Is the library present on the "slaves" systems?
What is the output of ldd on the remote systems when you run it through ssh ("ssh slave0 ldd mpibandwidth", assuming slave0 is the name of one of the 5 systems in your hostfile)?
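It is also worth checking what a non-interactive shell sees on the remote nodes, for example "ssh slave0 'echo $LD_LIBRARY_PATH'": the environment that mpirun-launched processes inherit is often not the one you get at an interactive prompt, because ~/.bashrc and the profile scripts are not necessarily sourced the same way.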

#2
Posted 04/21/2012 06:28 AM   
The machine is 64-bit but the installed CUDA is 32-bit.
Yes, all the machines have the same installation and path variables.

Although ssh slave01 ldd mpibandwidth.exe gives me:
ldd: ./mpibadwidth.exe: No such file or directory

Any ideas?

Thanks

#3
Posted 04/21/2012 06:44 AM   
Your executable is called mpibandwidth, not mpibandwidth.exe (you are specifying -o mpibandwidth when you compile).
Mixing 64-bit libraries and 32-bit libraries is not a good idea (mpicc is probably compiling for a 64-bit target).
Run "file mpibandwidth" to see whether your executable is 32-bit or 64-bit; a 32-bit binary cannot load 64-bit libraries, and vice versa.


M

#4
Posted 04/21/2012 07:02 AM   
Sorry for the confusion.

Even ssh slave0 ldd mpibandwidth gives the same error:
ldd: ./mpibadwidth: No such file or directory

file mpibandwidth gives
mpibandwidth: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), dynamically linked (uses shared libs), for GNU/Linux 2.6.18, not stripped

I have several other OpenMPI programs that work fine on the slaves using shared libraries.

#5
Posted 04/21/2012 07:08 AM   
Is LD_LIBRARY_PATH properly set on the slaves?
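Keep in mind that mpirun does not necessarily carry your interactive environment over to the remote ranks; with OpenMPI you can forward a variable explicitly, e.g. "mpirun -x LD_LIBRARY_PATH ..." (the -x option exports the named environment variable to the launched processes).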

BTW, with 32-bit executables you will not be able to use the new GPU-aware MPI.
The code you are trying to run is correct (it was written in my group and we use it routinely to check clusters), so the problem is in your setup.
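Roughly, "GPU-aware" (CUDA-aware) MPI means the MPI library accepts device pointers directly and performs the device/host staging itself, so you skip the explicit cudaMemcpy before MPI_Send. A minimal sketch of the idea (not the benchmark above), assuming an MPI build with CUDA support, which plain OpenMPI 1.4.x does not have:

[code]
/* Sketch only: what CUDA-aware MPI allows. Assumes an MPI library built
   with CUDA support; run with two ranks, each with access to a GPU. */
#include <mpi.h>
#include <cuda_runtime.h>

int main(int argc, char *argv[])
{
    int rank, n = 1 << 20;
    double *d_buf;                       /* device buffer */
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    cudaMalloc((void **)&d_buf, n * sizeof(double));

    /* The device pointer goes straight into MPI; the library handles the
       device<->host transfers (or GPUDirect) internally. */
    if (rank == 0)
        MPI_Send(d_buf, n, MPI_DOUBLE, 1, 0, MPI_COMM_WORLD);
    else if (rank == 1)
        MPI_Recv(d_buf, n, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status);

    cudaFree(d_buf);
    MPI_Finalize();
    return 0;
}
[/code]

Without that support MPI has to be given a host pointer, which is what the benchmark above does (it sends a_h, not a_d).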

#6
Posted 04/21/2012 03:28 PM   
[quote name='mfatica' date='21 April 2012 - 11:28 PM' timestamp='1335022092' post='1399125']
Is LD_LIBRARY_PATH properly set on the slaves?

BTW, with 32-bit executables you will not be able to use the new GPU-aware MPI.
The code you are trying to run is correct (it was written in my group and we use it routinely to check clusters), so the problem is in your setup.
[/quote]

Yes, LD_LIBRARY_PATH is correctly set on the slaves.
I got this sample code from this forum, and I just want to test whether my cluster setup is correct.

"With 32-bit executables you will not be able to use the new GPU-aware MPI" - what does this actually mean? Are there any alternatives?
I am using OpenMPI 1.4.5 and the CUDA 4.1 toolkit.

Would it work if I keep the MPI and CUDA code in separate files and then compile and link them into a single executable?

When I run the executable on only one machine it works. If it works on one machine, shouldn't it work on the cluster? All the machines have the same configuration.

#7
Posted 04/22/2012 02:28 AM   