[SOLVED] Segmentation fault in AMGX using matrix_upload_all_global

I am trying to incorporate AMGX into an existing MPI application to test how well it performs. However, when I call the AMGX_matrix_upload_all_global function, I receive a segmentation fault with signal code “Invalid permissions (2)”.

A quick Google search suggested this could be caused by an MPI build without CUDA support, but I am using OpenMPI 1.7.2 compiled with CUDA 6.5 support (the version AMGX requires), and I have verified that OpenMPI handles device pointers correctly.
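
(For reference, the device-pointer test I ran is essentially this minimal sketch: two ranks exchange a value held in GPU memory by handing raw device pointers to MPI_Sendrecv, which only succeeds when the MPI build is CUDA-aware. It assumes exactly two ranks and one GPU per rank.)

#include <cstdio>
#include "mpi.h"
#include "cuda_runtime.h"

int main( int argc, char* argv[] )
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    cudaSetDevice(rank);   // one GPU per rank (assumes 2 ranks, 2 GPUs)

    // Stage one value per rank in device memory
    double *d_send, *d_recv;
    cudaMalloc(&d_send, sizeof(double));
    cudaMalloc(&d_recv, sizeof(double));
    double h_val = rank + 1.0;
    cudaMemcpy(d_send, &h_val, sizeof(double), cudaMemcpyHostToDevice);

    // Hand device pointers straight to MPI; a non-CUDA-aware build
    // would crash or corrupt data here
    int peer = 1 - rank;
    MPI_Sendrecv(d_send, 1, MPI_DOUBLE, peer, 0,
                 d_recv, 1, MPI_DOUBLE, peer, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);

    cudaMemcpy(&h_val, d_recv, sizeof(double), cudaMemcpyDeviceToHost);
    std::printf("Rank %d received %f\n", rank, h_val);

    cudaFree(d_send);
    cudaFree(d_recv);
    MPI_Finalize();
    return 0;
}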

I am using:

  • AMGX 1.2.0-build108 (the CUDA 6.5 MPI build for Red Hat 6.5)
  • GCC 4.8.5
  • CUDA 6.5
  • OpenMPI 1.7.2 (which is what AMGX was built with)

I am running this on a Red Hat 6.7 machine (not sure whether that matters, since the library was compiled for RH 6.5). I also have 2 GPUs, which is what 2 MPI processes require, since AMGX maps one MPI process to each GPU. Running the code with a single process produces the same error.

To narrow the problem down, I reduced it to a simple example (the same matrix as in the user manual, with boilerplate added for MPI and the setup), and I get the same error.
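
For reference, the matrix in question is the 4x4 system below, split as rows 0-1 on rank 0 and rows 2-3 on rank 1 (five nonzeros per rank):

    [  1  -2   0   1 ]
    [ -3   1   0   0 ]
    [  0   0   1  -4 ]
    [  0   1  -5   1 ]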

Here is the code:

#include <iostream>
#include <iomanip>
#include <stdlib.h>
#include <string>
#include <fstream>
#include <streambuf>
#include "amgx_c.h"
#include "mpi.h"
#include "cuda_runtime.h"

int main( int argc, char* argv[] )
{

    int rank, numtasks;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm comm_ = MPI_COMM_WORLD;

    float *data;
    int *col_ind_global;
    int *row_ptr;
    int global2node[] = {0,0,0,0,0,1,1,1,1,1};

    // First rank
    if(rank == 0){

        data = new float[5];
        col_ind_global = new int[5];
        row_ptr = new int[3];

        data[0]=1; data[1]=-2; data[2]=1; data[3]=-3; data[4]=1;
        col_ind_global[0]=0; col_ind_global[1]=1; col_ind_global[2]=3; 
            col_ind_global[3]=0; col_ind_global[4]=1;
        row_ptr[0]=0; row_ptr[1]=3; row_ptr[2]=5;

    } else if (rank == 1){

        data = new float[5];
        col_ind_global = new int[5];
        row_ptr = new int[3];

        data[0]=1; data[1]=-4; data[2]=1; data[3]=-5; data[4]=1;
        col_ind_global[0]=2; col_ind_global[1]=3; col_ind_global[2]=1; 
            col_ind_global[3]=2; col_ind_global[4]=3;
        row_ptr[0]=0; row_ptr[1]=2; row_ptr[2]=5;

    }

//----------------------------------------------------------------------------80
    //---> Read config file from local directory
    std::string config_str_;
    std::ifstream t("amgx_config.json");

    t.seekg(0, std::ios::end);
    config_str_.reserve(t.tellg());
    t.seekg(0, std::ios::beg);

    config_str_.assign( (std::istreambuf_iterator<char>(t)),
                 std::istreambuf_iterator<char>() );
//----------------------------------------------------------------------------80

    // Print process information to double check data
    for(int i=0; i<numtasks; i++){
        if(rank == i){
            std::cout << "Rank " << rank << std::endl;
            for(int j=0; j<5; j++){
                std::cout << col_ind_global[j] << " - " 
                          << data[j] << std::endl;
            }
            std::cout << std::endl << row_ptr[0] << " " 
                      << row_ptr[1] << " " << row_ptr[2] << std::endl;
            for(int j=0; j<10; j++){
                std::cout << global2node[j] << ", ";
            }
            std::cout << std::endl;
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    AMGX_initialize();
    AMGX_initialize_plugins();

    AMGX_config_handle cfg;                 
    AMGX_matrix_handle matrix;
    AMGX_resources_handle rsrc;

    AMGX_config_create(&cfg, config_str_.c_str());
    
    // AMGX_resources_create_simple(&rsrc, cfg);
    AMGX_resources_create( &rsrc, cfg, &comm_, 1, &rank );
    AMGX_matrix_create(&matrix, rsrc, AMGX_mode_dFFI);

    int nrings; //=2;
    AMGX_config_get_default_number_of_rings(cfg, &nrings);
    // std::cout << "Rank " << rank << " with " << nrings
    //           << " rings." << std::endl;

    int nlocal = 2;
    int nglobal= 4;
    int nnz = 5;
    int blocksizex = 1;
    int blocksizey = 1;

    AMGX_matrix_upload_all_global(matrix, 
        nglobal, nlocal, nnz, blocksizex, blocksizey, row_ptr, 
        col_ind_global, data, NULL, nrings, nrings, global2node );

    MPI_Finalize();          

    delete[] data;
    delete[] col_ind_global;
    delete[] row_ptr;

    return 0;
}

And the error I receive is:

[noone@hostname amgx_upload_global_mat]$ mpirun -np 2 ./test
Rank 1
2 - 1
3 - -4
1 - 1
2 - -5
3 - 1

0 2 5
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 
Rank 0
0 - 1
1 - -2
3 - 1
0 - -3
1 - 1

0 3 5
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 
License acquired, proceeding
License acquired, proceeding
AMGX version 1.2.0-build108
Built on Dec 22 2014, 10:33:38
Compiled with CUDA Runtime 6.5, using CUDA driver 7.5
[hostname:06257] *** Process received signal ***
[hostname:06257] Signal: Segmentation fault (11)
[hostname:06257] Signal code: Invalid permissions (2)
[hostname:06257] Failing at address: 0x40623ccd0
[hostname:06258] *** Process received signal ***
[hostname:06258] Signal: Segmentation fault (11)
[hostname:06258] Signal code: Invalid permissions (2)
[hostname:06258] Failing at address: 0xc073683d8
[hostname:06257] [ 0] /lib64/libpthread.so.0(+0xf7e0) [0x7f4fec7b67e0]
[hostname:06257] [ 1] /localdata/amgx-6.5-mpi/lib/libamgxsh.so(+0x48f8e3) [0x7f4fed05b8e3]
[hostname:06257] [ 2] /localdata/amgx-6.5-mpi/lib/libamgxsh.so(AMGX_matrix_upload_all_global+0x317) [0x7f4fed366087]
[hostname:06257] [ 3] ./test() [0x4024fe]
[hostname:06257] [ 4] /lib64/libc.so.6(__libc_start_main+0xfd) [0x7f4feba8ad5d]
[hostname:06257] [ 5] ./test() [0x401d59]
[hostname:06257] *** End of error message ***
[hostname:06258] [ 0] /lib64/libpthread.so.0(+0xf7e0) [0x7fdb7ed5f7e0]
[hostname:06258] [ 1] /localdata/amgx-6.5-mpi/lib/libamgxsh.so(+0x48f8e3) [0x7fdb7f6048e3]
[hostname:06258] [ 2] /localdata/amgx-6.5-mpi/lib/libamgxsh.so(AMGX_matrix_upload_all_global+0x317) [0x7fdb7f90f087]
[hostname:06258] [ 3] ./test() [0x4024fe]
[hostname:06258] [ 4] /lib64/libc.so.6(__libc_start_main+0xfd) [0x7fdb7e033d5d]
[hostname:06258] [ 5] ./test() [0x401d59]
[hostname:06258] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 6257 on node hostname exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------

Looking at the header file, I see that the C API function I am trying to use is marked experimental. Is there a bug in this function? Has anyone else seen this error?

Thanks.

EDIT: The code above loads the config from a file in the same directory. I have tried many of the sample configuration files shipped with AMGX, but they all produce the same error.
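
For reference, the configs I tried all look roughly like this minimal AMG setup (a sketch in the config_version 2 JSON format, not an exact copy of one of the shipped files):

{
    "config_version": 2,
    "solver": {
        "solver": "AMG",
        "max_iters": 100,
        "tolerance": 1e-06,
        "monitor_residual": 1,
        "print_solve_stats": 1
    }
}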

Here is a copy of the output from GDB:

[noone@hostname amgx_upload_global_mat]$ gdb test
GNU gdb (GDB) Red Hat Enterprise Linux (7.2-83.el6)
Copyright (C) 2010 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
Reading symbols from amgx_upload_global_mat/test...done.
(gdb) r
Starting program: amgx_upload_global_mat/test 
[Thread debugging using libthread_db enabled]
Missing separate debuginfo for /usr/lib64/libcuda.so.1
Detaching after fork from child process 6853.
Rank 0
0 - 1
1 - -2
3 - 1
0 - -3
1 - 1

0 3 5
0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 
License acquired, proceeding
AMGX version 1.2.0-build108
Built on Dec 22 2014, 10:33:38
Compiled with CUDA Runtime 6.5, using CUDA driver 7.5
[New Thread 0x7fffe5064700 (LWP 6854)]
[New Thread 0x7fffdf7ff700 (LWP 6855)]

Program received signal SIGSEGV, Segmentation fault.
0x00007ffff10f38e3 in amgx::DistributedManager<amgx::TemplateConfig<(AMGX_MemorySpace)1, (AMGX_VecPrecision)1, (AMGX_MatPrecision)1, (AMGX_IndPrecision)2> >::loadDistributedMatrix(int, int, int, int, int const*, long const*, float const*, int, int const*, int, void const*) ()
   from /localdata/amgx-6.5-mpi/lib/libamgxsh.so
Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.166.el6_7.7.x86_64 numactl-2.0.9-2.el6.x86_64 samba-winbind-clients-3.6.23-30.el6_7.x86_64

I fixed the issue. Naturally, it was a mistake on my part.

Upon closer inspection of the API documentation, I found that the col_ind_global array must be of type int64_t. I was passing 32-bit integers, half the required width, which caused the segmentation fault. After changing the type, the upload ran, but then I hit a kernel launch error. That turned out to be my global2node array, which was also wrong: it is a partition vector with one entry per global row, so it should be {0,0,1,1} for the four unknowns, not the ten entries I had.
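
This is consistent with the loadDistributedMatrix frame in the GDB backtrace above, which expects long const* for the column indices. Paraphrasing the declaration from my copy of amgx_c.h (parameter names may differ between versions):

AMGX_RC AMGX_matrix_upload_all_global(AMGX_matrix_handle mtx,
    int n_global, int n, int nnz, int block_dimx, int block_dimy,
    const int *row_ptrs, const int64_t *col_indices_global,
    const void *data, const void *diag_data,
    int allocated_halo_depth, int num_import_rings,
    const int *partition_vector);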

Here is the corrected code:

#include <iostream>
#include <iomanip>
#include <stdlib.h>
#include <stdint.h>   // for int64_t, required by the global column indices
#include <string>
#include <fstream>
#include <streambuf>
#include "amgx_c.h"
#include "mpi.h"
#include "cuda_runtime.h"

int main( int argc, char* argv[] )
{

    int rank, numtasks;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm comm_ = MPI_COMM_WORLD;

    double *data = NULL;
    int64_t *col_ind_global = NULL;
    int *row_ptr = NULL;
    // Partition vector: one entry per global row (4 rows across 2 ranks)
    int global2node[] = {0,0,1,1};

    // First rank
    if(rank == 0){

        data = new double[5];
        col_ind_global = new int64_t[5];
        row_ptr = new int[3];

        double data_in[] = {1,-2,1,-3,1};
        int64_t colidx_in[] = {0,1,3,0,1};

        for(int i=0; i<5; i++){
            data[i] = data_in[i];
            col_ind_global[i] = colidx_in[i];
        }
        row_ptr[0]=0; row_ptr[1]=3; row_ptr[2]=5;

    } else if (rank == 1){

        data = new double[5];
        col_ind_global = new int64_t[5];
        row_ptr = new int[3];

        double data_in[] = {1,-4,1,-5,1};
        int64_t colidx_in[] = {2,3,1,2,3};

        for(int i=0; i<5; i++){
            data[i] = data_in[i];
            col_ind_global[i] = colidx_in[i];
        }
        row_ptr[0]=0; row_ptr[1]=2; row_ptr[2]=5;

    }

//----------------------------------------------------------------------------80
    //---> Read config file from local directory
    std::string config_str_;
    std::ifstream t("amgx_config.json");

    t.seekg(0, std::ios::end);
    config_str_.reserve(t.tellg());
    t.seekg(0, std::ios::beg);

    config_str_.assign( (std::istreambuf_iterator<char>(t)),
                 std::istreambuf_iterator<char>() );
//----------------------------------------------------------------------------80

    // Print process information to double check data
    for(int i=0; i<numtasks; i++){
        if(rank == i){
            std::cout << "Rank " << rank << std::endl;
            for(int j=0; j<5; j++){
                std::cout << col_ind_global[j] << " - " 
                          << data[j] << std::endl;
            }
            std::cout << std::endl << row_ptr[0] << " " 
                      << row_ptr[1] << " " << row_ptr[2] << std::endl;                
            for(int j=0; j<4; j++){
                std::cout << global2node[j] << ", ";
            }
            std::cout << std::endl;
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    AMGX_SAFE_CALL( AMGX_initialize() );
    AMGX_SAFE_CALL( AMGX_initialize_plugins() );

    AMGX_config_handle cfg;                 
    AMGX_matrix_handle matrix;
    AMGX_resources_handle rsrc;

    AMGX_SAFE_CALL( AMGX_config_create(&cfg, config_str_.c_str()) );
    // AMGX_resources_create_simple(&rsrc, cfg);
    AMGX_SAFE_CALL( AMGX_resources_create( &rsrc, cfg, &comm_, 1, &rank ) );
    AMGX_SAFE_CALL( AMGX_matrix_create(&matrix, rsrc, AMGX_mode_dDDI) );

    int nrings; //=2;
    AMGX_config_get_default_number_of_rings(cfg, &nrings);
    // std::cout << "Rank " << rank << " with " << nrings
    //           << " rings." << std::endl;

    int nlocal = 2;
    int nglobal= 4;
    int nnz = 5;
    int blocksizex = 1;
    int blocksizey = 1;

    AMGX_SAFE_CALL( AMGX_matrix_upload_all_global(matrix, 
        nglobal, nlocal, nnz, blocksizex, blocksizey, row_ptr, 
        col_ind_global, data, NULL, nrings, nrings, global2node ) );

    // Destroy AMGX objects and shut the library down before MPI_Finalize
    AMGX_SAFE_CALL( AMGX_matrix_destroy(matrix) );
    AMGX_SAFE_CALL( AMGX_resources_destroy(rsrc) );
    AMGX_SAFE_CALL( AMGX_config_destroy(cfg) );
    AMGX_SAFE_CALL( AMGX_finalize_plugins() );
    AMGX_SAFE_CALL( AMGX_finalize() );

    MPI_Finalize();

    delete[] data;
    delete[] col_ind_global;
    delete[] row_ptr;

    return 0;
}
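
For completeness, I build and run it roughly like this (the AMGX paths are from my install as shown in the backtrace; $CUDA_HOME is a placeholder for the CUDA toolkit location):

mpicxx test.cpp -o test \
    -I/localdata/amgx-6.5-mpi/include -I$CUDA_HOME/include \
    -L/localdata/amgx-6.5-mpi/lib -lamgxsh -Wl,-rpath,/localdata/amgx-6.5-mpi/lib \
    -L$CUDA_HOME/lib64 -lcudart
mpirun -np 2 ./test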