CUSVM CUDA MEX function compilation by VS2008 and unassigned output error .mexw64 cuda-svm training
I compiled the CUDA version of a support vector machine ([url="http://patternsonascreen.net/cuSVM.html"]cuSVM[/url]) using Visual C++ 2008 and CUDA Toolkit 3.2, and the result is the *.mexw64 version of the SVM training function.

after trying to use this function in MATLAB:

[code] load heart_scale.mat;

data = heart_scale_inst;
value = heart_scale_label;

C = 10;
kernel = 0.5;
stopcrit = 0.001;

[alphas, beta, svs] = cusvm_cuda_mex(value, data, C, kernel, [], stopcrit);
[/code]
I got the following error:

[code]??? One or more output arguments not assigned during call to "cusvm_cuda_mex".[/code]

But, it seems the mexFunction correctly assigned the outputs to plhs[ ]s.

I don't know what the source of the problem is: the build phase or the mexFunction code.
My operating system is windows 7 64-bit.

Your help and comments are greatly appreciated.

--------------------------------------------------------------------------------------------------------------------------------------------

Here is the svmTrain.cpp (mexFunction code):

[code]#include <mat.h>
#include <mex.h>

// Classification trainer, declared with C linkage; its definition is not in this file.
// As called from mexFunction below: mexalpha is a scratch array of m floats that
// mexFunction reads back after the call, beta points at the single float stored in
// the second MATLAB output, y holds the m training labels, x holds the m-by-n
// feature matrix (indexed as x[row + col*m]), and C, kernelwidth and StoppingCrit
// are the scalar training parameters.
extern "C"
void SVMTrain(float *mexalpha,float* beta,float*y,float *x ,float C, float kernelwidth, int m, int n, float StoppingCrit);

// Regression trainer, declared with C linkage; its definition is not in this file.
// Takes the same arguments as SVMTrain plus eps, the epsilon value that
// mexFunction reads from its fifth MATLAB input.
extern "C"
void SVRTrain(float *mexalpha,float* beta,float*y,float *x ,float C, float kernelwidth, float eps, int m, int n, float StoppingCrit);

// Returns true when `arg` is a 1-by-1 array.
static bool isScalarArg(const mxArray *arg)
{
    return mxGetM(arg)==1 && mxGetN(arg)==1;
}

// Reads a 1-by-1 double or single MATLAB argument and returns it as a float.
// Any other class raises a MATLAB error.
static float readFloatScalar(const mxArray *arg)
{
    if (mxIsClass(arg,"double"))
        return (float)*(double *)mxGetData(arg);

    if (mxIsClass(arg,"single"))
        return *(float *)mxGetData(arg);

    mexErrMsgTxt("The regularization parameter (C), the kernel width, epsilon, and the stopping criterion (if specified) all must be either single or double precision floats.");
    return 0.0f; // not reached: mexErrMsgTxt does not return to its caller
}

// MEX entry point for cuSVM training.
//
// Inputs (prhs):
//   [0] target vector, single precision, m-by-1
//   [1] feature matrix, single precision, m-by-n
//   [2] regularization parameter C, scalar single or double
//   [3] kernel width, scalar single or double, must be > 0
//   [4] epsilon, scalar single or double; pass [] to train a classifier
//       (SVMTrain) instead of a regressor (SVRTrain)
//   [5] optional stopping criterion, scalar in (0, .5); defaults to 0.001
//
// Outputs (plhs):
//   [0] numSVs-by-1 single vector with the non-zero alphas; for classification
//       they are multiplied by their labels and the positive-class entries are
//       stored first
//   [1] 1-by-1 single value written by the trainer through its `beta` argument
//   [2] numSVs-by-n single matrix with the feature rows matching plhs[0]
//
// Every validation failure is reported through mexErrMsgTxt.
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] )
{
    if (nlhs>3)
        mexErrMsgTxt("cuSVMTrain has at most 3 outputs.");

    if (nrhs>6)
        mexErrMsgTxt("Too many input arguments.");

    if (nrhs<5)
        mexErrMsgTxt("Too few input arguments.");

    if (!mxIsClass(prhs[0], "single") || !mxIsClass(prhs[1], "single"))
        mexErrMsgTxt("Both the target vector and feature matrix must consist of single precision floats.");

    // The trainers take int dimensions, so the sizes are narrowed once here.
    const int n=(int)mxGetN(prhs[1]);
    const int m=(int)mxGetM(prhs[1]);

    // Compare the unsigned sizes directly rather than against the narrowed int.
    if (mxGetM(prhs[0])!=mxGetM(prhs[1]))
        mexErrMsgTxt("The target vector and feature matrix must have the same number of rows.");

    if (mxGetN(prhs[0])!=1)
        mexErrMsgTxt("The target vector must only have one column.");

    // The label check below reads y[0], so an empty training set is rejected up front.
    if (m<1)
        mexErrMsgTxt("The training set must contain at least one sample.");

    // A non-empty fifth argument selects regression; a sixth argument is optional.
    const bool isRegression=!mxIsEmpty(prhs[4]);
    const bool hasStoppingCrit=(nrhs==6);

    if (!isScalarArg(prhs[2]) || !isScalarArg(prhs[3])
        || (isRegression && !isScalarArg(prhs[4]))
        || (hasStoppingCrit && !isScalarArg(prhs[5])))
        mexErrMsgTxt("The regularization parameter (C), the kernel width, epsilon, and the stopping criterion (if specified) all must be scalars.");

    const float C=readFloatScalar(prhs[2]);

    const float kernelwidth=readFloatScalar(prhs[3]);
    if (kernelwidth<=0)
        mexErrMsgTxt("The kernel width must be greater than zero.");

    // eps is only passed to SVRTrain; it is initialised so it is never indeterminate.
    float eps=0.0f;
    if (isRegression)
        eps=readFloatScalar(prhs[4]);

    float StoppingCrit=0.001f;
    if (hasStoppingCrit)
    {
        StoppingCrit=readFloatScalar(prhs[5]);

        if ((StoppingCrit<=0) || (StoppingCrit>=.5f))
            mexErrMsgTxt("The stopping criterion must be greater than zero and less than .5.");
    }

    float* y=(float *)mxGetData(prhs[0]);
    float* x=(float *)mxGetData(prhs[1]);

    // Validate the labels BEFORE allocating the scratch buffer: mexErrMsgTxt does
    // not return, so raising an error after `new` would leak the buffer.
    if (!isRegression)
    {
        bool sawSecondClass=false;
        bool sawInvalidLabel=false;

        for(int k=0;k<m;k++)
        {
            if (y[k]!=y[0]) {sawSecondClass=true;}
            if ((y[k]!=1.0f) && (y[k]!=-1.0f)) {sawInvalidLabel=true;}
        }

        if (!sawSecondClass)
            mexErrMsgTxt("All training labels are of the same class. There must of course be two classes");

        if (sawInvalidLabel)
            mexErrMsgTxt("Training labels must be either 1 or -1.");
    }

    plhs[1]=mxCreateNumericMatrix(1, 1,mxSINGLE_CLASS, mxREAL);
    float *beta=(float*)mxGetData(plhs[1]);

    // Scratch buffer handed to the trainer, one alpha per training sample.
    // It is released at the end of this function.
    float *alpha=new float [m];

    if (isRegression)
        SVRTrain(alpha,beta,y,x ,C,kernelwidth,eps ,m,n,StoppingCrit);
    else
        SVMTrain(alpha,beta,y,x ,C,kernelwidth,m,n,StoppingCrit);

    // Count the support vectors (non-zero alphas). For classification the alphas
    // are also multiplied by their labels and the positive-class ones are counted
    // separately so they can be stored first.
    int numSVs=0;
    int numPosSVs=0;
    for(int k=0;k<m;k++)
    {
        if (alpha[k]==0)
            continue;

        if (!isRegression)
        {
            alpha[k]*=y[k];
            if (y[k]>0) {numPosSVs++;}
        }

        numSVs++;
    }

    plhs[0]=mxCreateNumericMatrix(numSVs, 1,mxSINGLE_CLASS, mxREAL);
    float *SvAlphas=(float*)mxGetData(plhs[0]);

    plhs[2]=mxCreateNumericMatrix(numSVs, n,mxSINGLE_CLASS, mxREAL);
    float *Svs=(float*)mxGetData(plhs[2]);

    // Copy the support vectors to the outputs. Classification keeps two write
    // cursors: positive-class rows fill [0, numPosSVs) and the remaining rows
    // fill [numPosSVs, numSVs). Regression leaves numPosSVs at 0 and only uses
    // the leading cursor, so rows keep their original order.
    int nextLeadingRow=0;
    int nextTrailingRow=numPosSVs;

    for(int k=0;k<m;k++)
    {
        if (alpha[k]==0)
            continue;

        const bool goesFirst=isRegression || (y[k]>0);
        const int row=goesFirst ? nextLeadingRow++ : nextTrailingRow++;

        SvAlphas[row]=alpha[k];

        // Both x and Svs are indexed as [row + column*rowCount].
        for(int j=0;j<n;j++)
            {Svs[row+j*numSVs]=x[k+j*m];}
    }

    delete [] alpha;
}
[/code]
I compiled the CUDA version of a support vector machine (cuSVM) using Visual C++ 2008 and CUDA Toolkit 3.2, and the result is the *.mexw64 version of the SVM training function.



after trying to use this function in MATLAB:



load heart_scale.mat;



data = heart_scale_inst;

value = heart_scale_label;



C = 10;

kernel = 0.5;

stopcrit = 0.001;



[alphas, beta, svs] = cusvm_cuda_mex(value, data, C, kernel, [], stopcrit);


I got the following error:



??? One or more output arguments not assigned during call to "cusvm_cuda_mex".




But, it seems the mexFunction correctly assigned the outputs to plhs[ ]s.



I don't know what the source of the problem is: the build phase or the mexFunction code.

My operating system is windows 7 64-bit.



Your help and comments are greatly appreciated.



--------------------------------------------------------------------------------------------------------------------------------------------



Here is the svmTrain.cpp (mexFunction code):



#include <mat.h>

#include <mex.h>



// Classification trainer, declared with C linkage; its definition is not in this file.
// As called from mexFunction below: mexalpha is a scratch array of m floats that
// mexFunction reads back after the call, beta points at the single float stored in
// the second MATLAB output, y holds the m training labels, x holds the m-by-n
// feature matrix (indexed as x[row + col*m]), and C, kernelwidth and StoppingCrit
// are the scalar training parameters.
extern "C"

void SVMTrain(float *mexalpha,float* beta,float*y,float *x ,float C, float kernelwidth, int m, int n, float StoppingCrit);



// Regression trainer, declared with C linkage; its definition is not in this file.
// Takes the same arguments as SVMTrain plus eps, the epsilon value that
// mexFunction reads from its fifth MATLAB input.
extern "C"

void SVRTrain(float *mexalpha,float* beta,float*y,float *x ,float C, float kernelwidth, float eps, int m, int n, float StoppingCrit);



// Returns true when `arg` is a 1-by-1 array.
static bool isScalarArg(const mxArray *arg)
{
    return mxGetM(arg)==1 && mxGetN(arg)==1;
}

// Reads a 1-by-1 double or single MATLAB argument and returns it as a float.
// Any other class raises a MATLAB error.
static float readFloatScalar(const mxArray *arg)
{
    if (mxIsClass(arg,"double"))
        return (float)*(double *)mxGetData(arg);

    if (mxIsClass(arg,"single"))
        return *(float *)mxGetData(arg);

    mexErrMsgTxt("The regularization parameter (C), the kernel width, epsilon, and the stopping criterion (if specified) all must be either single or double precision floats.");
    return 0.0f; // not reached: mexErrMsgTxt does not return to its caller
}

// MEX entry point for cuSVM training.
//
// Inputs (prhs):
//   [0] target vector, single precision, m-by-1
//   [1] feature matrix, single precision, m-by-n
//   [2] regularization parameter C, scalar single or double
//   [3] kernel width, scalar single or double, must be > 0
//   [4] epsilon, scalar single or double; pass [] to train a classifier
//       (SVMTrain) instead of a regressor (SVRTrain)
//   [5] optional stopping criterion, scalar in (0, .5); defaults to 0.001
//
// Outputs (plhs):
//   [0] numSVs-by-1 single vector with the non-zero alphas; for classification
//       they are multiplied by their labels and the positive-class entries are
//       stored first
//   [1] 1-by-1 single value written by the trainer through its `beta` argument
//   [2] numSVs-by-n single matrix with the feature rows matching plhs[0]
//
// Every validation failure is reported through mexErrMsgTxt.
void mexFunction( int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[] )
{
    if (nlhs>3)
        mexErrMsgTxt("cuSVMTrain has at most 3 outputs.");

    if (nrhs>6)
        mexErrMsgTxt("Too many input arguments.");

    if (nrhs<5)
        mexErrMsgTxt("Too few input arguments.");

    if (!mxIsClass(prhs[0], "single") || !mxIsClass(prhs[1], "single"))
        mexErrMsgTxt("Both the target vector and feature matrix must consist of single precision floats.");

    // The trainers take int dimensions, so the sizes are narrowed once here.
    const int n=(int)mxGetN(prhs[1]);
    const int m=(int)mxGetM(prhs[1]);

    // Compare the unsigned sizes directly rather than against the narrowed int.
    if (mxGetM(prhs[0])!=mxGetM(prhs[1]))
        mexErrMsgTxt("The target vector and feature matrix must have the same number of rows.");

    if (mxGetN(prhs[0])!=1)
        mexErrMsgTxt("The target vector must only have one column.");

    // The label check below reads y[0], so an empty training set is rejected up front.
    if (m<1)
        mexErrMsgTxt("The training set must contain at least one sample.");

    // A non-empty fifth argument selects regression; a sixth argument is optional.
    const bool isRegression=!mxIsEmpty(prhs[4]);
    const bool hasStoppingCrit=(nrhs==6);

    if (!isScalarArg(prhs[2]) || !isScalarArg(prhs[3])
        || (isRegression && !isScalarArg(prhs[4]))
        || (hasStoppingCrit && !isScalarArg(prhs[5])))
        mexErrMsgTxt("The regularization parameter (C), the kernel width, epsilon, and the stopping criterion (if specified) all must be scalars.");

    const float C=readFloatScalar(prhs[2]);

    const float kernelwidth=readFloatScalar(prhs[3]);
    if (kernelwidth<=0)
        mexErrMsgTxt("The kernel width must be greater than zero.");

    // eps is only passed to SVRTrain; it is initialised so it is never indeterminate.
    float eps=0.0f;
    if (isRegression)
        eps=readFloatScalar(prhs[4]);

    float StoppingCrit=0.001f;
    if (hasStoppingCrit)
    {
        StoppingCrit=readFloatScalar(prhs[5]);

        if ((StoppingCrit<=0) || (StoppingCrit>=.5f))
            mexErrMsgTxt("The stopping criterion must be greater than zero and less than .5.");
    }

    float* y=(float *)mxGetData(prhs[0]);
    float* x=(float *)mxGetData(prhs[1]);

    // Validate the labels BEFORE allocating the scratch buffer: mexErrMsgTxt does
    // not return, so raising an error after `new` would leak the buffer.
    if (!isRegression)
    {
        bool sawSecondClass=false;
        bool sawInvalidLabel=false;

        for(int k=0;k<m;k++)
        {
            if (y[k]!=y[0]) {sawSecondClass=true;}
            if ((y[k]!=1.0f) && (y[k]!=-1.0f)) {sawInvalidLabel=true;}
        }

        if (!sawSecondClass)
            mexErrMsgTxt("All training labels are of the same class. There must of course be two classes");

        if (sawInvalidLabel)
            mexErrMsgTxt("Training labels must be either 1 or -1.");
    }

    plhs[1]=mxCreateNumericMatrix(1, 1,mxSINGLE_CLASS, mxREAL);
    float *beta=(float*)mxGetData(plhs[1]);

    // Scratch buffer handed to the trainer, one alpha per training sample.
    // It is released at the end of this function.
    float *alpha=new float [m];

    if (isRegression)
        SVRTrain(alpha,beta,y,x ,C,kernelwidth,eps ,m,n,StoppingCrit);
    else
        SVMTrain(alpha,beta,y,x ,C,kernelwidth,m,n,StoppingCrit);

    // Count the support vectors (non-zero alphas). For classification the alphas
    // are also multiplied by their labels and the positive-class ones are counted
    // separately so they can be stored first.
    int numSVs=0;
    int numPosSVs=0;
    for(int k=0;k<m;k++)
    {
        if (alpha[k]==0)
            continue;

        if (!isRegression)
        {
            alpha[k]*=y[k];
            if (y[k]>0) {numPosSVs++;}
        }

        numSVs++;
    }

    plhs[0]=mxCreateNumericMatrix(numSVs, 1,mxSINGLE_CLASS, mxREAL);
    float *SvAlphas=(float*)mxGetData(plhs[0]);

    plhs[2]=mxCreateNumericMatrix(numSVs, n,mxSINGLE_CLASS, mxREAL);
    float *Svs=(float*)mxGetData(plhs[2]);

    // Copy the support vectors to the outputs. Classification keeps two write
    // cursors: positive-class rows fill [0, numPosSVs) and the remaining rows
    // fill [numPosSVs, numSVs). Regression leaves numPosSVs at 0 and only uses
    // the leading cursor, so rows keep their original order.
    int nextLeadingRow=0;
    int nextTrailingRow=numPosSVs;

    for(int k=0;k<m;k++)
    {
        if (alpha[k]==0)
            continue;

        const bool goesFirst=isRegression || (y[k]>0);
        const int row=goesFirst ? nextLeadingRow++ : nextTrailingRow++;

        SvAlphas[row]=alpha[k];

        // Both x and Svs are indexed as [row + column*rowCount].
        for(int j=0;j<n;j++)
            {Svs[row+j*numSVs]=x[k+j*m];}
    }

    delete [] alpha;
}

#1
Posted 02/20/2012 09:46 AM   
Here is the summary of steps that I followed to compile cuSVM in visual studio 2008.
I wonder if I missed any necessary step. :ermm:


[quote]

STEPS TO BUILD CUDA MEX IN VISUAL STUDIO 2008:

(0) create a Win32 project -> Next -> choose "DLL" and "Empty project" -> Finish

(1) add *.cpp and *.cu file.
Then go to : Project -> Custom Build Rules -> select the second "CUDA Runtime API Build Rule" (with extension *.v3.2)

(2) add *.rc (resource) file "mexversion.rc" :
C:\Program Files (x86)\MATLAB\MATLAB Component Runtime\v77\extern\include

(3) add a *.def to root of solution(currently with same name of project e.g. cusvm_cuda_mex) and type:
LIBRARY "cusvm_cuda_mex.mexw64"
EXPORTS mexFunction

or simply:
LIBRARY
EXPORTS mexFunction

(4) add *.h file to the root of the solution(cuSVMulti.h)

(5) C/C++ -> Preprocessor -> Preprocessor Definitions
add : MATLAB_MEX_FILE

(6) C/C++ ->Additional Include Directories:
C:\Program Files\MATLAB\R2010b\extern\include
G:\term4\Thesis3\Line-Filtering\SVM Classifier\Codes\8-cuSVM\cuSVM\cuSVM\inc
C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\inc
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\include

(7) Linker -> Additional Library Directories
C:\Program Files\MATLAB\R2010b\extern\lib\win64\microsoft
C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\lib
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\lib\x64

(8) Linker -> Input
cudart.lib
cublas.lib
cuda.lib
libmx.lib
libmex.lib
libmat.lib

(9) Linker --->input--->module definition file
add the name of *.def file (e.g. cusvm_cuda_mex.def)

(10) Linker -> Output file
change extension from .dll to .mexw64

(11) Linker -> Command Line -> Additional Options
/export:mexFunction /dll

(12) x64 Settings:
--- Set solution platform (middle of top standard toolbar) to x64
--- Linker -> Advanced -> Target Machine : Machine X64
--- CUDA Runtime API -> Host -> x64

(13) CUDA Runtime API -> General -> Additional Include Directories
C:\Program Files\MATLAB\R2010b\extern\lib\win64\microsoft
C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\inc
G:\term4\Thesis3\Line-Filtering\SVM Classifier\Codes\8-cuSVM\cuSVM\cuSVM\inc
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\include
C:\Program Files\MATLAB\R2010b\extern\include

(14) Tools –> Options –> Projects and Solutions –> VC++ Directories

select x64 & include--------------------------------------------------------------------------------------------------------
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\include
C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\inc

select x64 & library---------------------------------------------------------------------------------------------------------
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\lib\x64
C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\lib

select x64 & executable----------------------------------------------------------------------------------------------------
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\bin

(15) Removing the errors

I) Add to line 17 (after finishing the include files) of the *.cu file:
using namespace std;

II) Replace the codes of the lines 623, 952.
(error: argument of type "unsigned int *" is incompatible with parameter of type "size_t *")

Incorrect code:
unsigned int free, total;
cuMemGetInfo(&free, &total);

Correct code:
size_t free, total;
cuMemGetInfo(&free, &total);

III) Download the microsoft hotfix for visual studio 2008: http://support.microsoft.com/kb/948127
can solve this error: fatal error LNK1000: Internal error during IncrBuildImage

--------------------------------------------------------------------------------------------------------------------------------------

Execution
Increase the Stack Reserve size in Project ‐> Properties ‐> Linker ‐> System ‐>
Stack Reserve Size = 500000000


[/quote]
Here is the summary of steps that I followed to compile cuSVM in visual studio 2008.

I wonder if I missed any necessary step. :ermm:









STEPS TO BUILD CUDA MEX IN VISUAL STUDIO 2008:



(0) create a Win32 project -> Next -> choose "DLL" and "Empty project" -> Finish



(1) add *.cpp and *.cu file.

Then go to : Project -> Custom Build Rules -> select the second "CUDA Runtime API Build Rule" (with extension *.v3.2)



(2) add *.rc (resource) file "mexversion.rc" :

C:\Program Files (x86)\MATLAB\MATLAB Component Runtime\v77\extern\include



(3) add a *.def to root of solution(currently with same name of project e.g. cusvm_cuda_mex) and type:

LIBRARY "cusvm_cuda_mex.mexw64"

EXPORTS mexFunction



or simply:

LIBRARY

EXPORTS mexFunction



(4) add *.h file to the root of the solution(cuSVMulti.h)



(5) C/C++ -> Preprocessor -> Preprocessor Definitions

add : MATLAB_MEX_FILE



(6) C/C++ ->Additional Include Directories:

C:\Program Files\MATLAB\R2010b\extern\include

G:\term4\Thesis3\Line-Filtering\SVM Classifier\Codes\8-cuSVM\cuSVM\cuSVM\inc

C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\inc

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\include



(7) Linker -> Additional Library Directories

C:\Program Files\MATLAB\R2010b\extern\lib\win64\microsoft

C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\lib

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\lib\x64



(8) Linker -> Input

cudart.lib

cublas.lib

cuda.lib

libmx.lib

libmex.lib

libmat.lib



(9) Linker --->input--->module definition file

add the name of *.def file (e.g. cusvm_cuda_mex.def)



(10) Linker -> Output file

change extension from .dll to .mexw64



(11) Linker -> Command Line -> Additional Options

/export:mexFunction /dll



(12) x64 Settings:

--- Set solution platform (middle of top standard toolbar) to x64

--- Linker -> Advanced -> Target Machine : Machine X64

--- CUDA Runtime API -> Host -> x64



(13) CUDA Runtime API -> General -> Additional Include Directories

C:\Program Files\MATLAB\R2010b\extern\lib\win64\microsoft

C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\inc

G:\term4\Thesis3\Line-Filtering\SVM Classifier\Codes\8-cuSVM\cuSVM\cuSVM\inc

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\include

C:\Program Files\MATLAB\R2010b\extern\include



(14) Tools –> Options –> Projects and Solutions –> VC++ Directories



select x64 & include--------------------------------------------------------------------------------------------------------

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\include

C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\inc



select x64 & library---------------------------------------------------------------------------------------------------------

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\lib\x64

C:\ProgramData\NVIDIA Corporation\NVIDIA GPU Computing SDK 3.2\C\common\lib



select x64 & executable----------------------------------------------------------------------------------------------------

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v3.2\bin



(15) Removing the errors



I) Add to line 17 (after finishing the include files) of the *.cu file:

using namespace std;



II) Replace the codes of the lines 623, 952.

(error: argument of type "unsigned int *" is incompatible with parameter of type "size_t *")



Incorrect code:

unsigned int free, total;

cuMemGetInfo(&free, &total);



Correct code:

size_t free, total;

cuMemGetInfo(&free, &total);



III) Download the microsoft hotfix for visual studio 2008: http://support.microsoft.com/kb/948127

can solve this error: fatal error LNK1000: Internal error during IncrBuildImage



--------------------------------------------------------------------------------------------------------------------------------------



Execution

Increase the Stack Reserve size in Project ‐> Properties ‐> Linker ‐> System ‐>

Stack Reserve Size = 500000000





#2
Posted 02/24/2012 05:29 PM   
Scroll To Top