Functions
MLCommon::LinAlg::opg Namespace Reference

Functions

void eigDC (const raft::handle_t &h, float *eigenValues, float *eigenVectors, std::vector< Matrix::Data< float > * > &inParts, Matrix::PartDescriptor &desc, int myRank, cudaStream_t stream)
 Multi-GPU version of Eigen decomposition. This function works for symmetric matrices only. Whole input matrix is gathered at rank 0 and Eigen decomposition is carried out sequentially. More...
 
void eigDC (const raft::handle_t &h, double *eigenValues, double *eigenVectors, std::vector< Matrix::Data< double > * > &inParts, Matrix::PartDescriptor &desc, int myRank, cudaStream_t stream)
 
void eigJacobi (const raft::handle_t &h, float *eigenValues, float *eigenVectors, std::vector< Matrix::Data< float > * > &inParts, Matrix::PartDescriptor &desc, int myRank, cudaStream_t stream)
 
void eigJacobi (const raft::handle_t &h, double *eigenValues, double *eigenVectors, std::vector< Matrix::Data< double > * > &inParts, Matrix::PartDescriptor &desc, int myRank, cudaStream_t stream)
 
void gemm (const raft::handle_t &h, std::vector< Matrix::Data< float > * > &outZParts, Matrix::PartDescriptor &outZDesc, std::vector< Matrix::Data< float > * > &inXParts, Matrix::PartDescriptor &inXDesc, std::vector< Matrix::Data< float > * > &inYParts, Matrix::PartDescriptor &inYDesc, int myRank, cudaStream_t stream)
 A multi-GPU generalized matrix multiplication function. This function performs Z = X * Y. The X and Y matrices are distributed in blocks on different ranks. First, the Y matrix is duplicated at each rank; it is then multiplied with the blocks of X local to that rank. More...
 
void gemm (const raft::handle_t &h, std::vector< Matrix::Data< double > * > &outZParts, Matrix::PartDescriptor &outZDesc, std::vector< Matrix::Data< double > * > &inXParts, Matrix::PartDescriptor &inXDesc, std::vector< Matrix::Data< double > * > &inYParts, Matrix::PartDescriptor &inYDesc, int myRank, cudaStream_t stream)
 
void lstsqEig (const raft::handle_t &handle, const std::vector< Matrix::Data< float > * > &A, const Matrix::PartDescriptor &ADesc, const std::vector< Matrix::Data< float > * > &b, float *w, cudaStream_t *streams, int n_streams)
 performs MNMG Least squares calculation. More...
 
void lstsqEig (const raft::handle_t &handle, const std::vector< Matrix::Data< double > * > &A, const Matrix::PartDescriptor &ADesc, const std::vector< Matrix::Data< double > * > &b, double *w, cudaStream_t *streams, int n_streams)
 
void meanSquaredError (double *out, const Matrix::Data< double > &in1, const Matrix::PartDescriptor &in1Desc, const Matrix::Data< double > &in2, const Matrix::PartDescriptor &in2Desc, const raft::comms::comms_t &comm, cudaStream_t stream, int root=0, bool broadcastResult=true)
 multi-gpu mean squared error More...
 
void meanSquaredError (float *out, const Matrix::Data< float > &in1, const Matrix::PartDescriptor &in1Desc, const Matrix::Data< float > &in2, const Matrix::PartDescriptor &in2Desc, const raft::comms::comms_t &comm, cudaStream_t stream, int root=0, bool broadcastResult=true)
 
void mm_aTa (const raft::handle_t &handle, Matrix::Data< double > &out, const std::vector< Matrix::Data< double > * > &in1, const Matrix::PartDescriptor &in1Desc, cudaStream_t *streams, int n_streams)
 performs MNMG A^T x A calculation. More...
 
void mm_aTa (const raft::handle_t &handle, Matrix::Data< float > &out, const std::vector< Matrix::Data< float > * > &in1, const Matrix::PartDescriptor &in1Desc, cudaStream_t *streams, int n_streams)
 
void mv_aTb (const raft::handle_t &handle, Matrix::Data< double > &out, const std::vector< Matrix::Data< double > * > &A, const Matrix::PartDescriptor &ADesc, const std::vector< Matrix::Data< double > * > &b, cudaStream_t *streams, int n_streams)
 performs MNMG A^T x b calculation. More...
 
void mv_aTb (const raft::handle_t &handle, Matrix::Data< float > &out, const std::vector< Matrix::Data< float > * > &A, const Matrix::PartDescriptor &ADesc, const std::vector< Matrix::Data< float > * > &b, cudaStream_t *streams, int n_streams)
 
void colNorm2 (const raft::handle_t &handle, Matrix::Data< double > &out, const std::vector< Matrix::Data< double > * > &in, const Matrix::PartDescriptor &inDesc, cudaStream_t *streams, int n_streams)
 performs MNMG column-wise L2 norm calculation. More...
 
void colNorm2 (const raft::handle_t &handle, Matrix::Data< float > &out, const std::vector< Matrix::Data< float > * > &in, const Matrix::PartDescriptor &inDesc, cudaStream_t *streams, int n_streams)
 
void colNorm2NoSeq (const raft::handle_t &handle, Matrix::Data< double > &out, const std::vector< Matrix::Data< double > * > &in, const Matrix::PartDescriptor &inDesc, cudaStream_t *streams, int n_streams)
 
void colNorm2NoSeq (const raft::handle_t &handle, Matrix::Data< float > &out, const std::vector< Matrix::Data< float > * > &in, const Matrix::PartDescriptor &inDesc, cudaStream_t *streams, int n_streams)
 
void svdEig (const raft::handle_t &handle, const std::vector< Matrix::Data< float > * > &A, const Matrix::PartDescriptor &ADesc, std::vector< Matrix::Data< float > * > &U, float *S, float *V, cudaStream_t *streams, int n_streams)
 performs MNMG SVD calculation using Eigen decomposition. More...
 
void svdEig (const raft::handle_t &handle, const std::vector< Matrix::Data< double > * > &A, const Matrix::PartDescriptor &ADesc, std::vector< Matrix::Data< double > * > &U, double *S, double *V, cudaStream_t *streams, int n_streams)
 

Function Documentation

◆ colNorm2() [1/2]

void MLCommon::LinAlg::opg::colNorm2 ( const raft::handle_t &  handle,
Matrix::Data< double > &  out,
const std::vector< Matrix::Data< double > * > &  in,
const Matrix::PartDescriptor &  inDesc,
cudaStream_t *  streams,
int  n_streams 
)

performs MNMG column-wise L2 norm calculation.

◆ colNorm2() [2/2]

void MLCommon::LinAlg::opg::colNorm2 ( const raft::handle_t &  handle,
Matrix::Data< float > &  out,
const std::vector< Matrix::Data< float > * > &  in,
const Matrix::PartDescriptor &  inDesc,
cudaStream_t *  streams,
int  n_streams 
)

◆ colNorm2NoSeq() [1/2]

void MLCommon::LinAlg::opg::colNorm2NoSeq ( const raft::handle_t &  handle,
Matrix::Data< double > &  out,
const std::vector< Matrix::Data< double > * > &  in,
const Matrix::PartDescriptor &  inDesc,
cudaStream_t *  streams,
int  n_streams 
)

◆ colNorm2NoSeq() [2/2]

void MLCommon::LinAlg::opg::colNorm2NoSeq ( const raft::handle_t &  handle,
Matrix::Data< float > &  out,
const std::vector< Matrix::Data< float > * > &  in,
const Matrix::PartDescriptor &  inDesc,
cudaStream_t *  streams,
int  n_streams 
)

◆ eigDC() [1/2]

void MLCommon::LinAlg::opg::eigDC ( const raft::handle_t &  h,
double *  eigenValues,
double *  eigenVectors,
std::vector< Matrix::Data< double > * > &  inParts,
Matrix::PartDescriptor &  desc,
int  myRank,
cudaStream_t  stream 
)

◆ eigDC() [2/2]

void MLCommon::LinAlg::opg::eigDC ( const raft::handle_t &  h,
float *  eigenValues,
float *  eigenVectors,
std::vector< Matrix::Data< float > * > &  inParts,
Matrix::PartDescriptor &  desc,
int  myRank,
cudaStream_t  stream 
)

Multi-GPU version of Eigen decomposition. This function works for symmetric matrices only. Whole input matrix is gathered at rank 0 and Eigen decomposition is carried out sequentially.

Parameters
h: cuML handle object.
[out] eigenValues: Output N Eigen values.
[out] eigenVectors: Output N Eigen vectors of size N x 1.
[in] inParts: Input symmetric matrix of size N x N.
[in] desc: Descriptor of input matrix.
[in] myRank: MPI rank of the process.
[in] stream: CUDA stream.

◆ eigJacobi() [1/2]

void MLCommon::LinAlg::opg::eigJacobi ( const raft::handle_t &  h,
double *  eigenValues,
double *  eigenVectors,
std::vector< Matrix::Data< double > * > &  inParts,
Matrix::PartDescriptor &  desc,
int  myRank,
cudaStream_t  stream 
)

◆ eigJacobi() [2/2]

void MLCommon::LinAlg::opg::eigJacobi ( const raft::handle_t &  h,
float *  eigenValues,
float *  eigenVectors,
std::vector< Matrix::Data< float > * > &  inParts,
Matrix::PartDescriptor &  desc,
int  myRank,
cudaStream_t  stream 
)

◆ gemm() [1/2]

void MLCommon::LinAlg::opg::gemm ( const raft::handle_t &  h,
std::vector< Matrix::Data< double > * > &  outZParts,
Matrix::PartDescriptor &  outZDesc,
std::vector< Matrix::Data< double > * > &  inXParts,
Matrix::PartDescriptor &  inXDesc,
std::vector< Matrix::Data< double > * > &  inYParts,
Matrix::PartDescriptor &  inYDesc,
int  myRank,
cudaStream_t  stream 
)

◆ gemm() [2/2]

void MLCommon::LinAlg::opg::gemm ( const raft::handle_t &  h,
std::vector< Matrix::Data< float > * > &  outZParts,
Matrix::PartDescriptor &  outZDesc,
std::vector< Matrix::Data< float > * > &  inXParts,
Matrix::PartDescriptor &  inXDesc,
std::vector< Matrix::Data< float > * > &  inYParts,
Matrix::PartDescriptor &  inYDesc,
int  myRank,
cudaStream_t  stream 
)

A multi-GPU generalized matrix multiplication function. This function performs Z = X * Y. The X and Y matrices are distributed in blocks on different ranks. First, the Y matrix is duplicated at each rank; it is then multiplied with the blocks of X local to that rank.

Parameters
h: cuML handle object.
[out] outZParts: Result of the multiplication with size M x N. Distributed across ranks according to inXDesc/outZDesc descriptor.
[out] outZDesc: Descriptor for outZParts matrix. It has to be the same as inXDesc.
[in] inXParts: Input matrix X with dimensions M x K. Distributed across ranks according to inXDesc descriptor.
[in] inXDesc: Descriptor for X matrix.
[in] inYParts: Input matrix Y with dimensions K x N. Distributed across ranks according to inYDesc descriptor.
[in] inYDesc: Descriptor for Y matrix.
[in] myRank: Rank of calling process.
[in] stream: CUDA stream on which work is launched.

◆ lstsqEig() [1/2]

void MLCommon::LinAlg::opg::lstsqEig ( const raft::handle_t &  handle,
const std::vector< Matrix::Data< double > * > &  A,
const Matrix::PartDescriptor &  ADesc,
const std::vector< Matrix::Data< double > * > &  b,
double *  w,
cudaStream_t *  streams,
int  n_streams 
)

◆ lstsqEig() [2/2]

void MLCommon::LinAlg::opg::lstsqEig ( const raft::handle_t &  handle,
const std::vector< Matrix::Data< float > * > &  A,
const Matrix::PartDescriptor &  ADesc,
const std::vector< Matrix::Data< float > * > &  b,
float *  w,
cudaStream_t *  streams,
int  n_streams 
)

performs MNMG Least squares calculation.

◆ meanSquaredError() [1/2]

void MLCommon::LinAlg::opg::meanSquaredError ( double *  out,
const Matrix::Data< double > &  in1,
const Matrix::PartDescriptor &  in1Desc,
const Matrix::Data< double > &  in2,
const Matrix::PartDescriptor &  in2Desc,
const raft::comms::comms_t &  comm,
cudaStream_t  stream,
int  root = 0,
bool  broadcastResult = true 
)

multi-gpu mean squared error

Parameters
[out] out: the output data (device pointer)
[in] in1: the first input data
[in] in1Desc: descriptor for the first input data
[in] in2: the second input data
[in] in2Desc: descriptor for the second input data
[in] comm: the communicator object
[in] stream: CUDA stream on which to launch work
[in] root: worker ID which is to be considered as root
[in] broadcastResult: if false, only the root process will have the result; otherwise all ranks will

◆ meanSquaredError() [2/2]

void MLCommon::LinAlg::opg::meanSquaredError ( float *  out,
const Matrix::Data< float > &  in1,
const Matrix::PartDescriptor &  in1Desc,
const Matrix::Data< float > &  in2,
const Matrix::PartDescriptor &  in2Desc,
const raft::comms::comms_t &  comm,
cudaStream_t  stream,
int  root = 0,
bool  broadcastResult = true 
)

◆ mm_aTa() [1/2]

void MLCommon::LinAlg::opg::mm_aTa ( const raft::handle_t &  handle,
Matrix::Data< double > &  out,
const std::vector< Matrix::Data< double > * > &  in1,
const Matrix::PartDescriptor &  in1Desc,
cudaStream_t *  streams,
int  n_streams 
)

performs MNMG A^T x A calculation.

Parameters
[in] handle: cuML handle object
[out] out: resulting A^T x A matrix
[in] in1: input data
[in] in1Desc: MNMG description of the input
[in] streams: CUDA streams
[in] n_streams: number of streams

◆ mm_aTa() [2/2]

void MLCommon::LinAlg::opg::mm_aTa ( const raft::handle_t &  handle,
Matrix::Data< float > &  out,
const std::vector< Matrix::Data< float > * > &  in1,
const Matrix::PartDescriptor &  in1Desc,
cudaStream_t *  streams,
int  n_streams 
)

◆ mv_aTb() [1/2]

void MLCommon::LinAlg::opg::mv_aTb ( const raft::handle_t &  handle,
Matrix::Data< double > &  out,
const std::vector< Matrix::Data< double > * > &  A,
const Matrix::PartDescriptor &  ADesc,
const std::vector< Matrix::Data< double > * > &  b,
cudaStream_t *  streams,
int  n_streams 
)

performs MNMG A^T x b calculation.

Parameters
[in] handle: cuML handle object
[out] out: resulting A^T x b matrix
[in] A: input data matrix
[in] ADesc: MNMG description of A
[in] b: input vector
[in] streams: CUDA streams
[in] n_streams: number of streams

◆ mv_aTb() [2/2]

void MLCommon::LinAlg::opg::mv_aTb ( const raft::handle_t &  handle,
Matrix::Data< float > &  out,
const std::vector< Matrix::Data< float > * > &  A,
const Matrix::PartDescriptor &  ADesc,
const std::vector< Matrix::Data< float > * > &  b,
cudaStream_t *  streams,
int  n_streams 
)

◆ svdEig() [1/2]

void MLCommon::LinAlg::opg::svdEig ( const raft::handle_t &  handle,
const std::vector< Matrix::Data< double > * > &  A,
const Matrix::PartDescriptor &  ADesc,
std::vector< Matrix::Data< double > * > &  U,
double *  S,
double *  V,
cudaStream_t *  streams,
int  n_streams 
)

◆ svdEig() [2/2]

void MLCommon::LinAlg::opg::svdEig ( const raft::handle_t &  handle,
const std::vector< Matrix::Data< float > * > &  A,
const Matrix::PartDescriptor &  ADesc,
std::vector< Matrix::Data< float > * > &  U,
float *  S,
float *  V,
cudaStream_t *  streams,
int  n_streams 
)

performs MNMG SVD calculation using Eigen decomposition.