Functions | |
| void | eigDC (const raft::handle_t &h, float *eigenValues, float *eigenVectors, std::vector< Matrix::Data< float > * > &inParts, Matrix::PartDescriptor &desc, int myRank, cudaStream_t stream) |
| Multi-GPU version of Eigen decomposition. This function works for symmetric matrices only. The whole input matrix is gathered at rank 0 and the Eigen decomposition is carried out sequentially. More... | |
| void | eigDC (const raft::handle_t &h, double *eigenValues, double *eigenVectors, std::vector< Matrix::Data< double > * > &inParts, Matrix::PartDescriptor &desc, int myRank, cudaStream_t stream) |
| void | eigJacobi (const raft::handle_t &h, float *eigenValues, float *eigenVectors, std::vector< Matrix::Data< float > * > &inParts, Matrix::PartDescriptor &desc, int myRank, cudaStream_t stream) |
| void | eigJacobi (const raft::handle_t &h, double *eigenValues, double *eigenVectors, std::vector< Matrix::Data< double > * > &inParts, Matrix::PartDescriptor &desc, int myRank, cudaStream_t stream) |
| void | gemm (const raft::handle_t &h, std::vector< Matrix::Data< float > * > &outZParts, Matrix::PartDescriptor &outZDesc, std::vector< Matrix::Data< float > * > &inXParts, Matrix::PartDescriptor &inXDesc, std::vector< Matrix::Data< float > * > &inYParts, Matrix::PartDescriptor &inYDesc, int myRank, cudaStream_t stream) |
| A multi-GPU generalized matrix multiplication function. This function performs Z = X * Y. The X and Y matrices are distributed in blocks on different ranks. First, the Y matrix is duplicated at each rank. It is then multiplied with the blocks of X local to the rank. More... | |
| void | gemm (const raft::handle_t &h, std::vector< Matrix::Data< double > * > &outZParts, Matrix::PartDescriptor &outZDesc, std::vector< Matrix::Data< double > * > &inXParts, Matrix::PartDescriptor &inXDesc, std::vector< Matrix::Data< double > * > &inYParts, Matrix::PartDescriptor &inYDesc, int myRank, cudaStream_t stream) |
| void | lstsqEig (const raft::handle_t &handle, const std::vector< Matrix::Data< float > * > &A, const Matrix::PartDescriptor &ADesc, const std::vector< Matrix::Data< float > * > &b, float *w, cudaStream_t *streams, int n_streams) |
| Performs MNMG (multi-node multi-GPU) least squares calculation. More... | |
| void | lstsqEig (const raft::handle_t &handle, const std::vector< Matrix::Data< double > * > &A, const Matrix::PartDescriptor &ADesc, const std::vector< Matrix::Data< double > * > &b, double *w, cudaStream_t *streams, int n_streams) |
| void | meanSquaredError (double *out, const Matrix::Data< double > &in1, const Matrix::PartDescriptor &in1Desc, const Matrix::Data< double > &in2, const Matrix::PartDescriptor &in2Desc, const raft::comms::comms_t &comm, cudaStream_t stream, int root=0, bool broadcastResult=true) |
| Multi-GPU mean squared error. More... | |
| void | meanSquaredError (float *out, const Matrix::Data< float > &in1, const Matrix::PartDescriptor &in1Desc, const Matrix::Data< float > &in2, const Matrix::PartDescriptor &in2Desc, const raft::comms::comms_t &comm, cudaStream_t stream, int root=0, bool broadcastResult=true) |
| void | mm_aTa (const raft::handle_t &handle, Matrix::Data< double > &out, const std::vector< Matrix::Data< double > * > &in1, const Matrix::PartDescriptor &in1Desc, cudaStream_t *streams, int n_streams) |
| performs MNMG A^T x A calculation. More... | |
| void | mm_aTa (const raft::handle_t &handle, Matrix::Data< float > &out, const std::vector< Matrix::Data< float > * > &in1, const Matrix::PartDescriptor &in1Desc, cudaStream_t *streams, int n_streams) |
| void | mv_aTb (const raft::handle_t &handle, Matrix::Data< double > &out, const std::vector< Matrix::Data< double > * > &A, const Matrix::PartDescriptor &ADesc, const std::vector< Matrix::Data< double > * > &b, cudaStream_t *streams, int n_streams) |
| performs MNMG A^T x b calculation. More... | |
| void | mv_aTb (const raft::handle_t &handle, Matrix::Data< float > &out, const std::vector< Matrix::Data< float > * > &A, const Matrix::PartDescriptor &ADesc, const std::vector< Matrix::Data< float > * > &b, cudaStream_t *streams, int n_streams) |
| void | colNorm2 (const raft::handle_t &handle, Matrix::Data< double > &out, const std::vector< Matrix::Data< double > * > &in, const Matrix::PartDescriptor &inDesc, cudaStream_t *streams, int n_streams) |
| Performs MNMG (multi-node multi-GPU) column-wise 2-norm calculation. More... | |
| void | colNorm2 (const raft::handle_t &handle, Matrix::Data< float > &out, const std::vector< Matrix::Data< float > * > &in, const Matrix::PartDescriptor &inDesc, cudaStream_t *streams, int n_streams) |
| void | colNorm2NoSeq (const raft::handle_t &handle, Matrix::Data< double > &out, const std::vector< Matrix::Data< double > * > &in, const Matrix::PartDescriptor &inDesc, cudaStream_t *streams, int n_streams) |
| void | colNorm2NoSeq (const raft::handle_t &handle, Matrix::Data< float > &out, const std::vector< Matrix::Data< float > * > &in, const Matrix::PartDescriptor &inDesc, cudaStream_t *streams, int n_streams) |
| void | svdEig (const raft::handle_t &handle, const std::vector< Matrix::Data< float > * > &A, const Matrix::PartDescriptor &ADesc, std::vector< Matrix::Data< float > * > &U, float *S, float *V, cudaStream_t *streams, int n_streams) |
| Performs MNMG (multi-node multi-GPU) singular value decomposition (SVD), producing U, S and V. More... | |
| void | svdEig (const raft::handle_t &handle, const std::vector< Matrix::Data< double > * > &A, const Matrix::PartDescriptor &ADesc, std::vector< Matrix::Data< double > * > &U, double *S, double *V, cudaStream_t *streams, int n_streams) |
| void MLCommon::LinAlg::opg::colNorm2 | ( | const raft::handle_t & | handle, |
| Matrix::Data< double > & | out, | ||
| const std::vector< Matrix::Data< double > * > & | in, | ||
| const Matrix::PartDescriptor & | inDesc, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
Performs MNMG (multi-node multi-GPU) column-wise 2-norm calculation.
| void MLCommon::LinAlg::opg::colNorm2 | ( | const raft::handle_t & | handle, |
| Matrix::Data< float > & | out, | ||
| const std::vector< Matrix::Data< float > * > & | in, | ||
| const Matrix::PartDescriptor & | inDesc, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
| void MLCommon::LinAlg::opg::colNorm2NoSeq | ( | const raft::handle_t & | handle, |
| Matrix::Data< double > & | out, | ||
| const std::vector< Matrix::Data< double > * > & | in, | ||
| const Matrix::PartDescriptor & | inDesc, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
| void MLCommon::LinAlg::opg::colNorm2NoSeq | ( | const raft::handle_t & | handle, |
| Matrix::Data< float > & | out, | ||
| const std::vector< Matrix::Data< float > * > & | in, | ||
| const Matrix::PartDescriptor & | inDesc, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
| void MLCommon::LinAlg::opg::eigDC | ( | const raft::handle_t & | h, |
| double * | eigenValues, | ||
| double * | eigenVectors, | ||
| std::vector< Matrix::Data< double > * > & | inParts, | ||
| Matrix::PartDescriptor & | desc, | ||
| int | myRank, | ||
| cudaStream_t | stream | ||
| ) |
| void MLCommon::LinAlg::opg::eigDC | ( | const raft::handle_t & | h, |
| float * | eigenValues, | ||
| float * | eigenVectors, | ||
| std::vector< Matrix::Data< float > * > & | inParts, | ||
| Matrix::PartDescriptor & | desc, | ||
| int | myRank, | ||
| cudaStream_t | stream | ||
| ) |
Multi-GPU version of Eigen decomposition. This function works for symmetric matrices only. The whole input matrix is gathered at rank 0 and the Eigen decomposition is carried out sequentially.
| h | cuML handle object. | |
| [out] | eigenValues | Output N Eigen values. |
| [out] | eigenVectors | Output N Eigen vectors of size N x 1. |
| [in] | inParts | Input symmetric matrix of size N x N. |
| [in] | desc | Descriptor of input matrix. |
| [in] | myRank | MPI rank of the process. |
| [in] | stream | CUDA stream. |
| void MLCommon::LinAlg::opg::eigJacobi | ( | const raft::handle_t & | h, |
| double * | eigenValues, | ||
| double * | eigenVectors, | ||
| std::vector< Matrix::Data< double > * > & | inParts, | ||
| Matrix::PartDescriptor & | desc, | ||
| int | myRank, | ||
| cudaStream_t | stream | ||
| ) |
| void MLCommon::LinAlg::opg::eigJacobi | ( | const raft::handle_t & | h, |
| float * | eigenValues, | ||
| float * | eigenVectors, | ||
| std::vector< Matrix::Data< float > * > & | inParts, | ||
| Matrix::PartDescriptor & | desc, | ||
| int | myRank, | ||
| cudaStream_t | stream | ||
| ) |
| void MLCommon::LinAlg::opg::gemm | ( | const raft::handle_t & | h, |
| std::vector< Matrix::Data< double > * > & | outZParts, | ||
| Matrix::PartDescriptor & | outZDesc, | ||
| std::vector< Matrix::Data< double > * > & | inXParts, | ||
| Matrix::PartDescriptor & | inXDesc, | ||
| std::vector< Matrix::Data< double > * > & | inYParts, | ||
| Matrix::PartDescriptor & | inYDesc, | ||
| int | myRank, | ||
| cudaStream_t | stream | ||
| ) |
| void MLCommon::LinAlg::opg::gemm | ( | const raft::handle_t & | h, |
| std::vector< Matrix::Data< float > * > & | outZParts, | ||
| Matrix::PartDescriptor & | outZDesc, | ||
| std::vector< Matrix::Data< float > * > & | inXParts, | ||
| Matrix::PartDescriptor & | inXDesc, | ||
| std::vector< Matrix::Data< float > * > & | inYParts, | ||
| Matrix::PartDescriptor & | inYDesc, | ||
| int | myRank, | ||
| cudaStream_t | stream | ||
| ) |
A multi-GPU generalized matrix multiplication function. This function performs Z = X * Y. The X and Y matrices are distributed in blocks on different ranks. First, the Y matrix is duplicated at each rank. It is then multiplied with the blocks of X local to the rank.
| h | cuML handle object. | |
| [out] | outZParts | Result of the multiplication with size M x N. Distributed across ranks according to inXDesc/outZDesc descriptor. |
| [out] | outZDesc | Descriptor for outZParts matrix. It has to be same as inXDesc. |
| [in] | inXParts | Input matrix X with dimensions M x K. Distributed across ranks according to inXDesc descriptor. |
| [in] | inXDesc | Descriptor for X matrix. |
| [in] | inYParts | Input matrix Y with dimensions K x N. Distributed across ranks according to inYDesc descriptor. |
| [in] | inYDesc | Descriptor for Y matrix. |
| [in] | myRank | Rank of calling process. |
| [in] | stream | cuda stream on which work is launched. |
| void MLCommon::LinAlg::opg::lstsqEig | ( | const raft::handle_t & | handle, |
| const std::vector< Matrix::Data< double > * > & | A, | ||
| const Matrix::PartDescriptor & | ADesc, | ||
| const std::vector< Matrix::Data< double > * > & | b, | ||
| double * | w, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
| void MLCommon::LinAlg::opg::lstsqEig | ( | const raft::handle_t & | handle, |
| const std::vector< Matrix::Data< float > * > & | A, | ||
| const Matrix::PartDescriptor & | ADesc, | ||
| const std::vector< Matrix::Data< float > * > & | b, | ||
| float * | w, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
Performs MNMG (multi-node multi-GPU) least squares calculation.
| void MLCommon::LinAlg::opg::meanSquaredError | ( | double * | out, |
| const Matrix::Data< double > & | in1, | ||
| const Matrix::PartDescriptor & | in1Desc, | ||
| const Matrix::Data< double > & | in2, | ||
| const Matrix::PartDescriptor & | in2Desc, | ||
| const raft::comms::comms_t & | comm, | ||
| cudaStream_t | stream, | ||
| int | root = 0, |
||
| bool | broadcastResult = true |
||
| ) |
Multi-GPU mean squared error.
| [out] | out | the output data (device pointer) |
| [in] | in1 | the first input data |
| [in] | in1Desc | descriptor for the first input data |
| [in] | in2 | the second input data |
| [in] | in2Desc | descriptor for the second input data |
| [in] | comm | the communicator object |
| [in] | stream | cuda stream where to launch work |
| [in] | root | worker ID which is supposed to be considered as root |
| [in] | broadcastResult | if false, only root process will have the result, else all ranks |
| void MLCommon::LinAlg::opg::meanSquaredError | ( | float * | out, |
| const Matrix::Data< float > & | in1, | ||
| const Matrix::PartDescriptor & | in1Desc, | ||
| const Matrix::Data< float > & | in2, | ||
| const Matrix::PartDescriptor & | in2Desc, | ||
| const raft::comms::comms_t & | comm, | ||
| cudaStream_t | stream, | ||
| int | root = 0, |
||
| bool | broadcastResult = true |
||
| ) |
| void MLCommon::LinAlg::opg::mm_aTa | ( | const raft::handle_t & | handle, |
| Matrix::Data< double > & | out, | ||
| const std::vector< Matrix::Data< double > * > & | in1, | ||
| const Matrix::PartDescriptor & | in1Desc, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
performs MNMG A^T x A calculation.
| [in] | handle | cuML handle object |
| [out] | out | resulting A^T x A matrix |
| [in] | in1 | input data |
| [in] | in1Desc | MNMG description of the input |
| [in] | streams | cuda streams |
| [in] | n_streams | number of streams |
| void MLCommon::LinAlg::opg::mm_aTa | ( | const raft::handle_t & | handle, |
| Matrix::Data< float > & | out, | ||
| const std::vector< Matrix::Data< float > * > & | in1, | ||
| const Matrix::PartDescriptor & | in1Desc, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
| void MLCommon::LinAlg::opg::mv_aTb | ( | const raft::handle_t & | handle, |
| Matrix::Data< double > & | out, | ||
| const std::vector< Matrix::Data< double > * > & | A, | ||
| const Matrix::PartDescriptor & | ADesc, | ||
| const std::vector< Matrix::Data< double > * > & | b, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
performs MNMG A^T x b calculation.
| [in] | handle | cuML handle object |
| [out] | out | resulting A^T x b matrix |
| [in] | A | input data matrix |
| [in] | ADesc | MNMG description of A |
| [in] | b | input vector |
| [in] | streams | cuda streams |
| [in] | n_streams | number of streams |
| void MLCommon::LinAlg::opg::mv_aTb | ( | const raft::handle_t & | handle, |
| Matrix::Data< float > & | out, | ||
| const std::vector< Matrix::Data< float > * > & | A, | ||
| const Matrix::PartDescriptor & | ADesc, | ||
| const std::vector< Matrix::Data< float > * > & | b, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
| void MLCommon::LinAlg::opg::svdEig | ( | const raft::handle_t & | handle, |
| const std::vector< Matrix::Data< double > * > & | A, | ||
| const Matrix::PartDescriptor & | ADesc, | ||
| std::vector< Matrix::Data< double > * > & | U, | ||
| double * | S, | ||
| double * | V, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
| void MLCommon::LinAlg::opg::svdEig | ( | const raft::handle_t & | handle, |
| const std::vector< Matrix::Data< float > * > & | A, | ||
| const Matrix::PartDescriptor & | ADesc, | ||
| std::vector< Matrix::Data< float > * > & | U, | ||
| float * | S, | ||
| float * | V, | ||
| cudaStream_t * | streams, | ||
| int | n_streams | ||
| ) |
Performs MNMG (multi-node multi-GPU) singular value decomposition (SVD), producing U, S and V.