Attention
The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called cuVS. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.10 (October) release and they will be removed from RAFT altogether in the 24.12 (December) release.
Sparse Linear Algebra#
-
template<typename T>
size_t raft::sparse::linalg::csr_add_calc_inds(const int *a_ind, const int *a_indptr, const T *a_val, int nnz1, const int *b_ind, const int *b_indptr, const T *b_val, int nnz2, int m, int *out_ind, cudaStream_t stream)# Calculate the CSR row_ind array that would result from summing together two CSR matrices.
- Parameters:
a_ind – left hand row_ind array
a_indptr – left hand index_ptr array
a_val – left hand data array
nnz1 – size of left hand index_ptr and val arrays
b_ind – right hand row_ind array
b_indptr – right hand index_ptr array
b_val – right hand data array
nnz2 – size of right hand index_ptr and val arrays
m – size of output array (number of rows in final matrix)
out_ind – output row_ind array
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::csr_add_finalize(const int *a_ind, const int *a_indptr, const T *a_val, int nnz1, const int *b_ind, const int *b_indptr, const T *b_val, int nnz2, int m, int *c_ind, int *c_indptr, T *c_val, cudaStream_t stream)# Calculate the CSR row_ind array that would result from summing together two CSR matrices.
- Parameters:
a_ind – left hand row_ind array
a_indptr – left hand index_ptr array
a_val – left hand data array
nnz1 – size of left hand index_ptr and val arrays
b_ind – right hand row_ind array
b_indptr – right hand index_ptr array
b_val – right hand data array
nnz2 – size of right hand index_ptr and val arrays
m – size of output array (number of rows in final matrix)
c_ind – output row_ind array
c_indptr – output ind_ptr array
c_val – output data array
stream – cuda stream to use
-
template<typename T = int>
void raft::sparse::linalg::coo_degree(const T *rows, int nnz, T *results, cudaStream_t stream)# Count the number of values for each row.
- Template Parameters:
TPB_X – number of threads to use per block
- Parameters:
rows – rows array of the COO matrix
nnz – size of the rows array
results – output result array
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree(COO<T> *in, int *results, cudaStream_t stream)# Count the number of values for each row.
- Template Parameters:
TPB_X – number of threads to use per block
T – type name of underlying values array
- Parameters:
in – input COO object for counting rows
results – output array with row counts (size=in->n_rows)
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_scalar(const int *rows, const T *vals, int nnz, T scalar, int *results, cudaStream_t stream = 0)# Count the number of values for each row that doesn’t match a particular scalar.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
rows – Input COO row array
vals – Input COO val arrays
nnz – size of input COO arrays
scalar – scalar to match for counting rows
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_scalar(COO<T> *in, T scalar, int *results, cudaStream_t stream)# Count the number of values for each row that doesn’t match a particular scalar.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
in – Input COO array
scalar – scalar to match for counting rows
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_nz(const int *rows, const T *vals, int nnz, int *results, cudaStream_t stream)# Count the number of nonzeros for each row.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
rows – Input COO row array
vals – Input COO val arrays
nnz – size of input COO arrays
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_nz(COO<T> *in, int *results, cudaStream_t stream)# Count the number of nonzero values for each row.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
in – Input COO array
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::csr_row_normalize_l1(const int *ia, const T *vals, int nnz, int m, T *result, cudaStream_t stream)# Perform L1 normalization on the rows of a given CSR-formatted sparse matrix.
- Parameters:
ia – row_ind array
vals – data array
nnz – size of data array
m – size of row_ind array
result – l1 normalized data array
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::csr_row_normalize_max(const int *ia, const T *vals, int nnz, int m, T *result, cudaStream_t stream)# Perform L_inf normalization on a given CSR-formatted sparse matrix.
- Parameters:
ia – row_ind array
vals – data array
nnz – size of data array
m – size of row_ind array
result – l1 normalized data array
stream – cuda stream to use
-
template<typename Type, typename IdxType = int, typename Lambda = raft::identity_op>
void raft::sparse::linalg::rowNormCsr(raft::resources const &handle, const IdxType *ia, const Type *data, const IdxType nnz, const IdxType N, Type *norm, raft::linalg::NormType type, Lambda fin_op = raft::identity_op())# Compute row-wise norm of the input matrix and perform fin_op lambda.
Row-wise norm is useful while computing pairwise distance matrix, for example. This is used in many clustering algos like knn, kmeans, dbscan, etc…
- Template Parameters:
Type – the data type
Lambda – device final lambda
IdxType – Integer type used to for addressing
- Parameters:
handle – raft handle
ia – the input matrix row index array
data – the input matrix nnz data
nnz – number of elements in data
N – number of rows
norm – the output vector of row-wise norm, size [N]
type – the type of norm to be applied
fin_op – the final lambda op
-
template<typename ValueType, typename IndexType, typename NZType, typename LayoutPolicyA, typename LayoutPolicyB, typename OutputType>
void raft::sparse::linalg::sddmm(raft::resources const &handle, raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyA> A, raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyB> B, raft::device_csr_matrix_view<OutputType, IndexType, IndexType, NZType> C, const raft::linalg::Operation opA, const raft::linalg::Operation opB, raft::host_scalar_view<OutputType> alpha, raft::host_scalar_view<OutputType> beta)# This function performs the multiplication of dense matrix A and dense matrix B, followed by an element-wise multiplication with the sparsity pattern of C. It computes the following equation: C = alpha · (opA(A) * opB(B) ∘ spy(C)) + beta · C where A,B are device matrix views and C is a CSR device matrix view.
- Template Parameters:
ValueType – Data type of input/output matrices (float/double/half)
IndexType – Type of C
NZType – Type of C
LayoutPolicyA – layout of A
LayoutPolicyB – layout of B
OutputType – output type, equal to ValueType by default
- Parameters:
handle – [in] raft handle
A – [in] input raft::device_matrix_view
B – [in] input raft::device_matrix_view
C – [inout] output raft::device_csr_matrix_view
opA – [in] input Operation op(A)
opB – [in] input Operation op(B)
alpha – [in] input raft::host_scalar_view
beta – [in] input raft::host_scalar_view
-
template<typename ValueType, typename IndexType, typename NZType, typename LayoutPolicyY, typename LayoutPolicyZ>
void raft::sparse::linalg::spmm(raft::resources const &handle, const bool trans_x, const bool trans_y, const ValueType *alpha, raft::device_csr_matrix_view<const ValueType, int, int, NZType> x, raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyY> y, const ValueType *beta, raft::device_matrix_view<ValueType, IndexType, LayoutPolicyZ> z)# SPMM function designed for handling all CSR * DENSE combinations of operand layouts for cuSparse. It computes the following equation: Z = alpha . X * Y + beta . Z where X is a CSR device matrix view and Y,Z are device matrix views.
- Template Parameters:
ValueType – Data type of input/output matrices (float/double)
IndexType – Type of Y and Z
NZType – Type of X
LayoutPolicyY – layout of Y
LayoutPolicyZ – layout of Z
- Parameters:
handle – [in] raft handle
trans_x – [in] transpose operation for X
trans_y – [in] transpose operation for Y
alpha – [in] scalar
x – [in] input raft::device_csr_matrix_view
y – [in] input raft::device_matrix_view
beta – [in] scalar
z – [inout] input-output raft::device_matrix_view
-
template<typename T, typename Lambda>
void raft::sparse::linalg::coo_symmetrize(COO<T> *in, COO<T> *out, Lambda reduction_op, cudaStream_t stream)# takes a COO matrix which may not be symmetric and symmetrizes it, running a custom reduction function against the each value and its transposed value.
- Parameters:
in – Input COO matrix
out – Output symmetrized COO matrix
reduction_op – a custom reduction function
stream – cuda stream to use
- template<typename value_idx = int64_t, typename value_t = float> RAFT_KERNEL raft::sparse::linalg::symmetric_find_size (const value_t __restrict__ *data, const value_idx __restrict__ *indices, const value_idx n, const int k, value_idx __restrict__ *row_sizes, value_idx __restrict__ *row_sizes2)
Find how much space needed in each row. We look through all datapoints and increment the count for each row.
TODO: This isn’t generalized. Remove in place of
symmetrize()
- Parameters:
data – Input knn distances(n, k)
indices – Input knn indices(n, k)
n – Number of rows
k – Number of n_neighbors
row_sizes – Input empty row sum 1 array(n)
row_sizes2 – Input empty row sum 2 array(n) for faster reduction
- template<typename value_idx> RAFT_KERNEL raft::sparse::linalg::reduce_find_size (const value_idx n, const int k, value_idx __restrict__ *row_sizes, const value_idx __restrict__ *row_sizes2)
Reduce sum(row_sizes) + k Reduction for symmetric_find_size kernel. Allows algo to be faster.
TODO: This isn’t generalized. Remove in place of
symmetrize()
- Parameters:
n – Number of rows
k – Number of n_neighbors
row_sizes – Input row sum 1 array(n)
row_sizes2 – Input row sum 2 array(n) for faster reduction
- template<typename value_idx = int64_t, typename value_t = float> RAFT_KERNEL raft::sparse::linalg::symmetric_sum (value_idx *__restrict__ edges, const value_t *__restrict__ data, const value_idx *__restrict__ indices, value_t *__restrict__ VAL, value_idx *__restrict__ COL, value_idx *__restrict__ ROW, const value_idx n, const int k)
Perform data + data.T operation. Can only run once row_sizes from the CSR matrix of data + data.T has been determined.
TODO: This isn’t generalized. Remove in place of
symmetrize()
- Parameters:
edges – Input row sum array(n) after reduction
data – Input knn distances(n, k)
indices – Input knn indices(n, k)
VAL – Output values for data + data.T
COL – Output column indices for data + data.T
ROW – Output row indices for data + data.T
n – Number of rows
k – Number of n_neighbors
- template<typename value_idx = int64_t, typename value_t = float, int TPB_X = 32, int TPB_Y = 32> void raft::sparse::linalg::from_knn_symmetrize_matrix (const value_idx *__restrict__ knn_indices, const value_t *__restrict__ knn_dists, const value_idx n, const int k, COO< value_t, value_idx > *out, cudaStream_t stream)
Perform data + data.T on raw KNN data. The following steps are invoked: (1) Find how much space needed in each row (2) Compute final space needed (n*k + sum(row_sizes)) == 2*n*k (3) Allocate new space (4) Prepare edges for each new row (5) Perform final data + data.T operation (6) Return summed up VAL, COL, ROW.
TODO: This isn’t generalized. Remove in place of
symmetrize()
- Parameters:
knn_indices – Input knn distances(n, k)
knn_dists – Input knn indices(n, k)
n – Number of rows
k – Number of n_neighbors
out – Output COO Matrix class
stream – Input cuda stream
-
template<typename value_idx, typename value_t>
void raft::sparse::linalg::symmetrize(raft::resources const &handle, const value_idx *rows, const value_idx *cols, const value_t *vals, size_t m, size_t n, size_t nnz, raft::sparse::COO<value_t, value_idx> &out)# Symmetrizes a COO matrix
-
template<typename value_idx, typename value_t>
void raft::sparse::linalg::csr_transpose(raft::resources const &handle, const value_idx *csr_indptr, const value_idx *csr_indices, const value_t *csr_data, value_idx *csc_indptr, value_idx *csc_indices, value_t *csc_data, value_idx csr_nrows, value_idx csr_ncols, value_idx nnz, cudaStream_t stream)# Transpose a set of CSR arrays into a set of CSC arrays.
- Template Parameters:
value_idx – : data type of the CSR index arrays
value_t – : data type of the CSR data array
- Parameters:
handle – [in] : used for invoking cusparse
csr_indptr – [in] : CSR row index array
csr_indices – [in] : CSR column indices array
csr_data – [in] : CSR data array
csc_indptr – [out] : CSC row index array
csc_indices – [out] : CSC column indices array
csc_data – [out] : CSC data array
csr_nrows – [in] : Number of rows in CSR
csr_ncols – [in] : Number of columns in CSR
nnz – [in] : Number of nonzeros of CSR
stream – [in] : Cuda stream for ordering events