Attention
The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called cuVS. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by the RAPIDS 24.08 (August) release.
Probability & Information Theory#
Contingency Matrix#
#include <raft/stats/contingency_matrix.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t>
void get_input_class_cardinality(raft::resources const &handle, raft::device_vector_view<const value_t, idx_t> groundTruth, raft::host_scalar_view<value_t> minLabel, raft::host_scalar_view<value_t> maxLabel)# Use this to determine the allocation size of the output matrix: size of matrix = (maxLabel - minLabel + 1)^2 * sizeof(int).
- Template Parameters:
value_t – label type
idx_t – Index type of matrix extent.
- Parameters:
handle – [in] the raft handle.
groundTruth – [in] device 1-d array for ground truth (num of rows)
minLabel – [out] calculated min value in input array
maxLabel – [out] calculated max value in input array
-
template<typename value_t, typename out_t, typename idx_t, typename layout_t, typename opt_min_label_t, typename opt_max_label_t>
void contingency_matrix(raft::resources const &handle, raft::device_vector_view<const value_t, idx_t> ground_truth, raft::device_vector_view<const value_t, idx_t> predicted_label, raft::device_matrix_view<out_t, idx_t, layout_t> out_mat, opt_min_label_t &&opt_min_label, opt_max_label_t &&opt_max_label)# Construct a contingency matrix given input ground truth and prediction labels. Users should call get_input_class_cardinality to find the label range and allocate memory for the output. Similarly, workspace requirements should be checked using function getContingencyMatrixWorkspaceSize.
- Template Parameters:
value_t – label type
out_t – output matrix type
idx_t – Index type of matrix extent.
layout_t – Layout type of the output matrix (out_mat).
opt_min_label_t – std::optional<value_t>; the type of opt_min_label
opt_max_label_t – std::optional<value_t>; the type of opt_max_label
- Parameters:
handle – [in] the raft handle.
ground_truth – [in] device 1-d array for ground truth (num of rows)
predicted_label – [in] device 1-d array for prediction (num of columns)
out_mat – [out] output buffer for contingency matrix
opt_min_label – [in] std::optional, min value in input ground truth array
opt_max_label – [in] std::optional, max value in input ground truth array
Entropy#
#include <raft/stats/entropy.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t>
double entropy(raft::resources const &handle, raft::device_vector_view<const value_t, idx_t> cluster_array, const value_t lower_label_range, const value_t upper_label_range)# Function to calculate entropy. More info on entropy: https://en.wikipedia.org/wiki/Entropy_(information_theory)
- Template Parameters:
value_t – data type
idx_t – index type
- Parameters:
handle – [in] the raft handle
cluster_array – [in] the array of classes of type value_t
lower_label_range – [in] the lower bound of the range of labels
upper_label_range – [in] the upper bound of the range of labels
- Returns:
the entropy score
KL-Divergence#
#include <raft/stats/kl_divergence.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t>
value_t kl_divergence(raft::resources const &handle, raft::device_vector_view<const value_t, idx_t> modelPDF, raft::device_vector_view<const value_t, idx_t> candidatePDF)# Function to calculate KL Divergence. More info on KL Divergence: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
- Template Parameters:
value_t – Data type of the input array
idx_t – index type
- Parameters:
handle – [in] the raft handle
modelPDF – [in] the model array of probability density functions of type value_t
candidatePDF – [in] the candidate array of probability density functions of type value_t
- Returns:
the KL Divergence value
Mutual Information#
#include <raft/stats/mutual_info_score.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t>
double mutual_info_score(raft::resources const &handle, raft::device_vector_view<const value_t, idx_t> first_cluster_array, raft::device_vector_view<const value_t, idx_t> second_cluster_array, value_t lower_label_range, value_t upper_label_range)# Function to calculate the mutual information between two clusters. More info on mutual information: https://en.wikipedia.org/wiki/Mutual_information
- Template Parameters:
value_t – the data type
idx_t – index type
- Parameters:
handle – [in] the raft handle
first_cluster_array – [in] the array of classes of type value_t
second_cluster_array – [in] the array of classes of type value_t
lower_label_range – [in] the lower bound of the range of labels
upper_label_range – [in] the upper bound of the range of labels
- Returns:
the mutual information score