Namespaces | |
Batched | |
Functions | |
float | r2_score_py (const raft::handle_t &handle, float *y, float *y_hat, int n) |
double | r2_score_py (const raft::handle_t &handle, double *y, double *y_hat, int n) |
double | rand_index (const raft::handle_t &handle, double *y, double *y_hat, int n) |
double | silhouette_score (const raft::handle_t &handle, double *y, int nRows, int nCols, int *labels, int nLabels, double *silScores, cuvs::distance::DistanceType metric) |
double | kl_divergence (const raft::handle_t &handle, const double *y, const double *y_hat, int n) |
float | kl_divergence (const raft::handle_t &handle, const float *y, const float *y_hat, int n) |
double | entropy (const raft::handle_t &handle, const int *y, const int n, const int lower_class_range, const int upper_class_range) |
double | mutual_info_score (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range) |
double | homogeneity_score (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range) |
double | completeness_score (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range) |
double | v_measure (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range, double beta) |
float | accuracy_score_py (const raft::handle_t &handle, const int *predictions, const int *ref_predictions, int n) |
void | pairwise_distance (const raft::handle_t &handle, const double *x, const double *y, double *dist, int m, int n, int k, cuvs::distance::DistanceType metric, bool isRowMajor=true, double metric_arg=2.0) |
Calculates the ij pairwise distances between two input arrays of double type. More... | |
void | pairwise_distance (const raft::handle_t &handle, const float *x, const float *y, float *dist, int m, int n, int k, cuvs::distance::DistanceType metric, bool isRowMajor=true, float metric_arg=2.0f) |
Calculates the ij pairwise distances between two input arrays of float type. More... | |
void | pairwiseDistance_sparse (const raft::handle_t &handle, double *x, double *y, double *dist, int x_nrows, int y_nrows, int n_cols, int x_nnz, int y_nnz, int *x_indptr, int *y_indptr, int *x_indices, int *y_indices, cuvs::distance::DistanceType metric, float metric_arg) |
void | pairwiseDistance_sparse (const raft::handle_t &handle, float *x, float *y, float *dist, int x_nrows, int y_nrows, int n_cols, int x_nnz, int y_nnz, int *x_indptr, int *y_indptr, int *x_indices, int *y_indices, cuvs::distance::DistanceType metric, float metric_arg) |
template<typename math_t , cuvs::distance::DistanceType distance_type> | |
double | trustworthiness_score (const raft::handle_t &h, const math_t *X, math_t *X_embedded, int n, int m, int d, int n_neighbors, int batchSize=512) |
Compute the trustworthiness score. More... | |
double | adjusted_rand_index (const raft::handle_t &handle, const int64_t *y, const int64_t *y_hat, const int64_t n) |
double | adjusted_rand_index (const raft::handle_t &handle, const int *y, const int *y_hat, const int n) |
float ML::Metrics::accuracy_score_py | ( | const raft::handle_t & | handle, |
const int * | predictions, | ||
const int * | ref_predictions, | ||
int | n | ||
) |
Calculates the "accuracy" between two input numpy arrays / cudf series
The accuracy metric is used to calculate the accuracy of the predicted labels with respect to the truth labels.
handle | raft::handle_t |
predictions | predicted labels |
ref_predictions | truth labels |
n | Number of elements in predictions and ref_predictions |
double ML::Metrics::adjusted_rand_index | ( | const raft::handle_t & | handle, |
const int * | y, | ||
const int * | y_hat, | ||
const int | n | ||
) |
double ML::Metrics::adjusted_rand_index | ( | const raft::handle_t & | handle, |
const int64_t * | y, | ||
const int64_t * | y_hat, | ||
const int64_t | n | ||
) |
Calculates the "adjusted rand index"
This metric is the corrected-for-chance version of the rand index
handle | raft::handle_t |
y | Array of response variables of the first clustering classifications |
y_hat | Array of response variables of the second clustering classifications |
n | Number of elements in y and y_hat |
double ML::Metrics::completeness_score | ( | const raft::handle_t & | handle, |
const int * | y, | ||
const int * | y_hat, | ||
const int | n, | ||
const int | lower_class_range, | ||
const int | upper_class_range | ||
) |
Calculates the "completeness score" between two clusters
A clustering result satisfies completeness if all the data points that are members of a given class are elements of the same cluster.
handle | raft::handle_t |
y | truth labels |
y_hat | predicted labels |
n | Number of elements in y and y_hat |
lower_class_range | the lowest value in the range of classes |
upper_class_range | the highest value in the range of classes |
double ML::Metrics::entropy | ( | const raft::handle_t & | handle, |
const int * | y, | ||
const int | n, | ||
const int | lower_class_range, | ||
const int | upper_class_range | ||
) |
Calculates the "entropy" of a labelling
This metric is a measure of the purity/polarity of the clustering
handle | raft::handle_t |
y | Array of response variables of the clustering |
n | Number of elements in y |
lower_class_range | the lowest value in the range of classes |
upper_class_range | the highest value in the range of classes |
double ML::Metrics::homogeneity_score | ( | const raft::handle_t & | handle, |
const int * | y, | ||
const int * | y_hat, | ||
const int | n, | ||
const int | lower_class_range, | ||
const int | upper_class_range | ||
) |
Calculates the "homogeneity score" between two clusters
A clustering result satisfies homogeneity if all of its clusters contain only data points which are members of a single class.
handle | raft::handle_t |
y | truth labels |
y_hat | predicted labels |
n | Number of elements in y and y_hat |
lower_class_range | the lowest value in the range of classes |
upper_class_range | the highest value in the range of classes |
double ML::Metrics::kl_divergence | ( | const raft::handle_t & | handle, |
const double * | y, | ||
const double * | y_hat, | ||
int | n | ||
) |
Calculates the "Kullback-Leibler Divergence"
The KL divergence tells us how well the probability distribution Q approximates the probability distribution P. It is often also used as a 'distance metric' between two probability distributions (although it is not symmetric).
handle | raft::handle_t |
y | Array of probabilities corresponding to distribution P |
y_hat | Array of probabilities corresponding to distribution Q |
n | Number of elements in y and y_hat |
float ML::Metrics::kl_divergence | ( | const raft::handle_t & | handle, |
const float * | y, | ||
const float * | y_hat, | ||
int | n | ||
) |
Calculates the "Kullback-Leibler Divergence"
The KL divergence tells us how well the probability distribution Q approximates the probability distribution P. It is often also used as a 'distance metric' between two probability distributions (although it is not symmetric).
handle | raft::handle_t |
y | Array of probabilities corresponding to distribution P |
y_hat | Array of probabilities corresponding to distribution Q |
n | Number of elements in y and y_hat |
double ML::Metrics::mutual_info_score | ( | const raft::handle_t & | handle, |
const int * | y, | ||
const int * | y_hat, | ||
const int | n, | ||
const int | lower_class_range, | ||
const int | upper_class_range | ||
) |
Calculates the "Mutual Information score" between two clusters
Mutual Information is a measure of the similarity between two labels of the same data.
handle | raft::handle_t |
y | Array of response variables of the first clustering classifications |
y_hat | Array of response variables of the second clustering classifications |
n | Number of elements in y and y_hat |
lower_class_range | the lowest value in the range of classes |
upper_class_range | the highest value in the range of classes |
void ML::Metrics::pairwise_distance | ( | const raft::handle_t & | handle, |
const double * | x, | ||
const double * | y, | ||
double * | dist, | ||
int | m, | ||
int | n, | ||
int | k, | ||
cuvs::distance::DistanceType | metric, | ||
bool | isRowMajor = true , |
||
double | metric_arg = 2.0 |
||
) |
Calculates the ij pairwise distances between two input arrays of double type.
handle | raft::handle_t |
x | pointer to the input data samples array (mRows x kCols) |
y | pointer to the second input data samples array. Can use the same pointer as x (nRows x kCols) |
dist | output pointer where the results will be stored (mRows x nCols) |
m | number of rows in x |
n | number of rows in y |
k | number of cols in x and y (must be the same) |
metric | the distance metric to use for the calculation |
isRowMajor | specifies whether the x and y data pointers are row (C type array) or col (F type array) major |
metric_arg | the value of p for Minkowski (l-p) distances. |
void ML::Metrics::pairwise_distance | ( | const raft::handle_t & | handle, |
const float * | x, | ||
const float * | y, | ||
float * | dist, | ||
int | m, | ||
int | n, | ||
int | k, | ||
cuvs::distance::DistanceType | metric, | ||
bool | isRowMajor = true , |
||
float | metric_arg = 2.0f |
||
) |
Calculates the ij pairwise distances between two input arrays of float type.
handle | raft::handle_t |
x | pointer to the input data samples array (mRows x kCols) |
y | pointer to the second input data samples array. Can use the same pointer as x (nRows x kCols) |
dist | output pointer where the results will be stored (mRows x nCols) |
m | number of rows in x |
n | number of rows in y |
k | number of cols in x and y (must be the same) |
metric | the distance metric to use for the calculation |
isRowMajor | specifies whether the x and y data pointers are row (C type array) or col (F type array) major |
metric_arg | the value of p for Minkowski (l-p) distances. |
void ML::Metrics::pairwiseDistance_sparse | ( | const raft::handle_t & | handle, |
double * | x, | ||
double * | y, | ||
double * | dist, | ||
int | x_nrows, | ||
int | y_nrows, | ||
int | n_cols, | ||
int | x_nnz, | ||
int | y_nnz, | ||
int * | x_indptr, | ||
int * | y_indptr, | ||
int * | x_indices, | ||
int * | y_indices, | ||
cuvs::distance::DistanceType | metric, | ||
float | metric_arg | ||
) |
void ML::Metrics::pairwiseDistance_sparse | ( | const raft::handle_t & | handle, |
float * | x, | ||
float * | y, | ||
float * | dist, | ||
int | x_nrows, | ||
int | y_nrows, | ||
int | n_cols, | ||
int | x_nnz, | ||
int | y_nnz, | ||
int * | x_indptr, | ||
int * | y_indptr, | ||
int * | x_indices, | ||
int * | y_indices, | ||
cuvs::distance::DistanceType | metric, | ||
float | metric_arg | ||
) |
double ML::Metrics::r2_score_py | ( | const raft::handle_t & | handle, |
double * | y, | ||
double * | y_hat, | ||
int | n | ||
) |
Calculates the "Coefficient of Determination" (R-Squared) score normalizing the sum of squared errors by the total sum of squares with double precision.
This score indicates the proportion of the variation in an expected response variable that is explained by the independent variables in a linear regression model. The larger the R-squared value, the more variability is explained by the linear regression model.
handle | raft::handle_t |
y | Array of ground-truth response variables |
y_hat | Array of predicted response variables |
n | Number of elements in y and y_hat |
float ML::Metrics::r2_score_py | ( | const raft::handle_t & | handle, |
float * | y, | ||
float * | y_hat, | ||
int | n | ||
) |
Calculates the "Coefficient of Determination" (R-Squared) score normalizing the sum of squared errors by the total sum of squares with single precision.
This score indicates the proportion of the variation in an expected response variable that is explained by the independent variables in a linear regression model. The larger the R-squared value, the more variability is explained by the linear regression model.
handle | raft::handle_t |
y | Array of ground-truth response variables |
y_hat | Array of predicted response variables |
n | Number of elements in y and y_hat |
double ML::Metrics::rand_index | ( | const raft::handle_t & | handle, |
double * | y, | ||
double * | y_hat, | ||
int | n | ||
) |
Calculates the "rand index"
This metric is a measure of similarity between two data clusterings.
handle | raft::handle_t |
y | Array of response variables of the first clustering classifications |
y_hat | Array of response variables of the second clustering classifications |
n | Number of elements in y and y_hat |
double ML::Metrics::silhouette_score | ( | const raft::handle_t & | handle, |
double * | y, | ||
int | nRows, | ||
int | nCols, | ||
int * | labels, | ||
int | nLabels, | ||
double * | silScores, | ||
cuvs::distance::DistanceType | metric | ||
) |
Calculates the "Silhouette Score"
The Silhouette Coefficient is calculated using the mean intra-cluster distance (a) and the mean nearest-cluster distance (b) for each sample. The Silhouette Coefficient for a sample is (b - a) / max(a, b). To clarify, b is the distance between a sample and the nearest cluster that the sample is not a part of. Note that the Silhouette Coefficient is only defined if the number of labels satisfies 2 <= n_labels <= n_samples - 1.
handle | raft::handle_t |
y | Array of data samples with dimensions (nRows x nCols) |
nRows | number of data samples |
nCols | number of features |
labels | Array containing labels for every data sample (1 x nRows) |
nLabels | number of Labels |
metric | the numerical value that maps to the type of distance metric to be used in the calculations |
silScores | Array that is optionally taken in as input if required to be populated with the silhouette score for every sample (1 x nRows), else nullptr is passed |
double ML::Metrics::trustworthiness_score | ( | const raft::handle_t & | h, |
const math_t * | X, | ||
math_t * | X_embedded, | ||
int | n, | ||
int | m, | ||
int | d, | ||
int | n_neighbors, | ||
int | batchSize = 512 |
||
) |
Compute the trustworthiness score.
h | Raft handle |
X | Data in original dimension |
X_embedded | Data in target dimension (embedding) |
n | Number of samples |
m | Number of features in high/original dimension |
d | Number of features in low/embedded dimension |
n_neighbors | Number of neighbors considered by trustworthiness score |
batchSize | Batch size |
distance_type | Distance type to consider |
double ML::Metrics::v_measure | ( | const raft::handle_t & | handle, |
const int * | y, | ||
const int * | y_hat, | ||
const int | n, | ||
const int | lower_class_range, | ||
const int | upper_class_range, | ||
double | beta | ||
) |
Calculates the "v-measure" between two clusters
v-measure is the harmonic mean between the homogeneity and completeness scores of 2 cluster classifications
handle | raft::handle_t |
y | truth labels |
y_hat | predicted labels |
n | Number of elements in y and y_hat |
lower_class_range | the lowest value in the range of classes |
upper_class_range | the highest value in the range of classes |
beta | Ratio of weight attributed to homogeneity vs completeness |