Namespaces | Functions
ML::Metrics Namespace Reference

Namespaces

 Batched
 

Functions

float r2_score_py (const raft::handle_t &handle, float *y, float *y_hat, int n)
 
double r2_score_py (const raft::handle_t &handle, double *y, double *y_hat, int n)
 
double rand_index (const raft::handle_t &handle, double *y, double *y_hat, int n)
 
double silhouette_score (const raft::handle_t &handle, double *y, int nRows, int nCols, int *labels, int nLabels, double *silScores, cuvs::distance::DistanceType metric)
 
double kl_divergence (const raft::handle_t &handle, const double *y, const double *y_hat, int n)
 
float kl_divergence (const raft::handle_t &handle, const float *y, const float *y_hat, int n)
 
double entropy (const raft::handle_t &handle, const int *y, const int n, const int lower_class_range, const int upper_class_range)
 
double mutual_info_score (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range)
 
double homogeneity_score (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range)
 
double completeness_score (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range)
 
double v_measure (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range, double beta)
 
float accuracy_score_py (const raft::handle_t &handle, const int *predictions, const int *ref_predictions, int n)
 
void pairwise_distance (const raft::handle_t &handle, const double *x, const double *y, double *dist, int m, int n, int k, cuvs::distance::DistanceType metric, bool isRowMajor=true, double metric_arg=2.0)
 Calculates the ij pairwise distances between two input arrays of double type. More...
 
void pairwise_distance (const raft::handle_t &handle, const float *x, const float *y, float *dist, int m, int n, int k, cuvs::distance::DistanceType metric, bool isRowMajor=true, float metric_arg=2.0f)
 Calculates the ij pairwise distances between two input arrays of float type. More...
 
void pairwiseDistance_sparse (const raft::handle_t &handle, double *x, double *y, double *dist, int x_nrows, int y_nrows, int n_cols, int x_nnz, int y_nnz, int *x_indptr, int *y_indptr, int *x_indices, int *y_indices, cuvs::distance::DistanceType metric, float metric_arg)
 
void pairwiseDistance_sparse (const raft::handle_t &handle, float *x, float *y, float *dist, int x_nrows, int y_nrows, int n_cols, int x_nnz, int y_nnz, int *x_indptr, int *y_indptr, int *x_indices, int *y_indices, cuvs::distance::DistanceType metric, float metric_arg)
 
template<typename math_t , cuvs::distance::DistanceType distance_type>
double trustworthiness_score (const raft::handle_t &h, const math_t *X, math_t *X_embedded, int n, int m, int d, int n_neighbors, int batchSize=512)
 Compute the trustworthiness score. More...
 
double adjusted_rand_index (const raft::handle_t &handle, const int64_t *y, const int64_t *y_hat, const int64_t n)
 
double adjusted_rand_index (const raft::handle_t &handle, const int *y, const int *y_hat, const int n)
 

Function Documentation

◆ accuracy_score_py()

float ML::Metrics::accuracy_score_py ( const raft::handle_t &  handle,
const int *  predictions,
const int *  ref_predictions,
int  n 
)

Calculates the "accuracy" between two input numpy arrays/ cudf series

The accuracy metric measures how closely the predicted labels match the reference (truth) labels.

Parameters
handleraft::handle_t
predictionspredicted labels
ref_predictionstruth labels
nNumber of elements in predictions and ref_predictions
Returns
: The accuracy

◆ adjusted_rand_index() [1/2]

double ML::Metrics::adjusted_rand_index ( const raft::handle_t &  handle,
const int *  y,
const int *  y_hat,
const int  n 
)

◆ adjusted_rand_index() [2/2]

double ML::Metrics::adjusted_rand_index ( const raft::handle_t &  handle,
const int64_t *  y,
const int64_t *  y_hat,
const int64_t  n 
)

Calculates the "adjusted rand index"

This metric is the corrected-for-chance version of the rand index

Parameters
handleraft::handle_t
yArray of response variables of the first clustering classifications
y_hatArray of response variables of the second clustering classifications
nNumber of elements in y and y_hat
Returns
: The adjusted rand index value

◆ completeness_score()

double ML::Metrics::completeness_score ( const raft::handle_t &  handle,
const int *  y,
const int *  y_hat,
const int  n,
const int  lower_class_range,
const int  upper_class_range 
)

Calculates the "completeness score" between two clusters

A clustering result satisfies completeness if all the data points that are members of a given class are elements of the same cluster.

Parameters
handleraft::handle_t
ytruth labels
y_hatpredicted labels
nNumber of elements in y and y_hat
lower_class_rangethe lowest value in the range of classes
upper_class_rangethe highest value in the range of classes
Returns
: The completeness score

◆ entropy()

double ML::Metrics::entropy ( const raft::handle_t &  handle,
const int *  y,
const int  n,
const int  lower_class_range,
const int  upper_class_range 
)

Calculates the "entropy" of a labelling

This metric is a measure of the purity/polarity of the clustering

Parameters
handleraft::handle_t
yArray of response variables of the clustering
nNumber of elements in y
lower_class_rangethe lowest value in the range of classes
upper_class_rangethe highest value in the range of classes
Returns
: The entropy value of the clustering

◆ homogeneity_score()

double ML::Metrics::homogeneity_score ( const raft::handle_t &  handle,
const int *  y,
const int *  y_hat,
const int  n,
const int  lower_class_range,
const int  upper_class_range 
)

Calculates the "homogeneity score" between two clusters

A clustering result satisfies homogeneity if all of its clusters contain only data points which are members of a single class.

Parameters
handleraft::handle_t
ytruth labels
y_hatpredicted labels
nNumber of elements in y and y_hat
lower_class_rangethe lowest value in the range of classes
upper_class_rangethe highest value in the range of classes
Returns
: The homogeneity score

◆ kl_divergence() [1/2]

double ML::Metrics::kl_divergence ( const raft::handle_t &  handle,
const double *  y,
const double *  y_hat,
int  n 
)

Calculates the "Kullback-Leibler Divergence"

The KL divergence tells us how well the probability distribution Q approximates the probability distribution P. It is also often used as a 'distance metric' between two probability distributions, although it is not symmetric.

Parameters
handleraft::handle_t
yArray of probabilities corresponding to distribution P
y_hatArray of probabilities corresponding to distribution Q
nNumber of elements in y and y_hat
Returns
: The KL Divergence value

◆ kl_divergence() [2/2]

float ML::Metrics::kl_divergence ( const raft::handle_t &  handle,
const float *  y,
const float *  y_hat,
int  n 
)

Calculates the "Kullback-Leibler Divergence"

The KL divergence tells us how well the probability distribution Q approximates the probability distribution P. It is also often used as a 'distance metric' between two probability distributions, although it is not symmetric.

Parameters
handleraft::handle_t
yArray of probabilities corresponding to distribution P
y_hatArray of probabilities corresponding to distribution Q
nNumber of elements in y and y_hat
Returns
: The KL Divergence value

◆ mutual_info_score()

double ML::Metrics::mutual_info_score ( const raft::handle_t &  handle,
const int *  y,
const int *  y_hat,
const int  n,
const int  lower_class_range,
const int  upper_class_range 
)

Calculates the "Mutual Information score" between two clusters

Mutual Information is a measure of the similarity between two labels of the same data.

Parameters
handleraft::handle_t
yArray of response variables of the first clustering classifications
y_hatArray of response variables of the second clustering classifications
nNumber of elements in y and y_hat
lower_class_rangethe lowest value in the range of classes
upper_class_rangethe highest value in the range of classes
Returns
: The mutual information score

◆ pairwise_distance() [1/2]

void ML::Metrics::pairwise_distance ( const raft::handle_t &  handle,
const double *  x,
const double *  y,
double *  dist,
int  m,
int  n,
int  k,
cuvs::distance::DistanceType  metric,
bool  isRowMajor = true,
double  metric_arg = 2.0 
)

Calculates the ij pairwise distances between two input arrays of double type.

Parameters
handleraft::handle_t
xpointer to the input data samples array (mRows x kCols)
ypointer to the second input data samples array. Can use the same pointer as x (nRows x kCols)
distoutput pointer where the results will be stored (mRows x nCols)
mnumber of rows in x
nnumber of rows in y
knumber of cols in x and y (must be the same)
metricthe distance metric to use for the calculation
isRowMajorspecifies whether the x and y data pointers are row (C type array) or col (F type array) major
metric_argthe value of p for Minkowski (l-p) distances.

◆ pairwise_distance() [2/2]

void ML::Metrics::pairwise_distance ( const raft::handle_t &  handle,
const float *  x,
const float *  y,
float *  dist,
int  m,
int  n,
int  k,
cuvs::distance::DistanceType  metric,
bool  isRowMajor = true,
float  metric_arg = 2.0f 
)

Calculates the ij pairwise distances between two input arrays of float type.

Parameters
handleraft::handle_t
xpointer to the input data samples array (mRows x kCols)
ypointer to the second input data samples array. Can use the same pointer as x (nRows x kCols)
distoutput pointer where the results will be stored (mRows x nCols)
mnumber of rows in x
nnumber of rows in y
knumber of cols in x and y (must be the same)
metricthe distance metric to use for the calculation
isRowMajorspecifies whether the x and y data pointers are row (C type array) or col (F type array) major
metric_argthe value of p for Minkowski (l-p) distances.

◆ pairwiseDistance_sparse() [1/2]

void ML::Metrics::pairwiseDistance_sparse ( const raft::handle_t &  handle,
double *  x,
double *  y,
double *  dist,
int  x_nrows,
int  y_nrows,
int  n_cols,
int  x_nnz,
int  y_nnz,
int *  x_indptr,
int *  y_indptr,
int *  x_indices,
int *  y_indices,
cuvs::distance::DistanceType  metric,
float  metric_arg 
)

◆ pairwiseDistance_sparse() [2/2]

void ML::Metrics::pairwiseDistance_sparse ( const raft::handle_t &  handle,
float *  x,
float *  y,
float *  dist,
int  x_nrows,
int  y_nrows,
int  n_cols,
int  x_nnz,
int  y_nnz,
int *  x_indptr,
int *  y_indptr,
int *  x_indices,
int *  y_indices,
cuvs::distance::DistanceType  metric,
float  metric_arg 
)

◆ r2_score_py() [1/2]

double ML::Metrics::r2_score_py ( const raft::handle_t &  handle,
double *  y,
double *  y_hat,
int  n 
)

Calculates the "Coefficient of Determination" (R-Squared) score normalizing the sum of squared errors by the total sum of squares with double precision.

This score indicates the proportion of the variation in an expected response variable that is explained by the independent variables in a linear regression model. The larger the R-squared value, the more variability is explained by the linear regression model.

Parameters
handleraft::handle_t
yArray of ground-truth response variables
y_hatArray of predicted response variables
nNumber of elements in y and y_hat
Returns
: The R-squared value.

◆ r2_score_py() [2/2]

float ML::Metrics::r2_score_py ( const raft::handle_t &  handle,
float *  y,
float *  y_hat,
int  n 
)

Calculates the "Coefficient of Determination" (R-Squared) score normalizing the sum of squared errors by the total sum of squares with single precision.

This score indicates the proportion of the variation in an expected response variable that is explained by the independent variables in a linear regression model. The larger the R-squared value, the more variability is explained by the linear regression model.

Parameters
handleraft::handle_t
yArray of ground-truth response variables
y_hatArray of predicted response variables
nNumber of elements in y and y_hat
Returns
: The R-squared value.

◆ rand_index()

double ML::Metrics::rand_index ( const raft::handle_t &  handle,
double *  y,
double *  y_hat,
int  n 
)

Calculates the "rand index"

This metric is a measure of similarity between two data clusterings.

Parameters
handleraft::handle_t
yArray of response variables of the first clustering classifications
y_hatArray of response variables of the second clustering classifications
nNumber of elements in y and y_hat
Returns
: The rand index value

◆ silhouette_score()

double ML::Metrics::silhouette_score ( const raft::handle_t &  handle,
double *  y,
int  nRows,
int  nCols,
int *  labels,
int  nLabels,
double *  silScores,
cuvs::distance::DistanceType  metric 
)

Calculates the "Silhouette Score"

The Silhouette Coefficient is calculated using the mean intra-cluster distance (a) and the mean nearest-cluster distance (b) for each sample. The Silhouette Coefficient for a sample is (b - a) / max(a, b). To clarify, b is the distance between a sample and the nearest cluster that the sample is not a part of. Note that the Silhouette Coefficient is only defined if the number of labels satisfies 2 <= n_labels <= n_samples - 1.

Parameters
handleraft::handle_t
yArray of data samples with dimensions (nRows x nCols)
nRowsnumber of data samples
nColsnumber of features
labelsArray containing labels for every data sample (1 x nRows)
nLabelsnumber of Labels
metricthe numerical value that maps to the type of distance metric to be used in the calculations
silScoresArray that is optionally taken in as input if required to be populated with the silhouette score for every sample (1 x nRows), else nullptr is passed

◆ trustworthiness_score()

template<typename math_t , cuvs::distance::DistanceType distance_type>
double ML::Metrics::trustworthiness_score ( const raft::handle_t &  h,
const math_t *  X,
math_t *  X_embedded,
int  n,
int  m,
int  d,
int  n_neighbors,
int  batchSize = 512 
)

Compute the trustworthiness score.

Parameters
hRaft handle
XData in original dimension
X_embeddedData in target dimension (embedding)
nNumber of samples
mNumber of features in high/original dimension
dNumber of features in low/embedded dimension
n_neighborsNumber of neighbors considered by trustworthiness score
batchSizeBatch size
Template Parameters
distance_typeDistance type to consider
Returns
Trustworthiness score

◆ v_measure()

double ML::Metrics::v_measure ( const raft::handle_t &  handle,
const int *  y,
const int *  y_hat,
const int  n,
const int  lower_class_range,
const int  upper_class_range,
double  beta 
)

Calculates the "v-measure" between two clusters

v-measure is the harmonic mean between the homogeneity and completeness scores of 2 cluster classifications

Parameters
handleraft::handle_t
ytruth labels
y_hatpredicted labels
nNumber of elements in y and y_hat
lower_class_rangethe lowest value in the range of classes
upper_class_rangethe highest value in the range of classes
betaRatio of weight attributed to homogeneity vs completeness
Returns
: The v-measure