Namespaces
	Batched

Functions
float	r2_score_py (const raft::handle_t &handle, float *y, float *y_hat, int n)

double	r2_score_py (const raft::handle_t &handle, double *y, double *y_hat, int n)

double	rand_index (const raft::handle_t &handle, double *y, double *y_hat, int n)

double	silhouette_score (const raft::handle_t &handle, double *y, int nRows, int nCols, int *labels, int nLabels, double *silScores, cuvs::distance::DistanceType metric)

double	kl_divergence (const raft::handle_t &handle, const double *y, const double *y_hat, int n)

float	kl_divergence (const raft::handle_t &handle, const float *y, const float *y_hat, int n)

double	entropy (const raft::handle_t &handle, const int *y, const int n, const int lower_class_range, const int upper_class_range)

double	mutual_info_score (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range)

double	homogeneity_score (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range)

double	completeness_score (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range)

double	v_measure (const raft::handle_t &handle, const int *y, const int *y_hat, const int n, const int lower_class_range, const int upper_class_range, double beta)

float	accuracy_score_py (const raft::handle_t &handle, const int *predictions, const int *ref_predictions, int n)

void	pairwise_distance (const raft::handle_t &handle, const double *x, const double *y, double *dist, int m, int n, int k, cuvs::distance::DistanceType metric, bool isRowMajor=true, double metric_arg=2.0)
	Calculates the ij pairwise distances between two input arrays of double type. More...

void	pairwise_distance (const raft::handle_t &handle, const float *x, const float *y, float *dist, int m, int n, int k, cuvs::distance::DistanceType metric, bool isRowMajor=true, float metric_arg=2.0f)
	Calculates the ij pairwise distances between two input arrays of float type. More...

void	pairwiseDistance_sparse (const raft::handle_t &handle, double *x, double *y, double *dist, int x_nrows, int y_nrows, int n_cols, int x_nnz, int y_nnz, int *x_indptr, int *y_indptr, int *x_indices, int *y_indices, cuvs::distance::DistanceType metric, float metric_arg)

void	pairwiseDistance_sparse (const raft::handle_t &handle, float *x, float *y, float *dist, int x_nrows, int y_nrows, int n_cols, int x_nnz, int y_nnz, int *x_indptr, int *y_indptr, int *x_indices, int *y_indices, cuvs::distance::DistanceType metric, float metric_arg)

template<typename math_t , cuvs::distance::DistanceType distance_type>
double	trustworthiness_score (const raft::handle_t &h, const math_t *X, math_t *X_embedded, int n, int m, int d, int n_neighbors, int batchSize=512)
	Compute the trustworthiness score. More...


double	adjusted_rand_index (const raft::handle_t &handle, const int64_t *y, const int64_t *y_hat, const int64_t n)

double	adjusted_rand_index (const raft::handle_t &handle, const int *y, const int *y_hat, const int n)

Function Documentation

◆ accuracy_score_py()

float ML::Metrics::accuracy_score_py	(	const raft::handle_t &	handle,
		const int *	predictions,
		const int *	ref_predictions,
		int	n
	)

Calculates the "accuracy" between two input numpy arrays/ cudf series

The accuracy metric is used to calculate the accuracy of the predicted labels with respect to the reference (truth) labels.

Parameters

handle	raft::handle_t
predictions	predicted labels
ref_predictions	truth labels
n	Number of elements in predictions and ref_predictions

Returns: The accuracy

◆ adjusted_rand_index() [1/2]

double ML::Metrics::adjusted_rand_index	(	const raft::handle_t &	handle,
		const int *	y,
		const int *	y_hat,
		const int	n
	)

◆ adjusted_rand_index() [2/2]

double ML::Metrics::adjusted_rand_index	(	const raft::handle_t &	handle,
		const int64_t *	y,
		const int64_t *	y_hat,
		const int64_t	n
	)

Calculates the "adjusted rand index"

This metric is the corrected-for-chance version of the rand index

Parameters

handle	raft::handle_t
y	Array of response variables of the first clustering classifications
y_hat	Array of response variables of the second clustering classifications
n	Number of elements in y and y_hat

Returns: The adjusted rand index value

◆ completeness_score()

double ML::Metrics::completeness_score	(	const raft::handle_t &	handle,
		const int *	y,
		const int *	y_hat,
		const int	n,
		const int	lower_class_range,
		const int	upper_class_range
	)

Calculates the "completeness score" between two clusters

A clustering result satisfies completeness if all the data points that are members of a given class are elements of the same cluster.

Parameters

handle	raft::handle_t
y	truth labels
y_hat	predicted labels
n	Number of elements in y and y_hat
lower_class_range	the lowest value in the range of classes
upper_class_range	the highest value in the range of classes

Returns: The completeness score

◆ entropy()

double ML::Metrics::entropy	(	const raft::handle_t &	handle,
		const int *	y,
		const int	n,
		const int	lower_class_range,
		const int	upper_class_range
	)

Calculates the "entropy" of a labelling

This metric is a measure of the purity/polarity of the clustering

Parameters

handle	raft::handle_t
y	Array of response variables of the clustering
n	Number of elements in y
lower_class_range	the lowest value in the range of classes
upper_class_range	the highest value in the range of classes

Returns: The entropy value of the clustering

◆ homogeneity_score()

double ML::Metrics::homogeneity_score	(	const raft::handle_t &	handle,
		const int *	y,
		const int *	y_hat,
		const int	n,
		const int	lower_class_range,
		const int	upper_class_range
	)

Calculates the "homogeneity score" between two clusters

A clustering result satisfies homogeneity if all of its clusters contain only data points which are members of a single class.

Parameters

handle	raft::handle_t
y	truth labels
y_hat	predicted labels
n	Number of elements in y and y_hat
lower_class_range	the lowest value in the range of classes
upper_class_range	the highest value in the range of classes

Returns: The homogeneity score

◆ kl_divergence() [1/2]

double ML::Metrics::kl_divergence	(	const raft::handle_t &	handle,
		const double *	y,
		const double *	y_hat,
		int	n
	)

Calculates the "Kullback-Leibler Divergence"

The KL divergence tells us how well the probability distribution Q approximates the probability distribution P. It is often also used as a 'distance metric' between two probability distributions (not symmetric).

Parameters

handle	raft::handle_t
y	Array of probabilities corresponding to distribution P
y_hat	Array of probabilities corresponding to distribution Q
n	Number of elements in y and y_hat

Returns: The KL Divergence value

◆ kl_divergence() [2/2]

float ML::Metrics::kl_divergence	(	const raft::handle_t &	handle,
		const float *	y,
		const float *	y_hat,
		int	n
	)

Calculates the "Kullback-Leibler Divergence"

The KL divergence tells us how well the probability distribution Q approximates the probability distribution P. It is often also used as a 'distance metric' between two probability distributions (not symmetric).

Parameters

handle	raft::handle_t
y	Array of probabilities corresponding to distribution P
y_hat	Array of probabilities corresponding to distribution Q
n	Number of elements in y and y_hat

Returns: The KL Divergence value

◆ mutual_info_score()

double ML::Metrics::mutual_info_score	(	const raft::handle_t &	handle,
		const int *	y,
		const int *	y_hat,
		const int	n,
		const int	lower_class_range,
		const int	upper_class_range
	)

Calculates the "Mutual Information score" between two clusters

Mutual Information is a measure of the similarity between two labels of the same data.

Parameters

handle	raft::handle_t
y	Array of response variables of the first clustering classifications
y_hat	Array of response variables of the second clustering classifications
n	Number of elements in y and y_hat
lower_class_range	the lowest value in the range of classes
upper_class_range	the highest value in the range of classes

Returns: The mutual information score

◆ pairwise_distance() [1/2]

void ML::Metrics::pairwise_distance	(	const raft::handle_t &	handle,
		const double *	x,
		const double *	y,
		double *	dist,
		int	m,
		int	n,
		int	k,
		cuvs::distance::DistanceType	metric,
		bool	isRowMajor = `true`,
		double	metric_arg = `2.0`
	)

Calculates the ij pairwise distances between two input arrays of double type.

Parameters

handle	raft::handle_t
x	pointer to the input data samples array (mRows x kCols)
y	pointer to the second input data samples array. Can use the same pointer as x (nRows x kCols)
dist	output pointer where the results will be stored (mRows x nCols)
m	number of rows in x
n	number of rows in y
k	number of cols in x and y (must be the same)
metric	the distance metric to use for the calculation
isRowMajor	specifies whether the x and y data pointers are row (C type array) or col (F type array) major
metric_arg	the value of `p` for Minkowski (l-p) distances.

◆ pairwise_distance() [2/2]

void ML::Metrics::pairwise_distance	(	const raft::handle_t &	handle,
		const float *	x,
		const float *	y,
		float *	dist,
		int	m,
		int	n,
		int	k,
		cuvs::distance::DistanceType	metric,
		bool	isRowMajor = `true`,
		float	metric_arg = `2.0f`
	)

Calculates the ij pairwise distances between two input arrays of float type.

Parameters

handle	raft::handle_t
x	pointer to the input data samples array (mRows x kCols)
y	pointer to the second input data samples array. Can use the same pointer as x (nRows x kCols)
dist	output pointer where the results will be stored (mRows x nCols)
m	number of rows in x
n	number of rows in y
k	number of cols in x and y (must be the same)
metric	the distance metric to use for the calculation
isRowMajor	specifies whether the x and y data pointers are row (C type array) or col (F type array) major
metric_arg	the value of `p` for Minkowski (l-p) distances.

◆ pairwiseDistance_sparse() [1/2]

void ML::Metrics::pairwiseDistance_sparse	(	const raft::handle_t &	handle,
		double *	x,
		double *	y,
		double *	dist,
		int	x_nrows,
		int	y_nrows,
		int	n_cols,
		int	x_nnz,
		int	y_nnz,
		int *	x_indptr,
		int *	y_indptr,
		int *	x_indices,
		int *	y_indices,
		cuvs::distance::DistanceType	metric,
		float	metric_arg
	)

◆ pairwiseDistance_sparse() [2/2]

void ML::Metrics::pairwiseDistance_sparse	(	const raft::handle_t &	handle,
		float *	x,
		float *	y,
		float *	dist,
		int	x_nrows,
		int	y_nrows,
		int	n_cols,
		int	x_nnz,
		int	y_nnz,
		int *	x_indptr,
		int *	y_indptr,
		int *	x_indices,
		int *	y_indices,
		cuvs::distance::DistanceType	metric,
		float	metric_arg
	)

◆ r2_score_py() [1/2]

double ML::Metrics::r2_score_py	(	const raft::handle_t &	handle,
		double *	y,
		double *	y_hat,
		int	n
	)

Calculates the "Coefficient of Determination" (R-Squared) score normalizing the sum of squared errors by the total sum of squares with double precision.

This score indicates the proportionate amount of variation in an expected response variable that is explained by the independent variables in a linear regression model. The larger the R-squared value, the more variability is explained by the linear regression model.

Parameters

handle	raft::handle_t
y	Array of ground-truth response variables
y_hat	Array of predicted response variables
n	Number of elements in y and y_hat

Returns: The R-squared value.

◆ r2_score_py() [2/2]

float ML::Metrics::r2_score_py	(	const raft::handle_t &	handle,
		float *	y,
		float *	y_hat,
		int	n
	)

Calculates the "Coefficient of Determination" (R-Squared) score normalizing the sum of squared errors by the total sum of squares with single precision.

This score indicates the proportionate amount of variation in an expected response variable that is explained by the independent variables in a linear regression model. The larger the R-squared value, the more variability is explained by the linear regression model.

Parameters

handle	raft::handle_t
y	Array of ground-truth response variables
y_hat	Array of predicted response variables
n	Number of elements in y and y_hat

Returns: The R-squared value.

◆ rand_index()

double ML::Metrics::rand_index	(	const raft::handle_t &	handle,
		double *	y,
		double *	y_hat,
		int	n
	)

Calculates the "rand index"

This metric is a measure of similarity between two data clusterings.

Parameters

handle	raft::handle_t
y	Array of response variables of the first clustering classifications
y_hat	Array of response variables of the second clustering classifications
n	Number of elements in y and y_hat

Returns: The rand index value

◆ silhouette_score()

double ML::Metrics::silhouette_score	(	const raft::handle_t &	handle,
		double *	y,
		int	nRows,
		int	nCols,
		int *	labels,
		int	nLabels,
		double *	silScores,
		cuvs::distance::DistanceType	metric
	)

Calculates the "Silhouette Score"

The Silhouette Coefficient is calculated using the mean intra-cluster distance (a) and the mean nearest-cluster distance (b) for each sample. The Silhouette Coefficient for a sample is (b - a) / max(a, b). To clarify, b is the distance between a sample and the nearest cluster that the sample is not a part of. Note that Silhouette Coefficient is only defined if number of labels is 2 <= n_labels <= n_samples - 1.

Parameters

handle	raft::handle_t
y	Array of data samples with dimensions (nRows x nCols)
nRows	number of data samples
nCols	number of features
labels	Array containing labels for every data sample (1 x nRows)
nLabels	number of Labels
metric	the numerical value that maps to the type of distance metric to be used in the calculations
silScores	Array that is optionally taken in as input if required to be populated with the silhouette score for every sample (1 x nRows), else nullptr is passed

◆ trustworthiness_score()

template<typename math_t , cuvs::distance::DistanceType distance_type>

double ML::Metrics::trustworthiness_score	(	const raft::handle_t &	h,
		const math_t *	X,
		math_t *	X_embedded,
		int	n,
		int	m,
		int	d,
		int	n_neighbors,
		int	batchSize = `512`
	)

Compute the trustworthiness score.

Parameters

h	Raft handle
X	Data in original dimension
X_embedded	Data in target dimension (embedding)
n	Number of samples
m	Number of features in high/original dimension
d	Number of features in low/embedded dimension
n_neighbors	Number of neighbors considered by trustworthiness score
batchSize	Batch size

Template Parameters

distance_type Distance type to consider

Returns: Trustworthiness score

◆ v_measure()

double ML::Metrics::v_measure	(	const raft::handle_t &	handle,
		const int *	y,
		const int *	y_hat,
		const int	n,
		const int	lower_class_range,
		const int	upper_class_range,
		double	beta
	)

Calculates the "v-measure" between two clusters

v-measure is the harmonic mean between the homogeneity and completeness scores of 2 cluster classifications

Parameters

handle	raft::handle_t
y	truth labels
y_hat	predicted labels
n	Number of elements in y and y_hat
lower_class_range	the lowest value in the range of classes
upper_class_range	the highest value in the range of classes
beta	Ratio of weight attributed to homogeneity vs completeness

Returns: The v-measure

Namespaces

Functions

Function Documentation

◆ accuracy_score_py()

◆ adjusted_rand_index() [1/2]

◆ adjusted_rand_index() [2/2]

◆ completeness_score()

◆ entropy()

◆ homogeneity_score()

◆ kl_divergence() [1/2]

◆ kl_divergence() [2/2]

◆ mutual_info_score()

◆ pairwise_distance() [1/2]

◆ pairwise_distance() [2/2]

◆ pairwiseDistance_sparse() [1/2]

◆ pairwiseDistance_sparse() [2/2]

◆ r2_score_py() [1/2]

◆ r2_score_py() [2/2]

◆ rand_index()

◆ silhouette_score()

◆ trustworthiness_score()

◆ v_measure()