Attention
The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called cuVS. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by the RAPIDS 24.10 (October) release, and they will be removed from RAFT altogether in the 24.12 (December) release.
Neighborhood Model Scoring#
Trustworthiness#
#include <raft/stats/trustworthiness.cuh>
namespace raft::stats
-
template<raft::distance::DistanceType distance_type, typename value_t, typename idx_t>
double trustworthiness_score(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, raft::row_major> X, raft::device_matrix_view<const value_t, idx_t, raft::row_major> X_embedded, int n_neighbors, int batch_size = 512)# Compute the trustworthiness score.
Note
The constness of the data in X_embedded is currently cast away and the data is slightly modified.
- Template Parameters:
distance_type – the distance metric to use (a raft::distance::DistanceType value)
value_t – the data type
idx_t – Integer type used for addressing
- Parameters:
handle – [in] the raft handle
X – [in] Data in original dimension
X_embedded – [in] Data in target dimension (embedding)
n_neighbors – [in] Number of neighbors considered by trustworthiness score
batch_size – [in] Batch size
- Returns:
Trustworthiness score
Neighborhood Recall#
#include <raft/stats/neighborhood_recall.cuh>
namespace raft::stats
-
template<typename IndicesValueType, typename IndexType, typename ScalarType, typename DistanceValueType = float>
void neighborhood_recall(raft::resources const &res, raft::device_matrix_view<const IndicesValueType, IndexType, raft::row_major> indices, raft::device_matrix_view<const IndicesValueType, IndexType, raft::row_major> ref_indices, raft::device_scalar_view<ScalarType> recall_score, std::optional<raft::device_matrix_view<const DistanceValueType, IndexType, raft::row_major>> distances = std::nullopt, std::optional<raft::device_matrix_view<const DistanceValueType, IndexType, raft::row_major>> ref_distances = std::nullopt, std::optional<raft::host_scalar_view<const DistanceValueType>> eps = std::nullopt)# Calculate Neighborhood Recall score on the device for indices, distances computed by any Nearest Neighbors Algorithm against reference indices, distances. Recall score is calculated by comparing the total number of matching indices and dividing that value by the total size of the indices matrix of dimensions (D, k). If distance matrices are provided, then non-matching indices could be considered a match if abs(dist - ref_dist) < eps.
Usage example:
raft::device_resources res; // assume D rows and N column dataset auto k = 64; auto indices = raft::make_device_matrix<int>(res, D, k); auto distances = raft::make_device_matrix<float>(res, D, k); // run ANN algorithm of choice auto ref_indices = raft::make_device_matrix<int>(res, D, k); auto ref_distances = raft::make_device_matrix<float>(res, D, k); // run brute-force KNN for reference auto scalar = 0.0f; auto recall_score = raft::make_device_scalar(res, scalar); raft::stats::neighborhood_recall(res, raft::make_const_mdspan(indices.view()), raft::make_const_mdspan(ref_indices.view()), recall_score.view(), raft::make_const_mdspan(distances.view()), raft::make_const_mdspan(ref_distances.view()));
- Template Parameters:
IndicesValueType – data-type of the indices
IndexType – data-type to index all matrices
ScalarType – data-type to store recall score
DistanceValueType – data-type of the distances
- Parameters:
res – raft::resources object to manage resources
indices – [in] raft::device_matrix_view indices of neighbors
ref_indices – [in] raft::device_matrix_view reference indices of neighbors
recall_score – [out] raft::device_scalar_view output recall score
distances – [in] (optional) raft::device_matrix_view distances of neighbors
ref_distances – [in] (optional) raft::device_matrix_view reference distances of neighbors
eps – [in] (optional, default = 0.001) value within which distances are considered matching
-
template<typename IndicesValueType, typename IndexType, typename ScalarType, typename DistanceValueType = float>
void neighborhood_recall(raft::resources const &res, raft::device_matrix_view<const IndicesValueType, IndexType, raft::row_major> indices, raft::device_matrix_view<const IndicesValueType, IndexType, raft::row_major> ref_indices, raft::host_scalar_view<ScalarType> recall_score, std::optional<raft::device_matrix_view<const DistanceValueType, IndexType, raft::row_major>> distances = std::nullopt, std::optional<raft::device_matrix_view<const DistanceValueType, IndexType, raft::row_major>> ref_distances = std::nullopt, std::optional<raft::host_scalar_view<const DistanceValueType>> eps = std::nullopt)# Calculate Neighborhood Recall score on the host for indices, distances computed by any Nearest Neighbors Algorithm against reference indices, distances. Recall score is calculated by comparing the total number of matching indices and dividing that value by the total size of the indices matrix of dimensions (D, k). If distance matrices are provided, then non-matching indices could be considered a match if abs(dist - ref_dist) < eps.
Usage example:
raft::device_resources res; // assume D rows and N column dataset auto k = 64; auto indices = raft::make_device_matrix<int>(res, D, k); auto distances = raft::make_device_matrix<float>(res, D, k); // run ANN algorithm of choice auto ref_indices = raft::make_device_matrix<int>(res, D, k); auto ref_distances = raft::make_device_matrix<float>(res, D, k); // run brute-force KNN for reference auto scalar = 0.0f; auto recall_score = raft::make_host_scalar(scalar); raft::stats::neighborhood_recall(res, raft::make_const_mdspan(indices.view()), raft::make_const_mdspan(ref_indices.view()), recall_score.view(), raft::make_const_mdspan(distances.view()), raft::make_const_mdspan(ref_distances.view()));
- Template Parameters:
IndicesValueType – data-type of the indices
IndexType – data-type to index all matrices
ScalarType – data-type to store recall score
DistanceValueType – data-type of the distances
- Parameters:
res – raft::resources object to manage resources
indices – [in] raft::device_matrix_view indices of neighbors
ref_indices – [in] raft::device_matrix_view reference indices of neighbors
recall_score – [out] raft::host_scalar_view output recall score
distances – [in] (optional) raft::device_matrix_view distances of neighbors
ref_distances – [in] (optional) raft::device_matrix_view reference distances of neighbors
eps – [in] (optional, default = 0.001) value within which distances are considered matching