Attention
The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called cuVS. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.10 (October) release and they will be removed from RAFT altogether in the 24.12 (December) release.
CAGRA#
CAGRA is a graph-based nearest neighbors implementation with state-of-the art query performance for both small- and large-batch sized search.
Please note that the CAGRA implementation is currently experimental and the API is subject to change from release to release. We are currently working on promoting CAGRA to a top-level stable API within RAFT.
#include <raft/neighbors/cagra.cuh>
namespace raft::neighbors::cagra
-
enum class graph_build_algo#
ANN algorithm used by CAGRA to build knn graph.
Values:
-
enumerator IVF_PQ#
-
enumerator NN_DESCENT#
-
enumerator IVF_PQ#
-
enum class search_algo#
Values:
-
enumerator SINGLE_CTA#
For large batch sizes.
-
enumerator MULTI_CTA#
For small batch sizes.
-
enumerator MULTI_KERNEL#
-
enumerator AUTO#
-
enumerator SINGLE_CTA#
-
template<typename DataT, typename IdxT, typename accessor>
void build_knn_graph(raft::resources const &res, mdspan<const DataT, matrix_extent<int64_t>, row_major, accessor> dataset, raft::host_matrix_view<IdxT, int64_t, row_major> knn_graph, std::optional<float> refine_rate = std::nullopt, std::optional<ivf_pq::index_params> build_params = std::nullopt, std::optional<ivf_pq::search_params> search_params = std::nullopt)# Build a kNN graph using IVF-PQ.
The kNN graph is the first building block for CAGRA index.
The output is a dense matrix that stores the neighbor indices for each point in the dataset. Each point has the same number of neighbors.
See cagra::build for an alternative method.
The following distance metrics are supported:
L2Expanded
InnerProduct
Usage example:
using namespace raft::neighbors; // use default index parameters based on shape of the dataset ivf_pq::index_params build_params = ivf_pq::index_params::from_dataset(dataset); ivf_pq::search_params search_params; auto knn_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 128); // create knn graph cagra::build_knn_graph(res, dataset, knn_graph.view(), 2, build_params, search_params); auto optimized_gaph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 64); cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); // Construct an index from dataset and optimized knn_graph auto index = cagra::index<T, IdxT>(res, build_params.metric(), dataset, optimized_graph.view());
- Template Parameters:
DataT – data element type
IdxT – type of the dataset vector indices
- Parameters:
res – [in] raft resources
dataset – [in] a matrix view (host or device) to a row-major matrix [n_rows, dim]
knn_graph – [out] a host matrix view to store the output knn graph [n_rows, graph_degree]
refine_rate – [in] (optional) refinement rate for ivf-pq search
build_params – [in] (optional) ivf_pq index building parameters for knn graph
search_params – (optional) ivf_pq search parameters
-
template<typename DataT, typename IdxT = uint32_t, typename accessor = host_device_accessor<std::experimental::default_accessor<DataT>, memory_type::device>>
void build_knn_graph(raft::resources const &res, mdspan<const DataT, matrix_extent<int64_t>, row_major, accessor> dataset, raft::host_matrix_view<IdxT, int64_t, row_major> knn_graph, experimental::nn_descent::index_params build_params)# Build a kNN graph using NN-descent.
The kNN graph is the first building block for CAGRA index.
The output is a dense matrix that stores the neighbor indices for each point in the dataset. Each point has the same number of neighbors.
See cagra::build for an alternative method.
The following distance metrics are supported:
L2Expanded
Usage example:
using namespace raft::neighbors; using namespace raft::neighbors::experimental; // use default index parameters nn_descent::index_params build_params; build_params.graph_degree = 128; auto knn_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 128); // create knn graph cagra::build_knn_graph(res, dataset, knn_graph.view(), build_params); auto optimized_gaph = raft::make_host_matrix<IdxT, int64_t>(dataset.extent(0), 64); cagra::optimize(res, dataset, nn_descent_index.graph.view(), optimized_graph.view()); // Construct an index from dataset and optimized knn_graph auto index = cagra::index<T, IdxT>(res, build_params.metric(), dataset, optimized_graph.view());
- Template Parameters:
DataT – data element type
IdxT – type of the dataset vector indices
accessor – host or device accessor_type for the dataset
- Parameters:
res – [in] raft::resources is an object mangaging resources
dataset – [in] input raft::host/device_matrix_view that can be located in in host or device memory
knn_graph – [out] a host matrix view to store the output knn graph [n_rows, graph_degree]
build_params – [in] an instance of experimental::nn_descent::index_params that are parameters to run the nn-descent algorithm
-
template<typename DataT, typename IdxT = uint32_t, typename d_accessor = host_device_accessor<std::experimental::default_accessor<DataT>, memory_type::device>, typename g_accessor = host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
void sort_knn_graph(raft::resources const &res, mdspan<const DataT, matrix_extent<int64_t>, row_major, d_accessor> dataset, mdspan<IdxT, matrix_extent<int64_t>, row_major, g_accessor> knn_graph)# Sort a KNN graph index. Preprocessing step for
cagra::optimize
: If a KNN graph is not built usingcagra::build_knn_graph
, then it is necessary to call this function before callingcagra::optimize
. If the graph is built bycagra::build_knn_graph
, it is already sorted and you do not need to call this function.Usage example:
using namespace raft::neighbors; cagra::index_params build_params; auto knn_graph = raft::make_host_matrix<IdxT, IdxT>(dataset.extent(0), 128); // build KNN graph not using `cagra::build_knn_graph` // build(knn_graph, dataset, ...); // sort graph index sort_knn_graph(res, dataset.view(), knn_graph.view()); // optimize graph cagra::optimize(res, dataset, knn_graph.view(), optimized_graph.view()); // Construct an index from dataset and optimized knn_graph auto index = cagra::index<T, IdxT>(res, build_params.metric(), dataset, optimized_graph.view());
- Template Parameters:
DataT – type of the data in the source dataset
IdxT – type of the dataset vector indices
- Parameters:
res – [in] raft resources
dataset – [in] a matrix view (host or device) to a row-major matrix [n_rows, dim]
knn_graph – [inout] a matrix view (host or device) of the input knn graph [n_rows, knn_graph_degree]
-
template<typename IdxT = uint32_t, typename g_accessor = host_device_accessor<std::experimental::default_accessor<IdxT>, memory_type::host>>
void optimize(raft::resources const &res, mdspan<IdxT, matrix_extent<int64_t>, row_major, g_accessor> knn_graph, raft::host_matrix_view<IdxT, int64_t, row_major> new_graph)# Prune a KNN graph.
Decrease the number of neighbors for each node.
See cagra::build_knn_graph for usage example
- Template Parameters:
IdxT – type of the indices in the source dataset
- Parameters:
res – [in] raft resources
knn_graph – [in] a matrix view (host or device) of the input knn graph [n_rows, knn_graph_degree]
new_graph – [out] a host matrix view of the optimized knn graph [n_rows, graph_degree]
-
template<typename T, typename IdxT = uint32_t, typename Accessor = host_device_accessor<std::experimental::default_accessor<T>, memory_type::host>>
index<T, IdxT> build(raft::resources const &res, const index_params ¶ms, mdspan<const T, matrix_extent<int64_t>, row_major, Accessor> dataset)# Build the index from the dataset for efficient search.
The build consist of two steps: build an intermediate knn-graph, and optimize it to create the final graph. The index_params struct controls the node degree of these graphs.
It is required that dataset and the optimized graph fit the GPU memory.
To customize the parameters for knn-graph building and pruning, and to reuse the intermediate results, you could build the index in two steps using cagra::build_knn_graph and cagra::optimize.
The following distance metrics are supported:
L2
Usage example:
using namespace raft::neighbors; // use default index parameters cagra::index_params index_params; // create and fill the index from a [N, D] dataset auto index = cagra::build(res, index_params, dataset); // use default search parameters cagra::search_params search_params; // search K nearest neighbours auto neighbors = raft::make_device_matrix<uint32_t>(res, n_queries, k); auto distances = raft::make_device_matrix<float>(res, n_queries, k); cagra::search(res, search_params, index, queries, neighbors, distances);
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
res – [in]
params – [in] parameters for building the index
dataset – [in] a matrix view (host or device) to a row-major matrix [n_rows, dim]
- Returns:
the constructed cagra index
-
template<typename T, typename IdxT, typename CagraSampleFilterT>
void search_with_filtering(raft::resources const &res, const search_params ¶ms, const index<T, IdxT> &idx, raft::device_matrix_view<const T, int64_t, row_major> queries, raft::device_matrix_view<IdxT, int64_t, row_major> neighbors, raft::device_matrix_view<float, int64_t, row_major> distances, CagraSampleFilterT sample_filter = CagraSampleFilterT())# Search ANN using the constructed index with the given sample filter.
Usage example:
using namespace raft::neighbors; // use default index parameters cagra::index_params index_params; // create and fill the index from a [N, D] dataset auto index = cagra::build(res, index_params, dataset); // use default search parameters cagra::search_params search_params; // create a bitset to filter the search auto removed_indices = raft::make_device_vector<IdxT>(res, n_removed_indices); raft::core::bitset<std::uint32_t, IdxT> removed_indices_bitset( res, removed_indices.view(), dataset.extent(0)); // search K nearest neighbours according to a bitset auto neighbors = raft::make_device_matrix<uint32_t>(res, n_queries, k); auto distances = raft::make_device_matrix<float>(res, n_queries, k); cagra::search_with_filtering(res, search_params, index, queries, neighbors, distances, filtering::bitset_filter(removed_indices_bitset.view()));
- Template Parameters:
T – data element type
IdxT – type of the indices
CagraSampleFilterT – Device filter function, with the signature
(uint32_t query ix, uint32_t sample_ix) -> bool
- Parameters:
res – [in] raft resources
params – [in] configure the search
idx – [in] cagra index
queries – [in] a device matrix view to a row-major matrix [n_queries, index->dim()]
neighbors – [out] a device matrix view to the indices of the neighbors in the source dataset [n_queries, k]
distances – [out] a device matrix view to the distances to the selected neighbors [n_queries, k]
sample_filter – [in] a device filter function that greenlights samples for a given query
-
template<typename T, typename IdxT>
void search(raft::resources const &res, const search_params ¶ms, const index<T, IdxT> &idx, raft::device_matrix_view<const T, int64_t, row_major> queries, raft::device_matrix_view<IdxT, int64_t, row_major> neighbors, raft::device_matrix_view<float, int64_t, row_major> distances)# Search ANN using the constructed index.
See the cagra::build documentation for a usage example.
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
res – [in] raft resources
params – [in] configure the search
idx – [in] cagra index
queries – [in] a device matrix view to a row-major matrix [n_queries, index->dim()]
neighbors – [out] a device matrix view to the indices of the neighbors in the source dataset [n_queries, k]
distances – [out] a device matrix view to the distances to the selected neighbors [n_queries, k]
-
struct index_params : public raft::neighbors::ann::index_params#
- #include <cagra_types.hpp>
Public Members
-
size_t intermediate_graph_degree = 128#
Degree of input graph for pruning.
-
size_t graph_degree = 64#
Degree of output graph.
-
graph_build_algo build_algo = graph_build_algo::IVF_PQ#
ANN algorithm to build knn graph.
-
size_t nn_descent_niter = 20#
Number of Iterations to run if building with NN_DESCENT
-
std::optional<vpq_params> compression = std::nullopt#
Specify compression params if compression is desired.
NOTE: this is experimental new API, consider it unsafe.
-
size_t intermediate_graph_degree = 128#
-
struct search_params : public raft::neighbors::ann::search_params#
- #include <cagra_types.hpp>
Public Members
-
size_t max_queries = 0#
Maximum number of queries to search at the same time (batch size). Auto select when 0.
-
size_t itopk_size = 64#
Number of intermediate search results retained during the search.
This is the main knob to adjust trade off between accuracy and search speed. Higher values improve the search accuracy.
-
size_t max_iterations = 0#
Upper limit of search iterations. Auto select when 0.
-
search_algo algo = search_algo::AUTO#
Which search implementation to use.
-
size_t team_size = 0#
Number of threads used to calculate a single distance. 4, 8, 16, or 32.
-
size_t search_width = 1#
Number of graph nodes to select as the starting point for the search in each iteration. aka search width?
-
size_t min_iterations = 0#
Lower limit of search iterations.
-
size_t thread_block_size = 0#
Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0.
-
size_t hashmap_min_bitlen = 0#
Lower limit of hashmap bit length. More than 8.
-
float hashmap_max_fill_rate = 0.5#
Upper limit of hashmap fill rate. More than 0.1, less than 0.9.
-
uint32_t num_random_samplings = 1#
Number of iterations of initial random seed node selection. 1 or more.
-
uint64_t rand_xor_mask = 0x128394#
Bit mask used for initial random seed node selection.
-
size_t max_queries = 0#
-
template<typename T, typename IdxT>
struct index : public raft::neighbors::ann::index# - #include <cagra_types.hpp>
CAGRA index.
The index stores the dataset and a kNN graph in device memory.
- Template Parameters:
T – data element type
IdxT – type of the vector indices (represent dataset.extent(0))
Public Functions
-
inline constexpr auto metric() const noexcept -> raft::distance::DistanceType#
Distance metric used for clustering.
-
inline constexpr auto dim() const noexcept -> uint32_t#
Dimensionality of the data.
-
inline constexpr auto graph_degree() const noexcept -> uint32_t#
Graph degree
-
inline auto dataset() const noexcept -> device_matrix_view<const T, int64_t, layout_stride>#
DEPRECATED: please use data() instead. If you need to query dataset dimensions, use the dim() and size() of the cagra index. The data_handle() is not always available: you need to do a dynamic_cast to the expected dataset type at runtime.
-
inline auto data() const noexcept -> const neighbors::dataset<int64_t>&#
Dataset [size, dim]
-
inline auto graph() const noexcept -> device_matrix_view<const IdxT, int64_t, row_major>#
neighborhood graph [size, graph-degree]
-
inline index(raft::resources const &res, raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded)#
Construct an empty index.
-
template<typename data_accessor, typename graph_accessor>
inline index(raft::resources const &res, raft::distance::DistanceType metric, mdspan<const T, matrix_extent<int64_t>, row_major, data_accessor> dataset, mdspan<const IdxT, matrix_extent<int64_t>, row_major, graph_accessor> knn_graph)# Construct an index from dataset and knn_graph arrays
If the dataset and graph is already in GPU memory, then the index is just a thin wrapper around these that stores a non-owning a reference to the arrays.
The constructor also accepts host arrays. In that case they are copied to the device, and the device arrays will be owned by the index.
In case the dasates rows are not 16 bytes aligned, then we create a padded copy in device memory to ensure alignment for vectorized load.
Usage examples:
Cagra index is normally created by the cagra::build
In the above example, we have passed a host dataset to build. The returned index will own a device copy of the dataset and the knn_graph. In contrast, if we pass the dataset as a device_mdspan to build, then it will only store a reference to it.using namespace raft::neighbors::experimental; auto dataset = raft::make_host_matrix<float, int64_t>(n_rows, n_cols); load_dataset(dataset.view()); // use default index parameters cagra::index_params index_params; // create and fill the index from a [N, D] dataset auto index = cagra::build(res, index_params, dataset); // use default search parameters cagra::search_params search_params; // search K nearest neighbours auto neighbors = raft::make_device_matrix<uint32_t, int64_t>(res, n_queries, k); auto distances = raft::make_device_matrix<float, int64_t>(res, n_queries, k); cagra::search(res, search_params, index, queries, neighbors, distances);
Constructing index using existing knn-graph
using namespace raft::neighbors::experimental; auto dataset = raft::make_device_matrix<float, int64_t>(res, n_rows, n_cols); auto knn_graph = raft::make_device_matrix<uint32_n, int64_t>(res, n_rows, graph_degree); // custom loading and graph creation // load_dataset(dataset.view()); // create_knn_graph(knn_graph.view()); // Wrap the existing device arrays into an index structure cagra::index<T, IdxT> index(res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(knn_graph.view())); // Both knn_graph and dataset objects have to be in scope while the index is used because // the index only stores a reference to these. cagra::search(res, search_params, index, queries, neighbors, distances);
-
inline void update_dataset(raft::resources const &res, raft::device_matrix_view<const T, int64_t, row_major> dataset)#
Replace the dataset with a new dataset.
If the new dataset rows are aligned on 16 bytes, then only a reference is stored to the dataset. It is the caller’s responsibility to ensure that dataset stays alive as long as the index.
-
inline void update_dataset(raft::resources const &res, raft::device_matrix_view<const T, int64_t, layout_stride> dataset)#
Set the dataset reference explicitly to a device matrix view with padding.
-
inline void update_dataset(raft::resources const &res, raft::host_matrix_view<const T, int64_t, row_major> dataset)#
Replace the dataset with a new dataset.
We create a copy of the dataset on the device. The index manages the lifetime of this copy.
-
template<typename DatasetT>
inline auto update_dataset(raft::resources const &res, DatasetT &&dataset) -> std::enable_if_t<std::is_base_of_v<neighbors::dataset<int64_t>, DatasetT>># Replace the dataset with a new dataset.
-
inline void update_graph(raft::resources const &res, raft::device_matrix_view<const IdxT, int64_t, row_major> knn_graph)#
Replace the graph with a new graph.
Since the new graph is a device array, we store a reference to that, and it is the caller’s responsibility to ensure that knn_graph stays alive as long as the index.
-
inline void update_graph(raft::resources const &res, raft::host_matrix_view<const IdxT, int64_t, row_major> knn_graph)#
Replace the graph with a new graph.
We create a copy of the graph on the device. The index manages the lifetime of this copy.
Serializer Methods#
#include <raft/neighbors/cagra_serialize.cuh>
namespace raft::neighbors::cagra
-
template<typename T, typename IdxT>
void serialize(raft::resources const &handle, std::ostream &os, const index<T, IdxT> &index, bool include_dataset = true)# Write the index to an output stream
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> #include <raft/neighbors/cagra_serialize.cuh> raft::resources handle; // create an output stream std::ostream os(std::cout.rdbuf()); // create an index with `auto index = raft::neighbors::cagra::build(...);` raft::neighbors::cagra::serialize(handle, os, index);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
os – [in] output stream
index – [in] CAGRA index
include_dataset – [in] Whether or not to write out the dataset to the file.
-
template<typename T, typename IdxT>
void serialize(raft::resources const &handle, const std::string &filename, const index<T, IdxT> &index, bool include_dataset = true)# Save the index to file.
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> #include <raft/neighbors/cagra_serialize.cuh> raft::resources handle; // create a string with a filepath std::string filename("/path/to/index"); // create an index with `auto index = raft::neighbors::cagra::build(...);` raft::neighbors::cagra::serialize(handle, filename, index);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
filename – [in] the file name for saving the index
index – [in] CAGRA index
include_dataset – [in] Whether or not to write out the dataset to the file.
-
template<typename T, typename IdxT>
void serialize_to_hnswlib(raft::resources const &handle, std::ostream &os, const raft::neighbors::cagra::index<T, IdxT> &index)# Write the CAGRA built index as a base layer HNSW index to an output stream
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> #include <raft/neighbors/cagra_serialize.cuh> raft::resources handle; // create an output stream std::ostream os(std::cout.rdbuf()); // create an index with `auto index = raft::neighbors::cagra::build(...);` raft::neighbors::cagra::serialize_to_hnswlib(handle, os, index);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
os – [in] output stream
index – [in] CAGRA index
-
template<typename T, typename IdxT>
void serialize_to_hnswlib(raft::resources const &handle, const std::string &filename, const raft::neighbors::cagra::index<T, IdxT> &index)# Save a CAGRA build index in hnswlib base-layer-only serialized format
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> #include <raft/neighbors/cagra_serialize.cuh> raft::resources handle; // create a string with a filepath std::string filename("/path/to/index"); // create an index with `auto index = raft::neighbors::cagra::build(...);` raft::neighbors::cagra::serialize_to_hnswlib(handle, filename, index);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
filename – [in] the file name for saving the index
index – [in] CAGRA index
-
template<typename T, typename IdxT>
index<T, IdxT> deserialize(raft::resources const &handle, std::istream &is)# Load index from input stream
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> #include <raft/neighbors/cagra_serialize.cuh> raft::resources handle; // create an input stream std::istream is(std::cin.rdbuf()); using T = float; // data element type using IdxT = int; // type of the index auto index = raft::neighbors::cagra::deserialize<T, IdxT>(handle, is);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
is – [in] input stream
- Returns:
raft::neighbors::experimental::cagra::index<T, IdxT>
-
template<typename T, typename IdxT>
index<T, IdxT> deserialize(raft::resources const &handle, const std::string &filename)# Load index from file.
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> #include <raft/neighbors/cagra_serialize.cuh> raft::resources handle; // create a string with a filepath std::string filename("/path/to/index"); using T = float; // data element type using IdxT = int; // type of the index auto index = raft::neighbors::cagra::deserialize<T, IdxT>(handle, filename);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
filename – [in] the name of the file that stores the index
- Returns:
raft::neighbors::experimental::cagra::index<T, IdxT>