Attention
The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called cuVS. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by the RAPIDS 24.10 (October) release, and they will be removed from RAFT altogether in the 24.12 (December) release.
HNSW#
HNSW is a graph-based nearest neighbors implementation for the CPU. This implementation provides the ability to serialize a CAGRA graph and read it as a base-layer-only hnswlib graph.
#include <raft/neighbors/hnsw.hpp>
namespace raft::neighbors::hnsw
-
template<typename T, typename IdxT>
std::unique_ptr<index<T>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<T, IdxT> cagra_index)# Construct an hnswlib base-layer-only index from a CAGRA index. NOTE: 1. This method uses the filesystem to write the CAGRA index in
/tmp/<random_number>.bin
before reading it as an hnswlib index, then deleting the temporary file. 2. This function is only offered as a compiled symbol in
libraft.so
Usage example:
// Build a CAGRA index using namespace raft::neighbors; // use default index parameters cagra::index_params index_params; // create and fill the index from a [N, D] dataset auto index = cagra::build(res, index_params, dataset); // Load CAGRA index as base-layer-only hnswlib index auto hnsw_index = hnsw::from_cagra(res, index);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
res – [in] raft resources
cagra_index – [in] cagra index
-
template<>
std::unique_ptr<index<float>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<float, uint32_t> cagra_index)#
-
template<>
std::unique_ptr<index<int8_t>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<int8_t, uint32_t> cagra_index)#
-
template<>
std::unique_ptr<index<uint8_t>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<uint8_t, uint32_t> cagra_index)#
-
template<typename T>
void search(raft::resources const &res, const search_params &params, const index<T> &idx, raft::host_matrix_view<const T, int64_t, row_major> queries, raft::host_matrix_view<uint64_t, int64_t, row_major> neighbors, raft::host_matrix_view<float, int64_t, row_major> distances)# Search hnswlib base-layer-only index constructed from a CAGRA index.
Usage example:
// Build a CAGRA index using namespace raft::neighbors; // use default index parameters cagra::index_params index_params; // create and fill the index from a [N, D] dataset auto index = cagra::build(res, index_params, dataset); // Save CAGRA index as base layer only hnswlib index hnsw::serialize(res, "my_index.bin", index); // Load CAGRA index as base layer only hnswlib index auto hnsw_index = hnsw::deserialize(res, "my_index.bin", D, raft::distance::L2Expanded); // Search K nearest neighbors as an hnswlib index // using host threads for concurrency hnsw::search_params search_params; search_params.ef = 50; // ef >= K search_params.num_threads = 10; auto neighbors = raft::make_host_matrix<uint64_t>(res, n_queries, k); auto distances = raft::make_host_matrix<float>(res, n_queries, k); hnsw::search(res, search_params, *hnsw_index, queries, neighbors, distances); // hnsw_index is a std::unique_ptr, so it is de-allocated automatically
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
res – [in] raft resources
params – [in] configure the search
idx – [in] hnswlib base-layer-only index (constructed from a CAGRA index)
queries – [in] a host matrix view to a row-major matrix [n_queries, index->dim()]
neighbors – [out] a host matrix view to the indices of the neighbors in the source dataset [n_queries, k]
distances – [out] a host matrix view to the distances to the selected neighbors [n_queries, k]
-
struct search_params : public raft::neighbors::ann::search_params#
- #include <hnsw_types.hpp>
-
template<typename T>
struct index : public raft::neighbors::ann::index# - #include <hnsw_types.hpp>
Public Functions
-
inline index(int dim, raft::distance::DistanceType metric)#
Load a base-layer-only hnswlib index originally saved from a built CAGRA index. This is an abstract class and it cannot be instantiated directly. To create an index, use the factory function
raft::neighbors::hnsw::from_cagra
from the header raft/neighbors/hnsw.hpp
- Parameters:
dim – [in] dimensions of the training dataset
metric – [in] distance metric to search. Supported metrics (“L2Expanded”, “InnerProduct”)
-
virtual auto get_index() const -> void const* = 0#
Get underlying index.
-
virtual void set_ef(int ef) const#
Set ef for search.
-
inline index(int dim, raft::distance::DistanceType metric)#
Serializer Methods#
#include <raft/neighbors/hnsw_serialize.cuh>
namespace raft::neighbors::hnsw
-
template<typename T>
std::unique_ptr<index<T>> deserialize(raft::resources const &handle, const std::string &filename, int dim, raft::distance::DistanceType metric)# Load an hnswlib index which was serialized from a CAGRA index
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> raft::resources handle; // create a string with a filepath std::string filename("/path/to/index"); // set the index dimensionality and the distance metric int dim = 10; raft::distance::DistanceType metric = raft::distance::L2Expanded; auto index = raft::neighbors::hnsw::deserialize(handle, filename, dim, metric);
- Template Parameters:
T – data element type
- Parameters:
handle – [in] the raft handle
filename – [in] the name of the file from which the index is loaded
dim – [in] dimensionality of the index
metric – [in] metric used to build the index
- Returns:
std::unique_ptr<index<T>>