Attention

The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called cuVS. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by the RAPIDS 24.10 (October) release, and the algorithms will be removed from RAFT altogether in the 24.12 (December) release.

HNSW#

HNSW is a graph-based nearest neighbors implementation for the CPU. This implementation provides the ability to serialize a CAGRA graph and read it as a base-layer-only hnswlib graph.

#include <raft/neighbors/hnsw.hpp>

namespace raft::neighbors::hnsw

template<typename T, typename IdxT>
std::unique_ptr<index<T>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<T, IdxT> cagra_index)#

Construct an hnswlib base-layer-only index from a CAGRA index. NOTE:

  1. This method uses the filesystem to write the CAGRA index in /tmp/<random_number>.bin before reading it as an hnswlib index, and then deletes the temporary file.

  2. This function is only offered as a compiled symbol in libraft.so

Usage example:

// Build a CAGRA index
using namespace raft::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as base-layer-only hnswlib index
auto hnsw_index = hnsw::from_cagra(res, index);

Template Parameters:
  • T – data element type

  • IdxT – type of the indices

Parameters:
  • res[in] raft resources

  • cagra_index[in] cagra index

template<>
std::unique_ptr<index<float>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<float, uint32_t> cagra_index)#
template<>
std::unique_ptr<index<int8_t>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<int8_t, uint32_t> cagra_index)#
template<>
std::unique_ptr<index<uint8_t>> from_cagra(raft::resources const &res, raft::neighbors::cagra::index<uint8_t, uint32_t> cagra_index)#
template<typename T>
void search(raft::resources const &res, const search_params &params, const index<T> &idx, raft::host_matrix_view<const T, int64_t, row_major> queries, raft::host_matrix_view<uint64_t, int64_t, row_major> neighbors, raft::host_matrix_view<float, int64_t, row_major> distances)#

Search hnswlib base-layer-only index constructed from a CAGRA index.

Usage example:

// Build a CAGRA index
using namespace raft::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Save CAGRA index as base layer only hnswlib index
hnsw::serialize(res, "my_index.bin", index);

// Load CAGRA index as base layer only hnswlib index
// deserialize returns a std::unique_ptr that owns the hnswlib index
auto hnsw_index = hnsw::deserialize(res, "my_index.bin", D, raft::distance::L2Expanded);

// Search K nearest neighbors as an hnswlib index
// using host threads for concurrency
hnsw::search_params search_params;
search_params.ef = 50; // ef >= K
search_params.num_threads = 10;
auto neighbors = raft::make_host_matrix<uint64_t>(res, n_queries, k);
auto distances = raft::make_host_matrix<float>(res, n_queries, k);
hnsw::search(res, search_params, *hnsw_index, queries, neighbors, distances);
// hnsw_index is de-allocated automatically when the std::unique_ptr goes out of scope

Template Parameters:
  • T – data element type

  • IdxT – type of the indices

Parameters:
  • res[in] raft resources

  • params[in] configure the search

  • idx[in] hnswlib index constructed from a CAGRA index

  • queries[in] a host matrix view to a row-major matrix [n_queries, index->dim()]

  • neighbors[out] a host matrix view to the indices of the neighbors in the source dataset [n_queries, k]

  • distances[out] a host matrix view to the distances to the selected neighbors [n_queries, k]

struct search_params : public raft::neighbors::ann::search_params#
#include <hnsw_types.hpp>
template<typename T>
struct index : public raft::neighbors::ann::index#
#include <hnsw_types.hpp>

Public Functions

inline index(int dim, raft::distance::DistanceType metric)#

Load a base-layer-only hnswlib index originally saved from a built CAGRA index. This is an abstract class and cannot be used directly. To create an index, use the factory function raft::neighbors::hnsw::from_cagra from the header raft/neighbors/hnsw.hpp.

Parameters:
  • dim[in] dimensions of the training dataset

  • metric[in] distance metric to search. Supported metrics (“L2Expanded”, “InnerProduct”)

virtual auto get_index() const -> void const* = 0#

Get underlying index.

virtual void set_ef(int ef) const#

Set ef for search.

Serializer Methods#

#include <raft/neighbors/hnsw_serialize.cuh>

namespace raft::neighbors::hnsw

template<typename T>
std::unique_ptr<index<T>> deserialize(raft::resources const &handle, const std::string &filename, int dim, raft::distance::DistanceType metric)#

Load an hnswlib index which was serialized from a CAGRA index

Experimental, both the API and the serialization format are subject to change.

#include <raft/core/resources.hpp>

raft::resources handle;

// create a string with a filepath
std::string filename("/path/to/index");
// set the dimensionality and the distance metric of the index
int dim = 10;
raft::distance::DistanceType metric = raft::distance::L2Expanded;
auto index = raft::neighbors::hnsw::deserialize<float>(handle, filename, dim, metric);
Template Parameters:

T – data element type

Parameters:
  • handle[in] the raft handle

  • filename[in] the name of the file from which the index is loaded

  • dim[in] dimensionality of the index

  • metric[in] metric used to build the index

Returns:

std::unique_ptr<index<T>>