HNSW#

This is a wrapper for hnswlib, used to load a CAGRA index as an immutable HNSW index. The loaded HNSW index is only compatible with cuVS, and can be searched using the wrapper functions.

#include <cuvs/neighbors/hnsw.hpp>

namespace cuvs::neighbors::hnsw

Index search parameters#

struct search_params : public cuvs::neighbors::search_params#
#include <hnsw.hpp>

Index#

template<typename T>
struct index : public cuvs::neighbors::index#
#include <hnsw.hpp>

Index extend parameters#

HnswHierarchy hierarchy = HnswHierarchy::NONE#

Hierarchy build type for HNSW index when converting from CAGRA index

int ef_construction = 200#

Size of the candidate list during hierarchy construction when hierarchy is CPU

int num_threads = 0#

Number of host threads to use to construct the hierarchy when hierarchy is CPU or GPU. When the value is 0, the number of threads is automatically set to the maximum number of threads available. NOTE: When hierarchy is GPU, the majority of the work is done on the GPU, but initialization of the HNSW index itself and some other work is parallelized with the help of CPU threads.

int num_threads = 0

Number of host threads to use to add additional vectors to the index. Value of 0 automatically maximizes parallelism.

int ef#
int num_threads = 0
inline index(
int dim,
cuvs::distance::DistanceType metric,
HnswHierarchy hierarchy = HnswHierarchy::NONE
)#

Load a base-layer-only hnswlib index originally saved from a built CAGRA index. This is an abstract class and cannot be instantiated directly. To create an index, use the factory function cuvs::neighbors::hnsw::from_cagra from the header cuvs/neighbors/hnsw.hpp

Parameters:
  • dim[in] dimensions of the training dataset

  • metric[in] distance metric to search. Supported metrics (“L2Expanded”, “InnerProduct”)

  • hierarchy[in] hierarchy used for upper HNSW layers

inline virtual ~index()#
virtual void const *get_index() const = 0#

Get underlying index.

inline int const dim() const
inline cuvs::distance::DistanceType metric() const
inline HnswHierarchy hierarchy() const
virtual void set_ef(int ef) const#

Set ef for search.

struct extend_params#
#include <hnsw.hpp>

Index extend#

void extend(
raft::resources const &res,
const extend_params &params,
raft::host_matrix_view<const float, int64_t, raft::row_major> additional_dataset,
index<float> &idx
)#

Add new vectors to an HNSW index. NOTE: The HNSW index can only be extended if hnsw::index_params.hierarchy was CPU when it was converted from a CAGRA index.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);

// Extend the HNSW index with additional vectors
auto additional_dataset = raft::make_host_matrix<float>(res, add_size, index.dim());
hnsw::extend_params extend_params;
hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get());

Parameters:
  • res[in] raft resources

  • params[in] configure the extend

  • additional_dataset[in] a host matrix view to a row-major matrix [n_rows, index->dim()]

  • idx[inout] HNSW index to extend

void extend(
raft::resources const &res,
const extend_params &params,
raft::host_matrix_view<const half, int64_t, raft::row_major> additional_dataset,
index<half> &idx
)#

Add new vectors to an HNSW index. NOTE: The HNSW index can only be extended if hnsw::index_params.hierarchy was CPU when it was converted from a CAGRA index.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);

// Extend the HNSW index with additional vectors
auto additional_dataset = raft::make_host_matrix<half>(res, add_size, index.dim());
hnsw::extend_params extend_params;
hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get());

Parameters:
  • res[in] raft resources

  • params[in] configure the extend

  • additional_dataset[in] a host matrix view to a row-major matrix [n_rows, index->dim()]

  • idx[inout] HNSW index to extend

void extend(
raft::resources const &res,
const extend_params &params,
raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> additional_dataset,
index<uint8_t> &idx
)#

Add new vectors to an HNSW index. NOTE: The HNSW index can only be extended if hnsw::index_params.hierarchy was CPU when it was converted from a CAGRA index.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);

// Extend the HNSW index with additional vectors
auto additional_dataset = raft::make_host_matrix<uint8_t>(res, add_size, index.dim());
hnsw::extend_params extend_params;
hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get());

Parameters:
  • res[in] raft resources

  • params[in] configure the extend

  • additional_dataset[in] a host matrix view to a row-major matrix [n_rows, index->dim()]

  • idx[inout] HNSW index to extend

void extend(
raft::resources const &res,
const extend_params &params,
raft::host_matrix_view<const int8_t, int64_t, raft::row_major> additional_dataset,
index<int8_t> &idx
)#

Add new vectors to an HNSW index. NOTE: The HNSW index can only be extended if hnsw::index_params.hierarchy was CPU when it was converted from a CAGRA index.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
hnsw_params.hierarchy = hnsw::HnswHierarchy::CPU;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);

// Extend the HNSW index with additional vectors
auto additional_dataset = raft::make_host_matrix<int8_t>(res, add_size, index.dim());
hnsw::extend_params extend_params;
hnsw::extend(res, extend_params, additional_dataset, *hnsw_index.get());

Parameters:
  • res[in] raft resources

  • params[in] configure the extend

  • additional_dataset[in] a host matrix view to a row-major matrix [n_rows, index->dim()]

  • idx[inout] HNSW index to extend

Index load#

std::unique_ptr<index<float>> from_cagra(
raft::resources const &res,
const index_params &params,
const cuvs::neighbors::cagra::index<float, uint32_t> &cagra_index,
std::optional<raft::host_matrix_view<const float, int64_t, raft::row_major>> dataset = std::nullopt
)#

Construct an hnswlib index from a CAGRA index. NOTE: When hnsw::index_params.hierarchy is:

  1. NONE: This method uses the filesystem to write the CAGRA index in /tmp/<random_number>.bin before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.

  2. CPU: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);

Parameters:
  • res[in] raft resources

  • params[in] hnsw index parameters

  • cagra_index[in] cagra index

  • dataset[in] optional dataset to avoid extra memory copy when hierarchy is CPU

std::unique_ptr<index<half>> from_cagra(
raft::resources const &res,
const index_params &params,
const cuvs::neighbors::cagra::index<half, uint32_t> &cagra_index,
std::optional<raft::host_matrix_view<const half, int64_t, raft::row_major>> dataset = std::nullopt
)#

Construct an hnswlib index from a CAGRA index. NOTE: When hnsw::index_params.hierarchy is:

  1. NONE: This method uses the filesystem to write the CAGRA index in /tmp/<random_number>.bin before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.

  2. CPU: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);

Parameters:
  • res[in] raft resources

  • params[in] hnsw index parameters

  • cagra_index[in] cagra index

  • dataset[in] optional dataset to avoid extra memory copy when hierarchy is CPU

std::unique_ptr<index<uint8_t>> from_cagra(
raft::resources const &res,
const index_params &params,
const cuvs::neighbors::cagra::index<uint8_t, uint32_t> &cagra_index,
std::optional<raft::host_matrix_view<const uint8_t, int64_t, raft::row_major>> dataset = std::nullopt
)#

Construct an hnswlib index from a CAGRA index. NOTE: When hnsw::index_params.hierarchy is:

  1. NONE: This method uses the filesystem to write the CAGRA index in /tmp/<random_number>.bin before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.

  2. CPU: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);

Parameters:
  • res[in] raft resources

  • params[in] hnsw index parameters

  • cagra_index[in] cagra index

  • dataset[in] optional dataset to avoid extra memory copy when hierarchy is CPU

std::unique_ptr<index<int8_t>> from_cagra(
raft::resources const &res,
const index_params &params,
const cuvs::neighbors::cagra::index<int8_t, uint32_t> &cagra_index,
std::optional<raft::host_matrix_view<const int8_t, int64_t, raft::row_major>> dataset = std::nullopt
)#

Construct an hnswlib index from a CAGRA index. NOTE: When hnsw::index_params.hierarchy is:

  1. NONE: This method uses the filesystem to write the CAGRA index in /tmp/<random_number>.bin before reading it as an hnswlib index, then deleting the temporary file. The returned index is immutable and can only be searched by the hnswlib wrapper in cuVS, as the format is not compatible with the original hnswlib.

  2. CPU: The returned index is mutable and can be extended with additional vectors. The serialized index is also compatible with the original hnswlib library.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);

Parameters:
  • res[in] raft resources

  • params[in] hnsw index parameters

  • cagra_index[in] cagra index

  • dataset[in] optional dataset to avoid extra memory copy when hierarchy is CPU

Index serialize#

void serialize(
raft::resources const &res,
const std::string &filename,
const index<float> &idx
)#

Serialize the HNSW index to a file. NOTE: When hierarchy is NONE, the saved hnswlib index is immutable and can only be read by the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. However, when hierarchy is CPU, the saved hnswlib index is compatible with the original hnswlib library.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
// Save the index
hnsw::serialize(res, "index.bin", *hnsw_index);

Parameters:
  • res[in] raft resources

  • filename[in] path to the file to save the serialized HNSW index

  • idx[in] HNSW index to serialize

void serialize(
raft::resources const &res,
const std::string &filename,
const index<half> &idx
)#

Serialize the HNSW index to a file. NOTE: When hierarchy is NONE, the saved hnswlib index is immutable and can only be read by the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. However, when hierarchy is CPU, the saved hnswlib index is compatible with the original hnswlib library.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
// Save the index
hnsw::serialize(res, "index.bin", *hnsw_index);

Parameters:
  • res[in] raft resources

  • filename[in] path to the file to save the serialized HNSW index

  • idx[in] HNSW index to serialize

void serialize(
raft::resources const &res,
const std::string &filename,
const index<uint8_t> &idx
)#

Serialize the HNSW index to a file. NOTE: When hierarchy is NONE, the saved hnswlib index is immutable and can only be read by the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. However, when hierarchy is CPU, the saved hnswlib index is compatible with the original hnswlib library.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
// Save the index
hnsw::serialize(res, "index.bin", *hnsw_index);

Parameters:
  • res[in] raft resources

  • filename[in] path to the file to save the serialized HNSW index

  • idx[in] HNSW index to serialize

void serialize(
raft::resources const &res,
const std::string &filename,
const index<int8_t> &idx
)#

Serialize the HNSW index to a file. NOTE: When hierarchy is NONE, the saved hnswlib index is immutable and can only be read by the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. However, when hierarchy is CPU, the saved hnswlib index is compatible with the original hnswlib library.

Usage example:

// Build a CAGRA index
using namespace cuvs::neighbors;
// use default index parameters
cagra::index_params index_params;
// create and fill the index from a [N, D] dataset
auto index = cagra::build(res, index_params, dataset);

// Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
// Save the index
hnsw::serialize(res, "index.bin", *hnsw_index);

Parameters:
  • res[in] raft resources

  • filename[in] path to the file to save the serialized HNSW index

  • idx[in] HNSW index to serialize

void deserialize(
raft::resources const &res,
const index_params &params,
const std::string &filename,
int dim,
cuvs::distance::DistanceType metric,
index<float> **index
)#

De-serialize a CAGRA index saved to a file as an hnswlib index. NOTE: When hierarchy is NONE, the saved hnswlib index is immutable and can only be read by the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. However, when hierarchy is CPU, the saved hnswlib index is compatible with the original hnswlib library.

Usage example:

 // Build a CAGRA index
 using namespace cuvs::neighbors;
 // use default index parameters
 cagra::index_params index_params;
 // create and fill the index from a [N, D] dataset
 auto index = cagra::build(res, index_params, dataset);

 // Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
// save HNSW index to a file
hnsw::serialize(res, "index.bin", *hnsw_index);
// De-serialize the HNSW index
hnsw::index<float>* loaded_index = nullptr;
hnsw::deserialize(res, hnsw_params, "index.bin", index.dim(), index.metric(), &loaded_index);

 // Delete index after use
 delete loaded_index;

Parameters:
  • res[in] raft resources

  • params[in] hnsw index parameters

  • filename[in] path to the file containing the serialized CAGRA index

  • dim[in] dimensions of the training dataset

  • metric[in] distance metric to search. Supported metrics (“L2Expanded”, “InnerProduct”)

  • index[out] hnsw index

void deserialize(
raft::resources const &res,
const index_params &params,
const std::string &filename,
int dim,
cuvs::distance::DistanceType metric,
index<half> **index
)#

De-serialize a CAGRA index saved to a file as an hnswlib index. NOTE: When hierarchy is NONE, the saved hnswlib index is immutable and can only be read by the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. However, when hierarchy is CPU, the saved hnswlib index is compatible with the original hnswlib library.

Usage example:

 // Build a CAGRA index
 using namespace cuvs::neighbors;
 // use default index parameters
 cagra::index_params index_params;
 // create and fill the index from a [N, D] dataset
 auto index = cagra::build(res, index_params, dataset);

 // Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
// save HNSW index to a file
hnsw::serialize(res, "index.bin", *hnsw_index);
// De-serialize the HNSW index
hnsw::index<half>* loaded_index = nullptr;
hnsw::deserialize(res, hnsw_params, "index.bin", index.dim(), index.metric(), &loaded_index);

 // Delete index after use
 delete loaded_index;

Parameters:
  • res[in] raft resources

  • params[in] hnsw index parameters

  • filename[in] path to the file containing the serialized CAGRA index

  • dim[in] dimensions of the training dataset

  • metric[in] distance metric to search. Supported metrics (“L2Expanded”, “InnerProduct”)

  • index[out] hnsw index

void deserialize(
raft::resources const &res,
const index_params &params,
const std::string &filename,
int dim,
cuvs::distance::DistanceType metric,
index<uint8_t> **index
)#

De-serialize a CAGRA index saved to a file as an hnswlib index. NOTE: When hierarchy is NONE, the saved hnswlib index is immutable and can only be read by the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. However, when hierarchy is CPU, the saved hnswlib index is compatible with the original hnswlib library.

Usage example:

 // Build a CAGRA index
 using namespace cuvs::neighbors;
 // use default index parameters
 cagra::index_params index_params;
 // create and fill the index from a [N, D] dataset
 auto index = cagra::build(res, index_params, dataset);

 // Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
// save HNSW index to a file
hnsw::serialize(res, "index.bin", *hnsw_index);
// De-serialize the HNSW index
hnsw::index<uint8_t>* loaded_index = nullptr;
hnsw::deserialize(res, hnsw_params, "index.bin", index.dim(), index.metric(), &loaded_index);

 // Delete index after use
 delete loaded_index;

Parameters:
  • res[in] raft resources

  • params[in] hnsw index parameters

  • filename[in] path to the file containing the serialized CAGRA index

  • dim[in] dimensions of the training dataset

  • metric[in] distance metric to search. Supported metrics (“L2Expanded”, “InnerProduct”)

  • index[out] hnsw index

void deserialize(
raft::resources const &res,
const index_params &params,
const std::string &filename,
int dim,
cuvs::distance::DistanceType metric,
index<int8_t> **index
)#

De-serialize a CAGRA index saved to a file as an hnswlib index. NOTE: When hierarchy is NONE, the saved hnswlib index is immutable and can only be read by the hnswlib wrapper in cuVS, as the serialization format is not compatible with the original hnswlib. However, when hierarchy is CPU, the saved hnswlib index is compatible with the original hnswlib library.

Usage example:

 // Build a CAGRA index
 using namespace cuvs::neighbors;
 // use default index parameters
 cagra::index_params index_params;
 // create and fill the index from a [N, D] dataset
 auto index = cagra::build(res, index_params, dataset);

 // Load CAGRA index as an HNSW index
hnsw::index_params hnsw_params;
auto hnsw_index = hnsw::from_cagra(res, hnsw_params, index);
// save HNSW index to a file
hnsw::serialize(res, "index.bin", *hnsw_index);
// De-serialize the HNSW index
hnsw::index<int8_t>* loaded_index = nullptr;
hnsw::deserialize(res, hnsw_params, "index.bin", index.dim(), index.metric(), &loaded_index);

 // Delete index after use
 delete loaded_index;

Parameters:
  • res[in] raft resources

  • params[in] hnsw index parameters

  • filename[in] path to the file containing the serialized CAGRA index

  • dim[in] dimensions of the training dataset

  • metric[in] distance metric to search. Supported metrics (“L2Expanded”, “InnerProduct”)

  • index[out] hnsw index