Attention
The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called cuVS. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.08 (August) release.
IVF-Flat#
#include <raft/neighbors/ivf_flat.cuh>
namespace raft::neighbors::ivf_flat
-
template<typename T, typename IdxT>
auto extend(raft::resources const &handle, raft::device_matrix_view<const T, IdxT, row_major> new_vectors, std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, const index<T, IdxT> &orig_index) -> index<T, IdxT># Build a new index containing the data of the original plus new extra vectors.
Implementation note: The new data is clustered according to existing kmeans clusters, then the cluster centers are adjusted to match the newly labeled data.
Usage example:
using namespace raft::neighbors; ivf_flat::index_params index_params; index_params.add_data_on_build = false; // don't populate index on build index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training // train the index from a [N, D] dataset auto index_empty = ivf_flat::build(handle, dataset, index_params, dataset); // fill the index with the data std::optional<raft::device_vector_view<const IdxT, IdxT>> no_op = std::nullopt; auto index = ivf_flat::extend(handle, index_empty, no_op, dataset);
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
handle – [in]
new_vectors – [in] raft::device_matrix_view to a row-major matrix [n_rows, index.dim()]
new_indices – [in] optional raft::device_vector_view to a vector of indices [n_rows]. If the original index is empty (
orig_index.size() == 0
), you can passstd::nullopt
here to imply a continuous range[0...n_rows)
.orig_index – [in] original index
- Returns:
the constructed extended ivf-flat index
-
template<typename T, typename IdxT>
auto extend(raft::resources const &handle, raft::host_matrix_view<const T, IdxT, row_major> new_vectors, std::optional<raft::host_vector_view<const IdxT, IdxT>> new_indices, const index<T, IdxT> &orig_index) -> index<T, IdxT># Extend the index with additional vectors.
This overloads takes input data in host memory.
-
template<typename T, typename IdxT>
void extend(raft::resources const &handle, raft::device_matrix_view<const T, IdxT, row_major> new_vectors, std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, index<T, IdxT> *index)# Extend the index in-place with the new data.
Usage example:
using namespace raft::neighbors; ivf_flat::index_params index_params; index_params.add_data_on_build = false; // don't populate index on build index_params.kmeans_trainset_fraction = 1.0; // use whole dataset for kmeans training // train the index from a [N, D] dataset auto index_empty = ivf_flat::build(handle, index_params, dataset); // fill the index with the data std::optional<raft::device_vector_view<const IdxT, IdxT>> no_op = std::nullopt; ivf_flat::extend(handle, dataset, no_opt, &index_empty);
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
handle – [in]
new_vectors – [in] raft::device_matrix_view to a row-major matrix [n_rows, index.dim()]
new_indices – [in] optional raft::device_vector_view to a vector of indices [n_rows]. If the original index is empty (
orig_index.size() == 0
), you can passstd::nullopt
here to imply a continuous range[0...n_rows)
.index – [inout] pointer to index, to be overwritten in-place
-
template<typename T, typename IdxT>
void extend(raft::resources const &handle, raft::host_matrix_view<const T, IdxT, row_major> new_vectors, std::optional<raft::host_vector_view<const IdxT, IdxT>> new_indices, index<T, IdxT> *index)# Extend the index with additional vectors.
This overloads takes input data in host memory.
-
template<typename T, typename IdxT, typename IvfSampleFilterT>
void search_with_filtering(raft::resources const &handle, const search_params ¶ms, const index<T, IdxT> &index, raft::device_matrix_view<const T, IdxT, row_major> queries, raft::device_matrix_view<IdxT, IdxT, row_major> neighbors, raft::device_matrix_view<float, IdxT, row_major> distances, IvfSampleFilterT sample_filter = IvfSampleFilterT())# Search ANN using the constructed index with the given filter.
See the ivf_flat::build documentation for a usage example.
Note, this function requires a temporary buffer to store intermediate results between cuda kernel calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can pass a pool memory resource or a large enough pre-allocated memory resource to reduce or eliminate entirely allocations happening within
search
:... // use default search parameters ivf_flat::search_params search_params; filtering::none_ivf_sample_filter filter; // Use the same allocator across multiple searches to reduce the number of // cuda memory allocations ivf_flat::search_with_filtering( handle, search_params, index, queries1, out_inds1, out_dists1, filter); ivf_flat::search_with_filtering( handle, search_params, index, queries2, out_inds2, out_dists2, filter); ivf_flat::search_with_filtering( handle, search_params, index, queries3, out_inds3, out_dists3, filter); ...
- Template Parameters:
T – data element type
IdxT – type of the indices
IvfSampleFilterT – Device filter function, with the signature
(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool
or(uint32_t query_ix, uint32 sample_ix) -> bool
- Parameters:
handle – [in]
params – [in] configure the search
index – [in] ivf-flat constructed index
queries – [in] a device pointer to a row-major matrix [n_queries, index->dim()]
neighbors – [out] a device pointer to the indices of the neighbors in the source dataset [n_queries, k]
distances – [out] a device pointer to the distances to the selected neighbors [n_queries, k]
sample_filter – [in] a device filter function that greenlights samples for a given query
-
template<typename T, typename IdxT>
void search(raft::resources const &handle, const search_params ¶ms, const index<T, IdxT> &index, raft::device_matrix_view<const T, IdxT, row_major> queries, raft::device_matrix_view<IdxT, IdxT, row_major> neighbors, raft::device_matrix_view<float, IdxT, row_major> distances)# Search ANN using the constructed index.
See the ivf_flat::build documentation for a usage example.
Note, this function requires a temporary buffer to store intermediate results between cuda kernel calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can pass a pool memory resource or a large enough pre-allocated memory resource to reduce or eliminate entirely allocations happening within
search
:... // use default search parameters ivf_flat::search_params search_params; // Use the same allocator across multiple searches to reduce the number of // cuda memory allocations ivf_flat::search(handle, search_params, index, queries1, out_inds1, out_dists1); ivf_flat::search(handle, search_params, index, queries2, out_inds2, out_dists2); ivf_flat::search(handle, search_params, index, queries3, out_inds3, out_dists3); ...
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in]
params – [in] configure the search
index – [in] ivf-flat constructed index
queries – [in] a device pointer to a row-major matrix [n_queries, index->dim()]
neighbors – [out] a device pointer to the indices of the neighbors in the source dataset [n_queries, k]
distances – [out] a device pointer to the distances to the selected neighbors [n_queries, k]
-
template<typename ValueT, typename IdxT, typename SizeT = uint32_t>
using list_data = ivf::list<list_spec, SizeT, ValueT, IdxT>#
-
static constexpr uint32_t kIndexGroupSize = 32#
Size of the interleaved group (see
index::data
description).
-
template<typename T, typename IdxT>
auto build(raft::resources const &handle, const index_params ¶ms, raft::device_matrix_view<const T, IdxT, row_major> dataset) -> index<T, IdxT># Build the index from the dataset for efficient search.
NB: Currently, the following distance metrics are supported:
L2Expanded
L2Unexpanded
InnerProduct
Usage example:
using namespace raft::neighbors; // use default index parameters ivf_flat::index_params index_params; // create and fill the index from a [N, D] dataset auto index = ivf_flat::build(handle, dataset, index_params); // use default search parameters ivf_flat::search_params search_params; // search K nearest neighbours for each of the N queries ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists);
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
handle – [in]
params – [in] configure the index building
dataset – [in] a device matrix [n_rows, dim]
- Returns:
the constructed ivf-flat index
-
template<typename T, typename IdxT>
auto build(raft::resources const &handle, const index_params ¶ms, raft::host_matrix_view<const T, IdxT, row_major> dataset) -> index<T, IdxT># Build the index from a dataset in host memory.
-
template<typename T, typename IdxT>
void build(raft::resources const &handle, const index_params ¶ms, raft::device_matrix_view<const T, IdxT, row_major> dataset, raft::neighbors::ivf_flat::index<T, IdxT> &idx)# Build the index from the dataset for efficient search.
NB: Currently, the following distance metrics are supported:
L2Expanded
L2Unexpanded
InnerProduct
Usage example:
using namespace raft::neighbors; // use default index parameters ivf_flat::index_params index_params; // create and fill the index from a [N, D] dataset ivf_flat::index<decltype(dataset::value_type), decltype(dataset::index_type)> index; ivf_flat::build(handle, dataset, index_params, index); // use default search parameters ivf_flat::search_params search_params; // search K nearest neighbours for each of the N queries ivf_flat::search(handle, search_params, index, queries, out_inds, out_dists);
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
handle – [in]
params – [in] configure the index building
dataset – [in] raft::device_matrix_view to a row-major matrix [n_rows, dim]
idx – [out] reference to ivf_flat::index
-
template<typename T, typename IdxT>
void build(raft::resources const &handle, const index_params ¶ms, raft::host_matrix_view<const T, IdxT, row_major> dataset, raft::neighbors::ivf_flat::index<T, IdxT> &idx)# Build the index from a dataset in host memory.
-
struct index_params : public raft::neighbors::ann::index_params#
- #include <ivf_flat_types.hpp>
Public Members
-
uint32_t n_lists = 1024#
The number of inverted lists (clusters)
-
uint32_t kmeans_n_iters = 20#
The number of iterations searching for kmeans centers (index building).
-
double kmeans_trainset_fraction = 0.5#
The fraction of data to use during iterative kmeans building.
-
bool adaptive_centers = false#
By default (adaptive_centers = false), the cluster centers are trained in
ivf_flat::build
, and never modified inivf_flat::extend
. As a result, you may need to retrain the index from scratch after invoking (ivf_flat::extend
) a few times with new data, the distribution of which is no longer representative of the original training set.The alternative behavior (adaptive_centers = true) is to update the cluster centers for new data when it is added. In this case,
index.centers()
are always exactly the centroids of the data in the corresponding clusters. The drawback of this behavior is that the centroids depend on the order of adding new data (through the classification of the added data); that is,index.centers()
“drift” together with the changing distribution of the newly added data.
-
bool conservative_memory_allocation = false#
By default, the algorithm allocates more space than necessary for individual clusters (
list_data
). This allows to amortize the cost of memory allocation and reduce the number of data copies during repeated calls toextend
(extending the database).The alternative is the conservative allocation behavior; when enabled, the algorithm always allocates the minimum amount of memory required to store the given number of records. Set this flag to
true
if you prefer to use as little GPU memory for the database as possible.
-
uint32_t n_lists = 1024#
-
struct search_params : public raft::neighbors::ann::search_params#
- #include <ivf_flat_types.hpp>
Public Members
-
uint32_t n_probes = 20#
The number of clusters to search.
-
uint32_t n_probes = 20#
-
template<typename SizeT, typename ValueT, typename IdxT>
struct list_spec# - #include <ivf_flat_types.hpp>
-
template<typename T, typename IdxT>
struct index : public raft::neighbors::ann::index# - #include <ivf_flat_types.hpp>
IVF-flat index.
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
Public Functions
-
inline constexpr auto veclen() const noexcept -> uint32_t#
Vectorized load/store size in elements, determines the size of interleaved data chunks.
TODO: in theory, we can lift this to the template parameter and keep it at hardware maximum possible value by padding the
dim
of the data rapidsai/raft#711
-
inline constexpr auto metric() const noexcept -> raft::distance::DistanceType#
Distance metric used for clustering.
-
inline constexpr auto adaptive_centers() const noexcept -> bool#
Whether
centers()
change upon extending the index (ivf_pq::extend).
-
inline auto list_sizes() noexcept -> device_vector_view<uint32_t, uint32_t>#
Inverted list data [size, dim].
The data consists of the dataset rows, grouped by their labels (into clusters/lists). Within each list (cluster), the data is grouped into blocks of
kIndexGroupSize
interleaved vectors. Note, the total index length is slightly larger than the source dataset length, because each cluster is padded bykIndexGroupSize
elements.Interleaving pattern: within groups of
kIndexGroupSize
rows, the data is interleaved with the block size equal toveclen * sizeof(T)
. That is, a chunk ofveclen
consecutive components of one row is followed by a chunk of the same size of the next row, and so on.Example: veclen = 2, dim = 6, kIndexGroupSize = 32, list_size = 31
Sizes of the lists (clusters) [n_lists] NB: This may differ from the actual list size if the shared lists have been extended by another indexx[ 0, 0], x[ 0, 1], x[ 1, 0], x[ 1, 1], ... x[14, 0], x[14, 1], x[15, 0], x[15, 1], x[16, 0], x[16, 1], x[17, 0], x[17, 1], ... x[30, 0], x[30, 1], - , - , x[ 0, 2], x[ 0, 3], x[ 1, 2], x[ 1, 3], ... x[14, 2], x[14, 3], x[15, 2], x[15, 3], x[16, 2], x[16, 3], x[17, 2], x[17, 3], ... x[30, 2], x[30, 3], - , - , x[ 0, 4], x[ 0, 5], x[ 1, 4], x[ 1, 5], ... x[14, 4], x[14, 5], x[15, 4], x[15, 5], x[16, 4], x[16, 5], x[17, 4], x[17, 5], ... x[30, 4], x[30, 5], - , - ,
-
inline auto centers() noexcept -> device_matrix_view<float, uint32_t, row_major>#
k-means cluster centers corresponding to the lists [n_lists, dim]
-
inline auto center_norms() noexcept -> std::optional<device_vector_view<float, uint32_t>>#
(Optional) Precomputed norms of the
centers
w.r.t. the chosen distance metric [n_lists].NB: this may be empty if the index is empty or if the metric does not require the center norms calculation.
-
inline auto accum_sorted_sizes() noexcept -> host_vector_view<IdxT, uint32_t>#
Accumulated list sizes, sorted in descending order [n_lists + 1]. The last value contains the total length of the index. The value at index zero is always zero.
That is, the content of this span is as if the
list_sizes
was sorted and then accumulated.This span is used during search to estimate the maximum size of the workspace.
-
inline constexpr auto dim() const noexcept -> uint32_t#
Dimensionality of the data.
-
inline constexpr auto n_lists() const noexcept -> uint32_t#
Number of clusters/inverted lists.
-
inline index(raft::resources const &res, raft::distance::DistanceType metric, uint32_t n_lists, bool adaptive_centers, bool conservative_memory_allocation, uint32_t dim)#
Construct an empty index. It needs to be trained and then populated.
-
inline index(raft::resources const &res, const index_params ¶ms, uint32_t dim)#
Construct an empty index. It needs to be trained and then populated.
-
inline auto data_ptrs() noexcept -> device_vector_view<T*, uint32_t>#
Pointers to the inverted lists (clusters) data [n_lists].
-
inline auto inds_ptrs() noexcept -> device_vector_view<IdxT*, uint32_t>#
Pointers to the inverted lists (clusters) indices [n_lists].
-
inline constexpr auto conservative_memory_allocation() const noexcept -> bool#
Whether to use convervative memory allocation when extending the list (cluster) data (see index_params.conservative_memory_allocation).
-
inline auto lists() noexcept -> std::vector<std::shared_ptr<list_data<T, IdxT>>>&#
Lists’ data and indices.
-
inline void check_consistency()#
Throw an error if the index content is inconsistent.
Serializer Methods#
#include <raft/neighbors/ivf_flat_serialize.cuh>
namespace raft::neighbors::ivf_flat
-
template<typename T, typename IdxT>
void serialize(raft::resources const &handle, std::ostream &os, const index<T, IdxT> &index)# Write the index to an output stream
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> raft::resources handle; // create an output stream std::ostream os(std::cout.rdbuf()); // create an index with `auto index = ivf_flat::build(...);` raft::serialize(handle, os, index);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
os – [in] output stream
index – [in] IVF-Flat index
-
template<typename T, typename IdxT>
void serialize(raft::resources const &handle, const std::string &filename, const index<T, IdxT> &index)# Save the index to file.
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> raft::resources handle; // create a string with a filepath std::string filename("/path/to/index"); // create an index with `auto index = ivf_flat::build(...);` raft::serialize(handle, filename, index);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
filename – [in] the file name for saving the index
index – [in] IVF-Flat index
-
template<typename T, typename IdxT>
index<T, IdxT> deserialize(raft::resources const &handle, std::istream &is) Load index from input stream
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> raft::resources handle; // create an input stream std::istream is(std::cin.rdbuf()); using T = float; // data element type using IdxT = int; // type of the index auto index = raft::deserialize<T, IdxT>(handle, is);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
is – [in] input stream
- Returns:
raft::neighbors::ivf_flat::index<T, IdxT>
-
template<typename T, typename IdxT>
index<T, IdxT> deserialize(raft::resources const &handle, const std::string &filename) Load index from file.
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> raft::resources handle; // create a string with a filepath std::string filename("/path/to/index"); using T = float; // data element type using IdxT = int; // type of the index auto index = raft::deserialize<T, IdxT>(handle, filename);
- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in] the raft handle
filename – [in] the name of the file that stores the index
- Returns:
raft::neighbors::ivf_flat::index<T, IdxT>
Helper Methods#
#include <raft/neighbors/ivf_flat_helpers.cuh>
namespace raft::neighbors::ivf_flat::helpers
-
template<typename T, typename IdxT>
void reset_index(const raft::resources &res, index<T, IdxT> *index)# Public helper API to reset the data and indices ptrs, and the list sizes. Useful for externally modifying the index without going through the build stage. The data and indices of the IVF lists will be lost.
Usage example:
raft::resources res; using namespace raft::neighbors; // use default index parameters ivf_flat::index_params index_params; // initialize an empty index ivf_flat::index<int64_t> index(res, index_params, D); // reset the index's state and list sizes ivf_flat::helpers::reset_index(res, &index);
- Template Parameters:
IdxT –
- Parameters:
res – [in] raft resource
index – [inout] pointer to IVF-PQ index
-
template<typename T, typename IdxT>
void recompute_internal_state(const raft::resources &res, index<T, IdxT> *index)# Helper exposing the re-computation of list sizes and related arrays if IVF lists have been modified.
Usage example:
using namespace raft::neighbors; raft::resources res; // use default index parameters ivf_flat::index_params index_params; // initialize an empty index ivf_flat::index<int64_t> index(res, index_params, D); ivf_flat::helpers::reset_index(res, &index); // recompute the internal state of the index ivf_flat::helpers::recompute_internal_state(res, &index);
- Template Parameters:
T –
IdxT –
- Parameters:
res – [in] raft resource
index – [inout] pointer to IVF-FLAT index