Attention
The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called cuVS. We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.10 (October) release and they will be removed from RAFT altogether in the 24.12 (December) release.
IVF-PQ#
#include <raft/neighbors/ivf_pq.cuh>
namespace raft::neighbors::ivf_pq
-
enum class codebook_gen#
A type for specifying how PQ codebooks are created.
Values:
-
enumerator PER_SUBSPACE#
-
enumerator PER_CLUSTER#
-
enumerator PER_SUBSPACE#
-
template<typename IdxT, typename SizeT = uint32_t>
using list_data = ivf::list<list_spec, SizeT, IdxT>#
-
static constexpr uint32_t kIndexGroupSize = 32
Size of the interleaved group.
-
static constexpr uint32_t kIndexGroupVecLen = 16#
Stride of the interleaved group for vectorized loads.
-
template<typename IdxT>
static constexpr IdxT kOutOfBoundsRecord = std::numeric_limits<IdxT>::max()# Default value returned by
search
when then_probes
is too small and top-k is too large. One may encounter it if the combined size of probed clusters is smaller than the requested number of results per query.
-
template<typename T, typename IdxT = uint32_t>
index<IdxT> build(raft::resources const &handle, const index_params ¶ms, raft::device_matrix_view<const T, IdxT, row_major> dataset)# Build the index from the dataset for efficient search.
NB: Currently, the following distance metrics are supported:
L2Expanded
L2Unexpanded
InnerProduct
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
handle – [in]
params – [in] configure the index building
dataset – [in] a device matrix view to a row-major matrix [n_rows, dim]
- Returns:
the constructed ivf-pq index
-
template<typename T, typename IdxT>
index<IdxT> extend(raft::resources const &handle, raft::device_matrix_view<const T, IdxT, row_major> new_vectors, std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, const index<IdxT> &idx)# Extend the index with the new data. *.
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
handle – [in]
new_vectors – [in] a device matrix view to a row-major matrix [n_rows, idx.dim()]
new_indices – [in] a device vector view to a vector of indices [n_rows]. If the original index is empty (
idx.size() == 0
), you can passstd::nullopt
here to imply a continuous range[0...n_rows)
.idx – [inout]
-
template<typename T, typename IdxT>
void extend(raft::resources const &handle, raft::device_matrix_view<const T, IdxT, row_major> new_vectors, std::optional<raft::device_vector_view<const IdxT, IdxT>> new_indices, index<IdxT> *idx)# Extend the index with the new data. *.
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
handle – [in]
new_vectors – [in] a device matrix view to a row-major matrix [n_rows, idx.dim()]
new_indices – [in] a device vector view to a vector of indices [n_rows]. If the original index is empty (
idx.size() == 0
), you can passstd::nullopt
here to imply a continuous range[0...n_rows)
.idx – [inout]
-
template<typename T, typename IdxT, typename IvfSampleFilterT>
void search_with_filtering(raft::resources const &handle, const search_params ¶ms, const index<IdxT> &idx, raft::device_matrix_view<const T, uint32_t, row_major> queries, raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors, raft::device_matrix_view<float, uint32_t, row_major> distances, IvfSampleFilterT sample_filter = IvfSampleFilterT{})# Search ANN using the constructed index with the given filter.
See the ivf_pq::build documentation for a usage example.
Note, this function requires a temporary buffer to store intermediate results between cuda kernel calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can pass a pool memory resource or a large enough pre-allocated memory resource to reduce or eliminate entirely allocations happening within
search
. The exact size of the temporary buffer depends on multiple factors and is an implementation detail. However, you can safely specify a small initial size for the memory pool, so that only a few allocations happen to grow it during the first invocations of thesearch
.- Template Parameters:
T – data element type
IdxT – type of the indices
IvfSampleFilterT – Device filter function, with the signature
(uint32_t query_ix, uint32 cluster_ix, uint32_t sample_ix) -> bool
or(uint32_t query_ix, uint32 sample_ix) -> bool
- Parameters:
handle – [in]
params – [in] configure the search
idx – [in] ivf-pq constructed index
queries – [in] a device matrix view to a row-major matrix [n_queries, index->dim()]
neighbors – [out] a device matrix view to the indices of the neighbors in the source dataset [n_queries, k]
distances – [out] a device matrix view to the distances to the selected neighbors [n_queries, k]
sample_filter – [in] a device filter function that greenlights samples for a given query.
-
template<typename T, typename IdxT>
void search(raft::resources const &handle, const search_params ¶ms, const index<IdxT> &idx, raft::device_matrix_view<const T, uint32_t, row_major> queries, raft::device_matrix_view<IdxT, uint32_t, row_major> neighbors, raft::device_matrix_view<float, uint32_t, row_major> distances)# Search ANN using the constructed index.
See the ivf_pq::build documentation for a usage example.
Note, this function requires a temporary buffer to store intermediate results between cuda kernel calls, which may lead to undesirable allocations and slowdown. To alleviate the problem, you can pass a pool memory resource or a large enough pre-allocated memory resource to reduce or eliminate entirely allocations happening within
search
. The exact size of the temporary buffer depends on multiple factors and is an implementation detail. However, you can safely specify a small initial size for the memory pool, so that only a few allocations happen to grow it during the first invocations of thesearch
.- Template Parameters:
T – data element type
IdxT – type of the indices
- Parameters:
handle – [in]
params – [in] configure the search
idx – [in] ivf-pq constructed index
queries – [in] a device matrix view to a row-major matrix [n_queries, index->dim()]
neighbors – [out] a device matrix view to the indices of the neighbors in the source dataset [n_queries, k]
distances – [out] a device matrix view to the distances to the selected neighbors [n_queries, k]
-
struct index_params : public raft::neighbors::ann::index_params#
- #include <ivf_pq_types.hpp>
Public Members
-
uint32_t n_lists = 1024#
The number of inverted lists (clusters)
Hint: the number of vectors per cluster (
n_rows/n_lists
) should be approximately 1,000 to 10,000.
-
uint32_t kmeans_n_iters = 20#
The number of iterations searching for kmeans centers (index building).
-
double kmeans_trainset_fraction = 0.5#
The fraction of data to use during iterative kmeans building.
-
uint32_t pq_bits = 8#
The bit length of the vector element after compression by PQ.
Possible values: [4, 5, 6, 7, 8].
Hint: the smaller the ‘pq_bits’, the smaller the index size and the better the search performance, but the lower the recall.
-
uint32_t pq_dim = 0#
The dimensionality of the vector after compression by PQ. When zero, an optimal value is selected using a heuristic.
NB:
pq_dim * pq_bits
must be a multiple of 8.Hint: a smaller ‘pq_dim’ results in a smaller index size and better search performance, but lower recall. If ‘pq_bits’ is 8, ‘pq_dim’ can be set to any number, but multiple of 8 are desirable for good performance. If ‘pq_bits’ is not 8, ‘pq_dim’ should be a multiple of 8. For good performance, it is desirable that ‘pq_dim’ is a multiple of 32. Ideally, ‘pq_dim’ should be also a divisor of the dataset dim.
-
codebook_gen codebook_kind = codebook_gen::PER_SUBSPACE#
How PQ codebooks are created.
-
bool force_random_rotation = false#
Apply a random rotation matrix on the input data and queries even if
dim % pq_dim == 0
.Note: if
dim
is not multiple ofpq_dim
, a random rotation is always applied to the input data and queries to transform the working space fromdim
torot_dim
, which may be slightly larger than the original space and and is a multiple ofpq_dim
(rot_dim % pq_dim == 0
). However, this transform is not necessary whendim
is multiple ofpq_dim
(dim == rot_dim
, hence no need in adding “extra” data columns / features).By default, if
dim == rot_dim
, the rotation transform is initialized with the identity matrix. Whenforce_random_rotation == true
, a random orthogonal transform matrix is generated regardless of the values ofdim
andpq_dim
.
-
bool conservative_memory_allocation = false#
By default, the algorithm allocates more space than necessary for individual clusters (
list_data
). This allows to amortize the cost of memory allocation and reduce the number of data copies during repeated calls toextend
(extending the database).The alternative is the conservative allocation behavior; when enabled, the algorithm always allocates the minimum amount of memory required to store the given number of records. Set this flag to
true
if you prefer to use as little GPU memory for the database as possible.
Public Static Functions
-
template<typename DataT, typename Accessor>
static inline index_params from_dataset(mdspan<const DataT, matrix_extent<int64_t>, row_major, Accessor> dataset, raft::distance::DistanceType metric = raft::distance::L2Expanded)# Creates index_params based on shape of the input dataset. Usage example:
using namespace raft::neighbors; raft::resources res; // create index_params for a [N. D] dataset and have InnerProduct as the distance metric auto dataset = raft::make_device_matrix<float, int64_t>(res, N, D); ivf_pq::index_params index_params = ivf_pq::index_params::from_dataset(dataset.view(), raft::distance::InnerProduct); // modify/update index_params as needed index_params.add_data_on_build = true;
-
uint32_t n_lists = 1024#
-
struct search_params : public raft::neighbors::ann::search_params#
- #include <ivf_pq_types.hpp>
Public Members
-
uint32_t n_probes = 20#
The number of clusters to search.
-
cudaDataType_t lut_dtype = CUDA_R_32F#
Data type of look up table to be created dynamically at search time.
Possible values: [CUDA_R_32F, CUDA_R_16F, CUDA_R_8U]
The use of low-precision types reduces the amount of shared memory required at search time, so fast shared memory kernels can be used even for datasets with large dimansionality. Note that the recall is slightly degraded when low-precision type is selected.
-
cudaDataType_t internal_distance_dtype = CUDA_R_32F#
Storage data type for distance/similarity computed at search time.
Possible values: [CUDA_R_16F, CUDA_R_32F]
If the performance limiter at search time is device memory access, selecting FP16 will improve performance slightly.
-
double preferred_shmem_carveout = 1.0#
Preferred fraction of SM’s unified memory / L1 cache to be used as shared memory.
Possible values: [0.0 - 1.0] as a fraction of the
sharedMemPerMultiprocessor
.One wants to increase the carveout to make sure a good GPU occupancy for the main search kernel, but not to keep it too high to leave some memory to be used as L1 cache. Note, this value is interpreted only as a hint. Moreover, a GPU usually allows only a fixed set of cache configurations, so the provided value is rounded up to the nearest configuration. Refer to the NVIDIA tuning guide for the target GPU architecture.
Note, this is a low-level tuning parameter that can have drastic negative effects on the search performance if tweaked incorrectly.
-
uint32_t n_probes = 20#
-
template<typename SizeT, typename IdxT>
struct list_spec# - #include <ivf_pq_types.hpp>
Public Types
-
using list_extents = extents<SizeT, dynamic_extent, dynamic_extent, kIndexGroupSize, kIndexGroupVecLen>#
PQ-encoded data stored in the interleaved format:
[ ceildiv(list_size, kIndexGroupSize) , ceildiv(pq_dim, (kIndexGroupVecLen * 8u) / pq_bits) , kIndexGroupSize , kIndexGroupVecLen ].
Public Functions
-
inline constexpr auto make_list_extents(SizeT n_rows) const -> list_extents#
Determine the extents of an array enough to hold a given amount of data.
-
using list_extents = extents<SizeT, dynamic_extent, dynamic_extent, kIndexGroupSize, kIndexGroupVecLen>#
-
template<typename IdxT>
struct index : public raft::neighbors::ann::index# - #include <ivf_pq_types.hpp>
IVF-PQ index.
In the IVF-PQ index, a database vector y is approximated with two level quantization:
y = Q_1(y) + Q_2(y - Q_1(y))
The first level quantizer (Q_1), maps the vector y to the nearest cluster center. The number of clusters is n_lists.
The second quantizer encodes the residual, and it is defined as a product quantizer [1].
A product quantizer encodes a
dim
dimensional vector with apq_dim
dimensional vector. First we split the input vector intopq_dim
subvectors (denoted by u), where each u vector containspq_len
distinct components of yy_1, y_2, … y_{pq_len}, y_{pq_len+1}, … y_{2*pq_len}, … y_{dim-pq_len+1} … y_{dim} ___________________/ ____________________________/ ______________________/ u_1 u_2 u_{pq_dim}
Then each subvector encoded with a separate quantizer q_i, end the results are concatenated
Q_2(y) = q_1(u_1),q_2(u_2),…,q_{pq_dim}(u_pq_dim})
Each quantizer q_i outputs a code with pq_bit bits. The second level quantizers are also defined by k-means clustering in the corresponding sub-space: the reproduction values are the centroids, and the set of reproduction values is the codebook.
When the data dimensionality
dim
is not multiple ofpq_dim
, the feature space is transformed using a random orthogonal matrix to haverot_dim = pq_dim * pq_len
dimensions (rot_dim >= dim
).The second-level quantizers are trained either for each subspace or for each cluster: (a) codebook_gen::PER_SUBSPACE: creates
pq_dim
second-level quantizers - one for each slice of the data along features; (b) codebook_gen::PER_CLUSTER: createsn_lists
second-level quantizers - one for each first-level cluster. In either case, the centroids are again found using k-means clustering interpreting the data as having pq_len dimensions.[1] Product quantization for nearest neighbor search Herve Jegou, Matthijs Douze, Cordelia Schmid
- Template Parameters:
IdxT – type of the indices in the source dataset
Public Functions
-
inline constexpr auto dim() const noexcept -> uint32_t#
Dimensionality of the input data.
-
inline constexpr auto dim_ext() const noexcept -> uint32_t#
Dimensionality of the cluster centers: input data dim extended with vector norms and padded to 8 elems.
-
inline constexpr auto rot_dim() const noexcept -> uint32_t#
Dimensionality of the data after transforming it for PQ processing (rotated and augmented to be muplitple of
pq_dim
).
-
inline constexpr auto pq_bits() const noexcept -> uint32_t#
The bit length of an encoded vector element after compression by PQ.
-
inline constexpr auto pq_dim() const noexcept -> uint32_t#
The dimensionality of an encoded vector after compression by PQ.
-
inline constexpr auto pq_len() const noexcept -> uint32_t#
Dimensionality of a subspaces, i.e. the number of vector components mapped to a subspace
-
inline constexpr auto pq_book_size() const noexcept -> uint32_t#
The number of vectors in a PQ codebook (
1 << pq_bits
).
-
inline constexpr auto metric() const noexcept -> raft::distance::DistanceType#
Distance metric used for clustering.
-
inline constexpr auto codebook_kind() const noexcept -> codebook_gen#
How PQ codebooks are created.
-
inline constexpr auto n_lists() const noexcept -> uint32_t#
Number of clusters/inverted lists (first level quantization).
-
inline constexpr auto conservative_memory_allocation() const noexcept -> bool#
Whether to use convervative memory allocation when extending the list (cluster) data (see index_params.conservative_memory_allocation).
-
inline index(raft::resources const &handle, raft::distance::DistanceType metric, codebook_gen codebook_kind, uint32_t n_lists, uint32_t dim, uint32_t pq_bits = 8, uint32_t pq_dim = 0, bool conservative_memory_allocation = false)#
Construct an empty index. It needs to be trained and then populated.
-
inline index(raft::resources const &handle, const index_params ¶ms, uint32_t dim)#
Construct an empty index. It needs to be trained and then populated.
-
inline auto pq_centers() noexcept -> device_mdspan<float, pq_centers_extents, row_major>#
PQ cluster centers
codebook_gen::PER_SUBSPACE: [pq_dim , pq_len, pq_book_size]
codebook_gen::PER_CLUSTER: [n_lists, pq_len, pq_book_size]
-
inline auto lists() noexcept -> std::vector<std::shared_ptr<list_data<IdxT>>>&#
Lists’ data and indices.
-
inline auto data_ptrs() noexcept -> device_vector_view<uint8_t*, uint32_t, row_major>#
Pointers to the inverted lists (clusters) data [n_lists].
-
inline auto inds_ptrs() noexcept -> device_vector_view<IdxT*, uint32_t, row_major>#
Pointers to the inverted lists (clusters) indices [n_lists].
-
inline auto rotation_matrix() noexcept -> device_matrix_view<float, uint32_t, row_major>#
The transform matrix (original space -> rotated padded space) [rot_dim, dim]
-
inline auto accum_sorted_sizes() noexcept -> host_vector_view<IdxT, uint32_t, row_major>#
Accumulated list sizes, sorted in descending order [n_lists + 1]. The last value contains the total length of the index. The value at index zero is always zero.
That is, the content of this span is as if the
list_sizes
was sorted and then accumulated.This span is used during search to estimate the maximum size of the workspace.
-
inline auto list_sizes() noexcept -> device_vector_view<uint32_t, uint32_t, row_major>#
Sizes of the lists [n_lists].
-
inline auto centers() noexcept -> device_matrix_view<float, uint32_t, row_major>#
Cluster centers corresponding to the lists in the original space [n_lists, dim_ext]
-
inline auto centers_rot() noexcept -> device_matrix_view<float, uint32_t, row_major>#
Cluster centers corresponding to the lists in the rotated space [n_lists, rot_dim]
-
inline auto get_list_size_in_bytes(uint32_t label) -> uint32_t#
fetch size of a particular IVF list in bytes using the list extents. Usage example:
raft::resources res; // use default index params ivf_pq::index_params index_params; // extend the IVF lists while building the index index_params.add_data_on_build = true; // create and fill the index from a [N, D] dataset auto index = raft::neighbors::ivf_pq::build<int64_t>(res, index_params, dataset, N, D); // Fetch the size of the fourth list uint32_t size = index.get_list_size_in_bytes(3);
- Parameters:
label – [in] list ID
Serializer Methods#
#include <raft/neighbors/ivf_pq_serialize.cuh>
namespace raft::neighbors::ivf_pq
-
template<typename IdxT>
void serialize(raft::resources const &handle, std::ostream &os, const index<IdxT> &index)# Write the index to an output stream
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> raft::resources handle; // create an output stream std::ostream os(std::cout.rdbuf()); // create an index with `auto index = ivf_pq::build(...);` raft::serialize(handle, os, index);
- Template Parameters:
IdxT – type of the index
- Parameters:
handle – [in] the raft handle
os – [in] output stream
index – [in] IVF-PQ index
-
template<typename IdxT>
void serialize(raft::resources const &handle, const std::string &filename, const index<IdxT> &index)# Save the index to file.
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> raft::resources handle; // create a string with a filepath std::string filename("/path/to/index"); // create an index with `auto index = ivf_pq::build(...);` raft::serialize(handle, filename, index);
- Template Parameters:
IdxT – type of the index
- Parameters:
handle – [in] the raft handle
filename – [in] the file name for saving the index
index – [in] IVF-PQ index
-
template<typename IdxT>
index<IdxT> deserialize(raft::resources const &handle, std::istream &is)# Load index from input stream
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> raft::resources handle; // create an input stream std::istream is(std::cin.rdbuf()); using IdxT = int; // type of the index auto index = raft::deserialize<IdxT>(handle, is);
- Template Parameters:
IdxT – type of the index
- Parameters:
handle – [in] the raft handle
is – [in] input stream
- Returns:
raft::neighbors::ivf_pq::index<IdxT>
-
template<typename IdxT>
index<IdxT> deserialize(raft::resources const &handle, const std::string &filename)# Load index from file.
Experimental, both the API and the serialization format are subject to change.
#include <raft/core/resources.hpp> raft::resources handle; // create a string with a filepath std::string filename("/path/to/index"); using IdxT = int; // type of the index auto index = raft::deserialize<IdxT>(handle, filename);
- Template Parameters:
IdxT – type of the index
- Parameters:
handle – [in] the raft handle
filename – [in] the name of the file that stores the index
- Returns:
raft::neighbors::ivf_pq::index<IdxT>
Candidate Refinement#
#include <raft/neighbors/refine.cuh>
namespace raft::neighbors
-
template<typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
void refine(raft::resources const &handle, raft::device_matrix_view<const data_t, matrix_idx, row_major> dataset, raft::device_matrix_view<const data_t, matrix_idx, row_major> queries, raft::device_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates, raft::device_matrix_view<idx_t, matrix_idx, row_major> indices, raft::device_matrix_view<distance_t, matrix_idx, row_major> distances, distance::DistanceType metric = distance::DistanceType::L2Unexpanded)# Refine nearest neighbor search.
Refinement is an operation that follows an approximate NN search. The approximate search has already selected n_candidates neighbor candidates for each query. We narrow it down to k neighbors. For each query, we calculate the exact distance between the query and its n_candidates neighbor candidate, and select the k nearest ones.
The k nearest neighbors and distances are returned.
Example usage
using namespace raft::neighbors; // use default index parameters ivf_pq::index_params index_params; // create and fill the index from a [N, D] dataset auto index = ivf_pq::build(handle, index_params, dataset, N, D); // use default search parameters ivf_pq::search_params search_params; // search m = 4 * k nearest neighbours for each of the N queries ivf_pq::search(handle, search_params, index, queries, N, 4 * k, neighbor_candidates, out_dists_tmp); // refine it to the k nearest one refine(handle, dataset, queries, neighbor_candidates, out_indices, out_dists, index.metric());
- Parameters:
handle – [in] the raft handle
dataset – [in] device matrix that stores the dataset [n_rows, dims]
queries – [in] device matrix of the queries [n_queris, dims]
neighbor_candidates – [in] indices of candidate vectors [n_queries, n_candidates], where n_candidates >= k
indices – [out] device matrix that stores the refined indices [n_queries, k]
distances – [out] device matrix that stores the refined distances [n_queries, k]
metric – [in] distance metric to use. Euclidean (L2) is used by default
-
template<typename idx_t, typename data_t, typename distance_t, typename matrix_idx>
void refine(raft::resources const &handle, raft::host_matrix_view<const data_t, matrix_idx, row_major> dataset, raft::host_matrix_view<const data_t, matrix_idx, row_major> queries, raft::host_matrix_view<const idx_t, matrix_idx, row_major> neighbor_candidates, raft::host_matrix_view<idx_t, matrix_idx, row_major> indices, raft::host_matrix_view<distance_t, matrix_idx, row_major> distances, distance::DistanceType metric = distance::DistanceType::L2Unexpanded)# Same as above, but all input and out data is in host memory.
- Parameters:
handle – [in] the raft handle
dataset – [in] host matrix that stores the dataset [n_rows, dims]
queries – [in] host matrix of the queries [n_queris, dims]
neighbor_candidates – [in] host matrix with indices of candidate vectors [n_queries, n_candidates], where n_candidates >= k
indices – [out] host matrix that stores the refined indices [n_queries, k]
distances – [out] host matrix that stores the refined distances [n_queries, k]
metric – [in] distance metric to use. Euclidean (L2) is used by default
Helper Methods#
#include <raft/neighbors/ivf_pq_helpers.cuh>
namespace raft::neighbors::ivf_pq::helpers
-
template<typename IdxT>
void pack_list_data(raft::resources const &res, index<IdxT> *index, device_matrix_view<const uint8_t, uint32_t, row_major> codes, uint32_t label, uint32_t offset)# Write flat PQ codes into an existing list by the given offset.
The list is identified by its label.
NB: no memory allocation happens here; the list must fit the data (offset + n_vec).
Usage example:
// We will write into the 137th cluster uint32_t label = 137; // allocate the buffer for the input codes auto codes = raft::make_device_matrix<const uint8_t>(res, n_vec, index.pq_dim()); ... prepare n_vecs to pack into the list in codes ... // write codes into the list starting from the 42nd position ivf_pq::helpers::pack_list_data(res, &index, codes_to_pack, label, 42);
- Parameters:
res – [in] raft resource
index – [inout] IVF-PQ index.
codes – [in] flat PQ codes, one code per byte [n_rows, pq_dim]
label – [in] The id of the list (cluster) into which we write.
offset – [in] how many records to skip before writing the data into the list
-
template<typename IdxT>
void pack_contiguous_list_data(raft::resources const &res, index<IdxT> *index, uint8_t *codes, uint32_t n_rows, uint32_t label, uint32_t offset)# Write flat PQ codes into an existing list by the given offset. Use this when the input vectors are PQ encoded and not expanded to one code per byte.
The list is identified by its label.
NB: no memory allocation happens here; the list into which the vectors are packed must fit offset
n_rows rows.
Usage example:
using namespace raft::neighbors; raft::resources res; // use default index parameters ivf_pq::index_params index_params; // create and fill the index from a [N, D] dataset auto index = ivf_pq::build(res, index_params, dataset, N, D); // allocate the buffer for n_rows input codes. Each vector occupies // raft::ceildiv(index.pq_dim() * index.pq_bits(), 8) bytes because // codes are compressed and without gaps. auto codes = raft::make_device_matrix<const uint8_t>( res, n_rows, raft::ceildiv(index.pq_dim() * index.pq_bits(), 8)); ... prepare the compressed vectors to pack into the list in codes ... // the first n_rows codes in the fourth IVF list are to be overwritten. uint32_t label = 3; // write codes into the list starting from the 0th position ivf_pq::helpers::pack_contiguous_list_data( res, &index, codes.data_handle(), n_rows, label, 0);
- Template Parameters:
IdxT –
- Parameters:
res – [in] raft resource
index – [inout] pointer to IVF-PQ index
codes – [in] flat contiguous PQ codes [n_rows, ceildiv(pq_dim * pq_bits, 8)]
n_rows – [in] how many records to pack
label – [in] The id of the list (cluster) into which we write.
offset – [in] how many records to skip before writing the data into the list
-
template<typename IdxT>
void unpack_list_data(raft::resources const &res, const index<IdxT> &index, device_matrix_view<uint8_t, uint32_t, row_major> out_codes, uint32_t label, uint32_t offset)# Unpack
n_take
consecutive records of a single list (cluster) in the compressed index starting at givenoffset
, one code per byte (independently of pq_bits).Usage example:
// We will unpack the fourth cluster uint32_t label = 3; // Get the list size uint32_t list_size = 0; raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, resource::get_cuda_stream(res)); resource::sync_stream(res); // allocate the buffer for the output auto codes = raft::make_device_matrix<float>(res, list_size, index.pq_dim()); // unpack the whole list ivf_pq::helpers::unpack_list_data(res, index, codes.view(), label, 0);
- Template Parameters:
IdxT – type of the indices in the source dataset
- Parameters:
res – [in]
index – [in]
out_codes – [out] the destination buffer [n_take, index.pq_dim()]. The length
n_take
defines how many records to unpack, it must be smaller than the list size.label – [in] The id of the list (cluster) to decode.
offset – [in] How many records in the list to skip.
-
template<typename IdxT>
void unpack_list_data(raft::resources const &res, const index<IdxT> &index, device_vector_view<const uint32_t> in_cluster_indices, device_matrix_view<uint8_t, uint32_t, row_major> out_codes, uint32_t label)# Unpack a series of records of a single list (cluster) in the compressed index by their in-list offsets, one code per byte (independently of pq_bits).
Usage example:
// We will unpack the fourth cluster uint32_t label = 3; // Create the selection vector auto selected_indices = raft::make_device_vector<uint32_t>(res, 4); ... fill the indices ... resource::sync_stream(res); // allocate the buffer for the output auto codes = raft::make_device_matrix<float>(res, selected_indices.size(), index.pq_dim()); // decode the whole list ivf_pq::helpers::unpack_list_data( res, index, selected_indices.view(), codes.view(), label);
- Template Parameters:
IdxT – type of the indices in the source dataset
- Parameters:
res – [in] raft resource
index – [in] IVF-PQ index (passed by reference)
in_cluster_indices – [in] The offsets of the selected indices within the cluster.
out_codes – [out] the destination buffer [n_take, index.pq_dim()]. The length
n_take
defines how many records to unpack, it must be smaller than the list size.label – [in] The id of the list (cluster) to decode.
-
template<typename IdxT>
void unpack_contiguous_list_data(raft::resources const &res, const index<IdxT> &index, uint8_t *out_codes, uint32_t n_rows, uint32_t label, uint32_t offset)# Unpack
n_rows
consecutive PQ encoded vectors of a single list (cluster) in the compressed index starting at givenoffset
, not expanded to one code per byte. Each code in the output buffer occupies ceildiv(index.pq_dim() * index.pq_bits(), 8) bytes.Usage example:
raft::resources res; // We will unpack the whole fourth cluster uint32_t label = 3; // Get the list size uint32_t list_size = 0; raft::update_host(&list_size, index.list_sizes().data_handle() + label, 1, raft::resource::get_cuda_stream(res)); raft::resource::sync_stream(res); // allocate the buffer for the output auto codes = raft::make_device_matrix<float>(res, list_size, raft::ceildiv(index.pq_dim() * index.pq_bits(), 8)); // unpack the whole list ivf_pq::helpers::unpack_list_data(res, index, codes.data_handle(), list_size, label, 0);
- Template Parameters:
IdxT – type of the indices in the source dataset
- Parameters:
res – [in] raft resource
index – [in] IVF-PQ index (passed by reference)
out_codes – [out] the destination buffer [n_rows, ceildiv(index.pq_dim() * index.pq_bits(), 8)]. The length
n_rows
defines how many records to unpack, offset + n_rows must be smaller than or equal to the list size.n_rows – [in] how many codes to unpack
label – [in] The id of the list (cluster) to decode.
offset – [in] How many records in the list to skip.
-
template<typename T, typename IdxT>
void reconstruct_list_data(raft::resources const &res, const index<IdxT> &index, device_matrix_view<T, uint32_t, row_major> out_vectors, uint32_t label, uint32_t offset)# Decode
n_take
consecutive records of a single list (cluster) in the compressed index starting at givenoffset
.Usage example:
// We will reconstruct the fourth cluster uint32_t label = 3; // Get the list size uint32_t list_size = 0; raft::copy(&list_size, index.list_sizes().data_handle() + label, 1, resource::get_cuda_stream(res)); resource::sync_stream(res); // allocate the buffer for the output auto decoded_vectors = raft::make_device_matrix<float>(res, list_size, index.dim()); // decode the whole list ivf_pq::helpers::reconstruct_list_data(res, index, decoded_vectors.view(), label, 0);
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
res – [in]
index – [in]
out_vectors – [out] the destination buffer [n_take, index.dim()]. The length
n_take
defines how many records to reconstruct, it must be smaller than the list size.label – [in] The id of the list (cluster) to decode.
offset – [in] How many records in the list to skip.
-
template<typename T, typename IdxT>
void reconstruct_list_data(raft::resources const &res, const index<IdxT> &index, device_vector_view<const uint32_t> in_cluster_indices, device_matrix_view<T, uint32_t, row_major> out_vectors, uint32_t label)# Decode a series of records of a single list (cluster) in the compressed index by their in-list offsets.
Usage example:
// We will reconstruct the fourth cluster uint32_t label = 3; // Create the selection vector auto selected_indices = raft::make_device_vector<uint32_t>(res, 4); ... fill the indices ... resource::sync_stream(res); // allocate the buffer for the output auto decoded_vectors = raft::make_device_matrix<float>( res, selected_indices.size(), index.dim()); // decode the whole list ivf_pq::helpers::reconstruct_list_data( res, index, selected_indices.view(), decoded_vectors.view(), label);
- Template Parameters:
T – data element type
IdxT – type of the indices in the source dataset
- Parameters:
res – [in]
index – [in]
in_cluster_indices – [in] The offsets of the selected indices within the cluster.
out_vectors – [out] the destination buffer [n_take, index.dim()]. The length
n_take
defines how many records to reconstruct, it must be smaller than the list size.label – [in] The id of the list (cluster) to decode.
-
template<typename IdxT>
void extend_list_with_codes(raft::resources const &res, index<IdxT> *index, device_matrix_view<const uint8_t, uint32_t, row_major> new_codes, device_vector_view<const IdxT, uint32_t, row_major> new_indices, uint32_t label)# Extend one list of the index in-place, by the list label, skipping the classification and encoding steps.
Usage example:
// We will extend the fourth cluster uint32_t label = 3; // We will fill 4 new vectors uint32_t n_vec = 4; // Indices of the new vectors auto indices = raft::make_device_vector<uint32_t>(res, n_vec); ... fill the indices ... auto new_codes = raft::make_device_matrix<uint8_t, uint32_t, row_major> new_codes( res, n_vec, index.pq_dim()); ... fill codes ... // extend list with new codes ivf_pq::helpers::extend_list_with_codes( res, &index, codes.view(), indices.view(), label);
- Template Parameters:
IdxT –
- Parameters:
res – [in]
index – [inout]
new_codes – [in] flat PQ codes, one code per byte [n_rows, index.pq_dim()]
new_indices – [in] source indices [n_rows]
label – [in] the id of the target list (cluster).
-
template<typename T, typename IdxT>
void extend_list(raft::resources const &res, index<IdxT> *index, device_matrix_view<const T, uint32_t, row_major> new_vectors, device_vector_view<const IdxT, uint32_t, row_major> new_indices, uint32_t label)# Extend one list of the index in-place, by the list label, skipping the classification step.
Usage example:
// We will extend the fourth cluster uint32_t label = 3; // We will extend with 4 new vectors uint32_t n_vec = 4; // Indices of the new vectors auto indices = raft::make_device_vector<uint32_t>(res, n_vec); ... fill the indices ... auto new_vectors = raft::make_device_matrix<float, uint32_t, row_major> new_codes( res, n_vec, index.dim()); ... fill vectors ... // extend list with new vectors ivf_pq::helpers::extend_list( res, &index, new_vectors.view(), indices.view(), label);
- Template Parameters:
T –
IdxT –
- Parameters:
res – [in]
index – [inout]
new_vectors – [in] data to encode [n_rows, index.dim()]
new_indices – [in] source indices [n_rows]
label – [in] the id of the target list (cluster).
-
template<typename IdxT>
void erase_list(raft::resources const &res, index<IdxT> *index, uint32_t label)# Remove all data from a single list (cluster) in the index.
Usage example:
// We will erase the fourth cluster (label = 3) ivf_pq::helpers::erase_list(res, &index, 3);
- Template Parameters:
IdxT –
- Parameters:
res – [in]
index – [inout]
label – [in] the id of the target list (cluster).
-
template<typename IdxT>
void reset_index(const raft::resources &res, index<IdxT> *index)# Public helper API to reset the data and indices ptrs, and the list sizes. Useful for externally modifying the index without going through the build stage. The data and indices of the IVF lists will be lost.
Usage example:
raft::resources res; using namespace raft::neighbors; // use default index parameters ivf_pq::index_params index_params; // initialize an empty index ivf_pq::index<int64_t> index(res, index_params, D); // reset the index's state and list sizes ivf_pq::helpers::reset_index(res, &index);
- Template Parameters:
IdxT –
- Parameters:
res – [in] raft resource
index – [inout] pointer to IVF-PQ index
-
template<typename IdxT>
void make_rotation_matrix(raft::resources const &res, index<IdxT> *index, bool force_random_rotation)# Public helper API exposing the computation of the index’s rotation matrix. NB: This is to be used only when the rotation matrix is not already computed through raft::neighbors::ivf_pq::build.
Usage example:
raft::resources res; // use default index parameters ivf_pq::index_params index_params; // force random rotation index_params.force_random_rotation = true; // initialize an empty index raft::neighbors::ivf_pq::index<int64_t> index(res, index_params, D); // reset the index reset_index(res, &index); // compute the rotation matrix with random_rotation raft::neighbors::ivf_pq::helpers::make_rotation_matrix( res, &index, index_params.force_random_rotation);
- Template Parameters:
IdxT –
- Parameters:
res – [in] raft resource
index – [inout] pointer to IVF-PQ index
force_random_rotation – [in] whether to apply a random rotation matrix on the input data. See raft::neighbors::ivf_pq::index_params for more details.
-
template<typename IdxT>
void set_centers(raft::resources const &res, index<IdxT> *index, device_matrix_view<const float, uint32_t> cluster_centers)# Public helper API for externally modifying the index’s IVF centroids. NB: The index must be reset before this. Use raft::neighbors::ivf_pq::extend to construct IVF lists according to new centroids.
Usage example:
raft::resources res; // allocate the buffer for the input centers auto cluster_centers = raft::make_device_matrix<float, uint32_t>(res, index.n_lists(), index.dim()); ... prepare ivf centroids in cluster_centers ... // reset the index reset_index(res, &index); // recompute the state of the index raft::neighbors::ivf_pq::helpers::recompute_internal_state(res, index); // Write the IVF centroids raft::neighbors::ivf_pq::helpers::set_centers( res, &index, cluster_centers);
- Template Parameters:
IdxT –
- Parameters:
res – [in] raft resource
index – [inout] pointer to IVF-PQ index
cluster_centers – [in] new cluster centers [index.n_lists(), index.dim()]
-
template<typename IdxT>
void recompute_internal_state(const raft::resources &res, index<IdxT> *index)# Helper exposing the re-computation of list sizes and related arrays if IVF lists have been modified.
Usage example:
using namespace raft::neighbors; raft::resources res; // use default index parameters ivf_pq::index_params index_params; // initialize an empty index ivf_pq::index<int64_t> index(res, index_params, D); ivf_pq::helpers::reset_index(res, &index); // resize the first IVF list to hold 5 records auto spec = list_spec<uint32_t, int64_t>{ index->pq_bits(), index->pq_dim(), index->conservative_memory_allocation()}; uint32_t new_size = 5; ivf::resize_list(res, list, spec, new_size, 0); raft::update_device(index.list_sizes(), &new_size, 1, stream); // recompute the internal state of the index ivf_pq::helpers::recompute_internal_state(res, &index);
- Template Parameters:
IdxT –
- Parameters:
res – [in] raft resource
index – [inout] pointer to IVF-PQ index
-
template<typename IdxT>
void extract_centers(raft::resources const &res, const index<IdxT> &index, raft::device_matrix_view<float> cluster_centers)# Public helper API for fetching a trained index’s IVF centroids into a buffer that may be allocated on either host or device.
Usage example:
raft::resources res; // allocate the buffer for the output centers auto cluster_centers = raft::make_device_matrix<float, uint32_t>( res, index.n_lists(), index.dim()); // Extract the IVF centroids into the buffer raft::neighbors::ivf_pq::helpers::extract_centers(res, index, cluster_centers.data_handle());
- Template Parameters:
IdxT –
- Parameters:
res – [in] raft resource
index – [in] IVF-PQ index (passed by reference)
cluster_centers – [out] IVF cluster centers [index.n_lists(), index.dim]