CAGRA#
CAGRA is a graph-based nearest neighbors algorithm that was built from the ground up for GPU acceleration. CAGRA demonstrates state-of-the art index build and query performance for both small- and large-batch sized search.
#include <raft/neighbors/cagra.h>
Index build parameters#
-
enum cuvsCagraGraphBuildAlgo#
Enum to denote which ANN algorithm is used to build CAGRA graph.
Values:
-
enumerator AUTO_SELECT#
-
enumerator IVF_PQ#
-
enumerator NN_DESCENT#
-
enumerator AUTO_SELECT#
-
typedef struct cuvsCagraCompressionParams *cuvsCagraCompressionParams_t#
-
typedef struct cuvsCagraIndexParams *cuvsCagraIndexParams_t#
-
cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t *params)#
Allocate CAGRA Index params, and populate with default values.
- Parameters:
params – [in] cuvsCagraIndexParams_t to allocate
- Returns:
cuvsError_t
-
cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params)#
De-allocate CAGRA Index params.
- Parameters:
params – [in]
- Returns:
cuvsError_t
-
cuvsError_t cuvsCagraCompressionParamsCreate(cuvsCagraCompressionParams_t *params)#
Allocate CAGRA Compression params, and populate with default values.
- Parameters:
params – [in] cuvsCagraCompressionParams_t to allocate
- Returns:
cuvsError_t
-
cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionParams_t params)#
De-allocate CAGRA Compression params.
- Parameters:
params – [in]
- Returns:
cuvsError_t
-
struct cuvsCagraCompressionParams#
- #include <cagra.h>
Parameters for VPQ compression.
Public Members
-
uint32_t pq_bits#
The bit length of the vector element after compression by PQ.
Possible values: [4, 5, 6, 7, 8].
Hint: the smaller the ‘pq_bits’, the smaller the index size and the better the search performance, but the lower the recall.
-
uint32_t pq_dim#
The dimensionality of the vector after compression by PQ. When zero, an optimal value is selected using a heuristic.
TODO: at the moment
dim
must be a multiplepq_dim
.
-
uint32_t vq_n_centers#
Vector Quantization (VQ) codebook size - number of “coarse cluster centers”. When zero, an optimal value is selected using a heuristic.
-
uint32_t kmeans_n_iters#
The number of iterations searching for kmeans centers (both VQ & PQ phases).
-
double vq_kmeans_trainset_fraction#
The fraction of data to use during iterative kmeans building (VQ phase). When zero, an optimal value is selected using a heuristic.
-
double pq_kmeans_trainset_fraction#
The fraction of data to use during iterative kmeans building (PQ phase). When zero, an optimal value is selected using a heuristic.
-
uint32_t pq_bits#
-
struct cuvsCagraIndexParams#
- #include <cagra.h>
Supplemental parameters to build CAGRA Index.
Public Members
-
size_t intermediate_graph_degree#
Degree of input graph for pruning.
-
size_t graph_degree#
Degree of output graph.
-
enum cuvsCagraGraphBuildAlgo build_algo#
ANN algorithm to build knn graph.
-
size_t nn_descent_niter#
Number of Iterations to run if building with NN_DESCENT
-
cuvsCagraCompressionParams_t compression#
Optional: specify compression parameters if compression is desired.
NOTE: this is experimental new API, consider it unsafe.
-
size_t intermediate_graph_degree#
Index search parameters#
-
enum cuvsCagraSearchAlgo#
Enum to denote algorithm used to search CAGRA Index.
Values:
-
enumerator SINGLE_CTA#
For large batch sizes.
-
enumerator MULTI_CTA#
For small batch sizes.
-
enumerator MULTI_KERNEL#
-
enumerator AUTO#
-
enumerator SINGLE_CTA#
-
enum cuvsCagraHashMode#
Enum to denote Hash Mode used while searching CAGRA index.
Values:
-
enumerator HASH#
-
enumerator SMALL#
-
enumerator AUTO_HASH#
-
enumerator HASH#
-
typedef struct cuvsCagraSearchParams *cuvsCagraSearchParams_t#
-
cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t *params)#
Allocate CAGRA search params, and populate with default values.
- Parameters:
params – [in] cuvsCagraSearchParams_t to allocate
- Returns:
cuvsError_t
-
cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t params)#
De-allocate CAGRA search params.
- Parameters:
params – [in]
- Returns:
cuvsError_t
-
struct cuvsCagraSearchParams#
- #include <cagra.h>
Supplemental parameters to search CAGRA index.
Public Members
-
size_t max_queries#
Maximum number of queries to search at the same time (batch size). Auto select when 0.
-
size_t itopk_size#
Number of intermediate search results retained during the search.
This is the main knob to adjust trade off between accuracy and search speed. Higher values improve the search accuracy.
-
size_t max_iterations#
Upper limit of search iterations. Auto select when 0.
-
enum cuvsCagraSearchAlgo algo#
Which search implementation to use.
-
size_t team_size#
Number of threads used to calculate a single distance. 4, 8, 16, or 32.
-
size_t search_width#
Number of graph nodes to select as the starting point for the search in each iteration. aka search width?
-
size_t min_iterations#
Lower limit of search iterations.
-
size_t thread_block_size#
Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0.
-
enum cuvsCagraHashMode hashmap_mode#
Hashmap type. Auto selection when AUTO.
-
size_t hashmap_min_bitlen#
Lower limit of hashmap bit length. More than 8.
-
float hashmap_max_fill_rate#
Upper limit of hashmap fill rate. More than 0.1, less than 0.9.
-
uint32_t num_random_samplings#
Number of iterations of initial random seed node selection. 1 or more.
-
uint64_t rand_xor_mask#
Bit mask used for initial random seed node selection.
-
size_t max_queries#
Index#
-
typedef cuvsCagraIndex *cuvsCagraIndex_t#
-
cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t *index)#
Allocate CAGRA index.
- Parameters:
index – [in] cuvsCagraIndex_t to allocate
- Returns:
cagraError_t
-
cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index)#
De-allocate CAGRA index.
- Parameters:
index – [in] cuvsCagraIndex_t to de-allocate
-
cuvsError_t cuvsCagraIndexGetDims(cuvsCagraIndex_t index, int *dim)#
Get dimension of the CAGRA index.
- Parameters:
index – [in] CAGRA index
dim – [out] return dimension of the index
- Returns:
cuvsError_t
-
struct cuvsCagraIndex#
- #include <cagra.h>
Struct to hold address of cuvs::neighbors::cagra::index and its active trained dtype.
Index build#
-
cuvsError_t cuvsCagraBuild(cuvsResources_t res, cuvsCagraIndexParams_t params, DLManagedTensor *dataset, cuvsCagraIndex_t index)#
Build a CAGRA index with a
DLManagedTensor
which has underlyingDLDeviceType
equal tokDLCUDA
,kDLCUDAHost
,kDLCUDAManaged
, orkDLCPU
. Also, acceptable underlying types are:kDLDataType.code == kDLFloat
andkDLDataType.bits = 32
kDLDataType.code == kDLInt
andkDLDataType.bits = 8
kDLDataType.code == kDLUInt
andkDLDataType.bits = 8
#include <cuvs/core/c_api.h> #include <cuvs/neighbors/cagra.h> // Create cuvsResources_t cuvsResources_t res; cuvsError_t res_create_status = cuvsResourcesCreate(&res); // Assume a populated `DLManagedTensor` type here DLManagedTensor dataset; // Create default index params cuvsCagraIndexParams_t params; cuvsError_t params_create_status = cuvsCagraIndexParamsCreate(¶ms); // Create CAGRA index cuvsCagraIndex_t index; cuvsError_t index_create_status = cuvsCagraIndexCreate(&index); // Build the CAGRA Index cuvsError_t build_status = cuvsCagraBuild(res, params, &dataset, index); // de-allocate `params`, `index` and `res` cuvsError_t params_destroy_status = cuvsCagraIndexParamsDestroy(params); cuvsError_t index_destroy_status = cuvsCagraIndexDestroy(index); cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
- Parameters:
res – [in] cuvsResources_t opaque C handle
params – [in] cuvsCagraIndexParams_t used to build CAGRA index
dataset – [in] DLManagedTensor* training dataset
index – [out] cuvsCagraIndex_t Newly built CAGRA index
- Returns:
cuvsError_t
Index search#
-
cuvsError_t cuvsCagraSearch(cuvsResources_t res, cuvsCagraSearchParams_t params, cuvsCagraIndex_t index, DLManagedTensor *queries, DLManagedTensor *neighbors, DLManagedTensor *distances)#
Search a CAGRA index with a
DLManagedTensor
which has underlyingDLDeviceType
equal tokDLCUDA
,kDLCUDAHost
,kDLCUDAManaged
. It is also important to note that the CAGRA Index must have been built with the same type ofqueries
, such thatindex.dtype.code == queries.dl_tensor.dtype.code
Types for input are:queries
: a.kDLDataType.code == kDLFloat
andkDLDataType.bits = 32
b.kDLDataType.code == kDLInt
andkDLDataType.bits = 8
c.kDLDataType.code == kDLUInt
andkDLDataType.bits = 8
neighbors
:kDLDataType.code == kDLUInt
andkDLDataType.bits = 32
distances
:kDLDataType.code == kDLFloat
andkDLDataType.bits = 32
#include <cuvs/core/c_api.h> #include <cuvs/neighbors/cagra.h> // Create cuvsResources_t cuvsResources_t res; cuvsError_t res_create_status = cuvsResourcesCreate(&res); // Assume a populated `DLManagedTensor` type here DLManagedTensor dataset; DLManagedTensor queries; DLManagedTensor neighbors; // Create default search params cuvsCagraSearchParams_t params; cuvsError_t params_create_status = cuvsCagraSearchParamsCreate(¶ms); // Search the `index` built using `cuvsCagraBuild` cuvsError_t search_status = cuvsCagraSearch(res, params, index, &queries, &neighbors, &distances); // de-allocate `params` and `res` cuvsError_t params_destroy_status = cuvsCagraSearchParamsDestroy(params); cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
- Parameters:
res – [in] cuvsResources_t opaque C handle
params – [in] cuvsCagraSearchParams_t used to search CAGRA index
index – [in] cuvsCagraIndex which has been returned by
cuvsCagraBuild
queries – [in] DLManagedTensor* queries dataset to search
neighbors – [out] DLManagedTensor* output
k
neighbors for queriesdistances – [out] DLManagedTensor* output
k
distances for queries
Index serialize#
Warning
doxygengroup: Cannot find group “cagra_c_index_serialize” in doxygen xml output for project “cuvs” from directory: ../../cpp/doxygen/_xml/