CAGRA#

CAGRA is a graph-based nearest neighbors algorithm that was built from the ground up for GPU acceleration. CAGRA demonstrates state-of-the art index build and query performance for both small- and large-batch sized search.

#include <raft/neighbors/cagra.h>

Index build parameters#

enum cuvsCagraGraphBuildAlgo#

Enum to denote which ANN algorithm is used to build CAGRA graph.

Values:

enumerator AUTO_SELECT#
enumerator IVF_PQ#
enumerator NN_DESCENT#
typedef struct cuvsCagraCompressionParams *cuvsCagraCompressionParams_t#
typedef struct cuvsCagraIndexParams *cuvsCagraIndexParams_t#
cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t *params)#

Allocate CAGRA Index params, and populate with default values.

Parameters:

params[in] cuvsCagraIndexParams_t to allocate

Returns:

cuvsError_t

cuvsError_t cuvsCagraIndexParamsDestroy(cuvsCagraIndexParams_t params)#

De-allocate CAGRA Index params.

Parameters:

params[in]

Returns:

cuvsError_t

cuvsError_t cuvsCagraCompressionParamsCreate(cuvsCagraCompressionParams_t *params)#

Allocate CAGRA Compression params, and populate with default values.

Parameters:

params[in] cuvsCagraCompressionParams_t to allocate

Returns:

cuvsError_t

cuvsError_t cuvsCagraCompressionParamsDestroy(cuvsCagraCompressionParams_t params)#

De-allocate CAGRA Compression params.

Parameters:

params[in]

Returns:

cuvsError_t

struct cuvsCagraCompressionParams#
#include <cagra.h>

Parameters for VPQ compression.

Public Members

uint32_t pq_bits#

The bit length of the vector element after compression by PQ.

Possible values: [4, 5, 6, 7, 8].

Hint: the smaller the ‘pq_bits’, the smaller the index size and the better the search performance, but the lower the recall.

uint32_t pq_dim#

The dimensionality of the vector after compression by PQ. When zero, an optimal value is selected using a heuristic.

TODO: at the moment dim must be a multiple pq_dim.

uint32_t vq_n_centers#

Vector Quantization (VQ) codebook size - number of “coarse cluster centers”. When zero, an optimal value is selected using a heuristic.

uint32_t kmeans_n_iters#

The number of iterations searching for kmeans centers (both VQ & PQ phases).

double vq_kmeans_trainset_fraction#

The fraction of data to use during iterative kmeans building (VQ phase). When zero, an optimal value is selected using a heuristic.

double pq_kmeans_trainset_fraction#

The fraction of data to use during iterative kmeans building (PQ phase). When zero, an optimal value is selected using a heuristic.

struct cuvsCagraIndexParams#
#include <cagra.h>

Supplemental parameters to build CAGRA Index.

Public Members

size_t intermediate_graph_degree#

Degree of input graph for pruning.

size_t graph_degree#

Degree of output graph.

enum cuvsCagraGraphBuildAlgo build_algo#

ANN algorithm to build knn graph.

size_t nn_descent_niter#

Number of Iterations to run if building with NN_DESCENT

cuvsCagraCompressionParams_t compression#

Optional: specify compression parameters if compression is desired.

NOTE: this is experimental new API, consider it unsafe.

Index search parameters#

enum cuvsCagraSearchAlgo#

Enum to denote algorithm used to search CAGRA Index.

Values:

enumerator SINGLE_CTA#

For large batch sizes.

enumerator MULTI_CTA#

For small batch sizes.

enumerator MULTI_KERNEL#
enumerator AUTO#
enum cuvsCagraHashMode#

Enum to denote Hash Mode used while searching CAGRA index.

Values:

enumerator HASH#
enumerator SMALL#
enumerator AUTO_HASH#
typedef struct cuvsCagraSearchParams *cuvsCagraSearchParams_t#
cuvsError_t cuvsCagraSearchParamsCreate(cuvsCagraSearchParams_t *params)#

Allocate CAGRA search params, and populate with default values.

Parameters:

params[in] cuvsCagraSearchParams_t to allocate

Returns:

cuvsError_t

cuvsError_t cuvsCagraSearchParamsDestroy(cuvsCagraSearchParams_t params)#

De-allocate CAGRA search params.

Parameters:

params[in]

Returns:

cuvsError_t

struct cuvsCagraSearchParams#
#include <cagra.h>

Supplemental parameters to search CAGRA index.

Public Members

size_t max_queries#

Maximum number of queries to search at the same time (batch size). Auto select when 0.

size_t itopk_size#

Number of intermediate search results retained during the search.

This is the main knob to adjust trade off between accuracy and search speed. Higher values improve the search accuracy.

size_t max_iterations#

Upper limit of search iterations. Auto select when 0.

enum cuvsCagraSearchAlgo algo#

Which search implementation to use.

size_t team_size#

Number of threads used to calculate a single distance. 4, 8, 16, or 32.

size_t search_width#

Number of graph nodes to select as the starting point for the search in each iteration. aka search width?

size_t min_iterations#

Lower limit of search iterations.

size_t thread_block_size#

Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0.

enum cuvsCagraHashMode hashmap_mode#

Hashmap type. Auto selection when AUTO.

size_t hashmap_min_bitlen#

Lower limit of hashmap bit length. More than 8.

float hashmap_max_fill_rate#

Upper limit of hashmap fill rate. More than 0.1, less than 0.9.

uint32_t num_random_samplings#

Number of iterations of initial random seed node selection. 1 or more.

uint64_t rand_xor_mask#

Bit mask used for initial random seed node selection.

Index#

typedef cuvsCagraIndex *cuvsCagraIndex_t#
cuvsError_t cuvsCagraIndexCreate(cuvsCagraIndex_t *index)#

Allocate CAGRA index.

Parameters:

index[in] cuvsCagraIndex_t to allocate

Returns:

cagraError_t

cuvsError_t cuvsCagraIndexDestroy(cuvsCagraIndex_t index)#

De-allocate CAGRA index.

Parameters:

index[in] cuvsCagraIndex_t to de-allocate

struct cuvsCagraIndex#
#include <cagra.h>

Struct to hold address of cuvs::neighbors::cagra::index and its active trained dtype.

Index build#

cuvsError_t cuvsCagraBuild(cuvsResources_t res, cuvsCagraIndexParams_t params, DLManagedTensor *dataset, cuvsCagraIndex_t index)#

Build a CAGRA index with a DLManagedTensor which has underlying DLDeviceType equal to kDLCUDA, kDLCUDAHost, kDLCUDAManaged, or kDLCPU. Also, acceptable underlying types are:

  1. kDLDataType.code == kDLFloat and kDLDataType.bits = 32

  2. kDLDataType.code == kDLInt and kDLDataType.bits = 8

  3. kDLDataType.code == kDLUInt and kDLDataType.bits = 8

#include <cuvs/core/c_api.h>
#include <cuvs/neighbors/cagra.h>

// Create cuvsResources_t
cuvsResources_t res;
cuvsError_t res_create_status = cuvsResourcesCreate(&res);

// Assume a populated `DLManagedTensor` type here
DLManagedTensor dataset;

// Create default index params
cuvsCagraIndexParams_t params;
cuvsError_t params_create_status = cuvsCagraIndexParamsCreate(&params);

// Create CAGRA index
cuvsCagraIndex_t index;
cuvsError_t index_create_status = cuvsCagraIndexCreate(&index);

// Build the CAGRA Index
cuvsError_t build_status = cuvsCagraBuild(res, params, &dataset, index);

// de-allocate `params`, `index` and `res`
cuvsError_t params_destroy_status = cuvsCagraIndexParamsDestroy(params);
cuvsError_t index_destroy_status = cuvsCagraIndexDestroy(index);
cuvsError_t res_destroy_status = cuvsResourcesDestroy(res);
Parameters:
  • res[in] cuvsResources_t opaque C handle

  • params[in] cuvsCagraIndexParams_t used to build CAGRA index

  • dataset[in] DLManagedTensor* training dataset

  • index[out] cuvsCagraIndex_t Newly built CAGRA index

Returns:

cuvsError_t