Functions
C++ implementation of the DBSCAN algorithm

Fits a DBSCAN model on an input feature matrix and outputs the labels and core_sample_indices. More...

Functions

void ML::Dbscan::fit (const raft::handle_t &handle, float *input, int n_rows, int n_cols, float eps, int min_pts, raft::distance::DistanceType metric, int *labels, int *core_sample_indices=nullptr, float *sample_weight=nullptr, size_t max_bytes_per_batch=0, EpsNnMethod eps_nn_method=BRUTE_FORCE, int verbosity=CUML_LEVEL_INFO, bool opg=false)
 
void ML::Dbscan::fit (const raft::handle_t &handle, double *input, int n_rows, int n_cols, double eps, int min_pts, raft::distance::DistanceType metric, int *labels, int *core_sample_indices=nullptr, double *sample_weight=nullptr, size_t max_bytes_per_batch=0, EpsNnMethod eps_nn_method=BRUTE_FORCE, int verbosity=CUML_LEVEL_INFO, bool opg=false)
 
void ML::Dbscan::fit (const raft::handle_t &handle, float *input, int64_t n_rows, int64_t n_cols, float eps, int min_pts, raft::distance::DistanceType metric, int64_t *labels, int64_t *core_sample_indices=nullptr, float *sample_weight=nullptr, size_t max_bytes_per_batch=0, EpsNnMethod eps_nn_method=BRUTE_FORCE, int verbosity=CUML_LEVEL_INFO, bool opg=false)
 
void ML::Dbscan::fit (const raft::handle_t &handle, double *input, int64_t n_rows, int64_t n_cols, double eps, int min_pts, raft::distance::DistanceType metric, int64_t *labels, int64_t *core_sample_indices=nullptr, double *sample_weight=nullptr, size_t max_bytes_per_batch=0, EpsNnMethod eps_nn_method=BRUTE_FORCE, int verbosity=CUML_LEVEL_INFO, bool opg=false)
 

Detailed Description

Fits a DBSCAN model on an input feature matrix and outputs the labels and core_sample_indices.

Parameters
[in] handle: cuml handle to use across the algorithm
[in] input: row-major input feature matrix or distance matrix
[in] n_rows: number of samples in the input feature matrix
[in] n_cols: number of features in the input feature matrix
[in] eps: epsilon value to use for epsilon-neighborhood determination
[in] min_pts: minimum number of points to determine a cluster
[in] metric: metric type (or precomputed)
[out] labels: (size n_rows) output labels array
[out] core_sample_indices: (size n_rows) output array containing the indices of each core point. If the number of core points is less than n_rows, the remaining entries at the end of the array are padded with -1. Setting this to NULL skips the calculation of the core sample indices
[in] sample_weight: (size n_rows) input array containing the weight of each sample; these weights are used, instead of a plain count of neighbors, to decide whether a point fulfills the min_pts criterion for core points. NULL defaults to a weight of 1 for every sample
[in] max_bytes_per_batch: the maximum number of bytes to be used for each batch of the pairwise distance calculation. This enables a trade-off between memory usage and algorithm execution time.
[in] eps_nn_method: method for computing epsilon neighborhood
[in] verbosity: verbosity level for logging messages during execution
[in] opg: whether we are running in a multi-node multi-GPU context

Function Documentation

◆ fit() [1/4]

void ML::Dbscan::fit ( const raft::handle_t &  handle,
double *  input,
int  n_rows,
int  n_cols,
double  eps,
int  min_pts,
raft::distance::DistanceType  metric,
int *  labels,
int *  core_sample_indices = nullptr,
double *  sample_weight = nullptr,
size_t  max_bytes_per_batch = 0,
EpsNnMethod  eps_nn_method = BRUTE_FORCE,
int  verbosity = CUML_LEVEL_INFO,
bool  opg = false 
)

◆ fit() [2/4]

void ML::Dbscan::fit ( const raft::handle_t &  handle,
double *  input,
int64_t  n_rows,
int64_t  n_cols,
double  eps,
int  min_pts,
raft::distance::DistanceType  metric,
int64_t *  labels,
int64_t *  core_sample_indices = nullptr,
double *  sample_weight = nullptr,
size_t  max_bytes_per_batch = 0,
EpsNnMethod  eps_nn_method = BRUTE_FORCE,
int  verbosity = CUML_LEVEL_INFO,
bool  opg = false 
)

◆ fit() [3/4]

void ML::Dbscan::fit ( const raft::handle_t &  handle,
float *  input,
int  n_rows,
int  n_cols,
float  eps,
int  min_pts,
raft::distance::DistanceType  metric,
int *  labels,
int *  core_sample_indices = nullptr,
float *  sample_weight = nullptr,
size_t  max_bytes_per_batch = 0,
EpsNnMethod  eps_nn_method = BRUTE_FORCE,
int  verbosity = CUML_LEVEL_INFO,
bool  opg = false 
)

◆ fit() [4/4]

void ML::Dbscan::fit ( const raft::handle_t &  handle,
float *  input,
int64_t  n_rows,
int64_t  n_cols,
float  eps,
int  min_pts,
raft::distance::DistanceType  metric,
int64_t *  labels,
int64_t *  core_sample_indices = nullptr,
float *  sample_weight = nullptr,
size_t  max_bytes_per_batch = 0,
EpsNnMethod  eps_nn_method = BRUTE_FORCE,
int  verbosity = CUML_LEVEL_INFO,
bool  opg = false 
)