Namespaces | Classes | Typedefs | Enumerations | Functions | Variables
ML Namespace Reference

Namespaces

 CD
 
 Datasets
 
 Dbscan
 
 DT
 
 experimental
 
 Explainer
 
 fil
 
 GLM
 
 HDBSCAN
 
 HoltWinters
 
 Internals
 
 kmeans
 
 KNN
 
 Metrics
 
 OLS
 
 PCA
 
 Ridge
 
 Solver
 
 Sparse
 
 Spectral
 
 Stationarity
 
 SVM
 
 TSVD
 
 UMAP
 

Classes

class  Logger
 The main Logging class for cuML library. More...
 
class  PatternSetter
 RAII based pattern setter for Logger class. More...
 
class  pinned_host_vector
 
class  params
 
class  paramsSolver
 
class  paramsTSVDTemplate
 
class  paramsPCATemplate
 structure for pca parameters. Ref: http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html More...
 
struct  RF_metrics
 
struct  RF_params
 
struct  RandomForestMetaData
 
struct  knn_graph
 
struct  manifold_inputs_t
 
struct  manifold_dense_inputs_t
 
struct  manifold_sparse_inputs_t
 
struct  manifold_precomputed_knn_inputs_t
 
struct  TSNEParams
 
class  UMAPParams
 
struct  knnIndex
 
struct  knnIndexParam
 
struct  IVFParam
 
struct  IVFFlatParam
 
struct  IVFPQParam
 
struct  paramsRPROJ
 
struct  rand_mat
 
struct  ARIMAOrder
 
struct  ARIMAParams
 
struct  ARIMAMemory
 
struct  OptimParams
 
class  HandleMap
 
struct  SimpleDenseMat
 
struct  SimpleMat
 
struct  SimpleVec
 
struct  SimpleVecOwning
 
struct  SimpleMatOwning
 
struct  SimpleSparseMat
 

Typedefs

typedef paramsTSVDTemplate paramsTSVD
 
typedef paramsPCATemplate paramsPCA
 
typedef paramsPCATemplate< mg_solver > paramsPCAMG
 
typedef paramsTSVDTemplate< mg_solver > paramsTSVDMG
 
typedef RandomForestMetaData< float, int > RandomForestClassifierF
 
typedef RandomForestMetaData< double, int > RandomForestClassifierD
 
typedef RandomForestMetaData< float, float > RandomForestRegressorF
 
typedef RandomForestMetaData< double, double > RandomForestRegressorD
 
typedef int64_t knn_indices_dense_t
 
typedef int knn_indices_sparse_t
 
using nn_index_params = raft::neighbors::experimental::nn_descent::index_params
 

Enumerations

enum class  solver : int { COV_EIG_DQ , COV_EIG_JACOBI }
 
enum class  mg_solver { COV_EIG_DQ , COV_EIG_JACOBI , QR }
 
enum  RF_type { CLASSIFICATION , REGRESSION }
 
enum  task_category { REGRESSION_MODEL = 1 , CLASSIFICATION_MODEL = 2 }
 
enum  TSNE_ALGORITHM { EXACT , BARNES_HUT , FFT }
 
enum  TSNE_INIT { RANDOM , PCA }
 
enum  random_matrix_type { unset , dense , sparse }
 
enum  lr_type { OPTIMAL , CONSTANT , INVSCALING , ADAPTIVE }
 
enum  loss_funct { SQRD_LOSS , HINGE , LOG }
 
enum  penalty { NONE , L1 , L2 , ELASTICNET }
 
enum  CRITERION {
  GINI , ENTROPY , MSE , MAE ,
  POISSON , GAMMA , INVERSE_GAUSSIAN , CRITERION_END
}
 
enum  LoglikeMethod { CSS , MLE }
 
enum  SeasonalType { ADDITIVE , MULTIPLICATIVE }
 
enum  OptimCriterion { OPTIM_BFGS_ITER_LIMIT = 0 , OPTIM_MIN_PARAM_DIFF = 1 , OPTIM_MIN_ERROR_DIFF = 2 , OPTIM_MIN_GRAD_NORM = 3 }
 
enum  Norm { L0 , L1 , L2 , LINF }
 
enum  STORAGE_ORDER { COL_MAJOR = 0 , ROW_MAJOR = 1 }
 

Functions

void hdbscan (const raft::handle_t &handle, const float *X, size_t m, size_t n, raft::distance::DistanceType metric, HDBSCAN::Common::HDBSCANParams &params, HDBSCAN::Common::hdbscan_output< int, float > &out, float *core_dists)
 
void build_condensed_hierarchy (const raft::handle_t &handle, const int *children, const float *delta, const int *sizes, int min_cluster_size, int n_leaves, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree)
 
void _extract_clusters (const raft::handle_t &handle, size_t n_leaves, int n_edges, int *parents, int *children, float *lambdas, int *sizes, int *labels, float *probabilities, HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method, bool allow_single_cluster, int max_cluster_size, float cluster_selection_epsilon)
 
void compute_all_points_membership_vectors (const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, raft::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096)
 
void compute_membership_vector (const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, const float *points_to_predict, size_t n_prediction_points, int min_samples, raft::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096)
 
void out_of_sample_predict (const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, int *labels, const float *points_to_predict, size_t n_prediction_points, raft::distance::DistanceType metric, int min_samples, int *out_labels, float *out_probabilities)
 
void single_linkage_pairwise (const raft::handle_t &handle, const float *X, size_t m, size_t n, raft::hierarchy::linkage_output< int > *out, raft::distance::DistanceType metric, int n_clusters=5)
 Computes single-linkage hierarchical clustering on a dense input feature matrix and outputs the labels, dendrogram, and minimum spanning tree. Connectivities are constructed using the full n^2 pairwise distance matrix. This can be very fast for smaller datasets when there is enough memory available. More...
 
void single_linkage_neighbors (const raft::handle_t &handle, const float *X, size_t m, size_t n, raft::hierarchy::linkage_output< int > *out, raft::distance::DistanceType metric=raft::distance::DistanceType::L2Unexpanded, int c=15, int n_clusters=5)
 Computes single-linkage hierarchical clustering on a dense input feature matrix and outputs the labels, dendrogram, and minimum spanning tree. Connectivities are constructed using a k-nearest neighbors graph. While this strategy enables the algorithm to scale to much higher numbers of rows, it comes with the downside that additional knn steps may need to be executed to connect an otherwise unconnected k-nn graph. More...
 
void single_linkage_pairwise (const raft::handle_t &handle, const float *X, size_t m, size_t n, raft::hierarchy::linkage_output< int64_t > *out, raft::distance::DistanceType metric, int n_clusters=5)
 
std::string format (const char *fmt, va_list &vl)
 
std::string format (const char *fmt,...)
 
void pcaFit (raft::handle_t &handle, float *input, float *components, float *explained_var, float *explained_var_ratio, float *singular_vals, float *mu, float *noise_vars, const paramsPCA &prms)
 
void pcaFit (raft::handle_t &handle, double *input, double *components, double *explained_var, double *explained_var_ratio, double *singular_vals, double *mu, double *noise_vars, const paramsPCA &prms)
 
void pcaFitTransform (raft::handle_t &handle, float *input, float *trans_input, float *components, float *explained_var, float *explained_var_ratio, float *singular_vals, float *mu, float *noise_vars, const paramsPCA &prms)
 
void pcaFitTransform (raft::handle_t &handle, double *input, double *trans_input, double *components, double *explained_var, double *explained_var_ratio, double *singular_vals, double *mu, double *noise_vars, const paramsPCA &prms)
 
void pcaInverseTransform (raft::handle_t &handle, float *trans_input, float *components, float *singular_vals, float *mu, float *input, const paramsPCA &prms)
 
void pcaInverseTransform (raft::handle_t &handle, double *trans_input, double *components, double *singular_vals, double *mu, double *input, const paramsPCA &prms)
 
void pcaTransform (raft::handle_t &handle, float *input, float *components, float *trans_input, float *singular_vals, float *mu, const paramsPCA &prms)
 
void pcaTransform (raft::handle_t &handle, double *input, double *components, double *trans_input, double *singular_vals, double *mu, const paramsPCA &prms)
 
void tsvdFit (raft::handle_t &handle, float *input, float *components, float *singular_vals, const paramsTSVD &prms)
 
void tsvdFit (raft::handle_t &handle, double *input, double *components, double *singular_vals, const paramsTSVD &prms)
 
void tsvdInverseTransform (raft::handle_t &handle, float *trans_input, float *components, float *input, const paramsTSVD &prms)
 
void tsvdInverseTransform (raft::handle_t &handle, double *trans_input, double *components, double *input, const paramsTSVD &prms)
 
void tsvdTransform (raft::handle_t &handle, float *input, float *components, float *trans_input, const paramsTSVD &prms)
 
void tsvdTransform (raft::handle_t &handle, double *input, double *components, double *trans_input, const paramsTSVD &prms)
 
void tsvdFitTransform (raft::handle_t &handle, float *input, float *trans_input, float *components, float *explained_var, float *explained_var_ratio, float *singular_vals, const paramsTSVD &prms)
 
void tsvdFitTransform (raft::handle_t &handle, double *input, double *trans_input, double *components, double *explained_var, double *explained_var_ratio, double *singular_vals, const paramsTSVD &prms)
 
RF_metrics set_all_rf_metrics (RF_type rf_type, float accuracy, double mean_abs_error, double mean_squared_error, double median_abs_error)
 
RF_metrics set_rf_metrics_classification (float accuracy)
 
RF_metrics set_rf_metrics_regression (double mean_abs_error, double mean_squared_error, double median_abs_error)
 
void print (const RF_metrics rf_metrics)
 
void preprocess_labels (int n_rows, std::vector< int > &labels, std::map< int, int > &labels_map, int verbosity=CUML_LEVEL_INFO)
 
void postprocess_labels (int n_rows, std::vector< int > &labels, std::map< int, int > &labels_map, int verbosity=CUML_LEVEL_INFO)
 
template<class T , class L >
void delete_rf_metadata (RandomForestMetaData< T, L > *forest)
 
template<class T , class L >
std::string get_rf_summary_text (const RandomForestMetaData< T, L > *forest)
 
template<class T , class L >
std::string get_rf_detailed_text (const RandomForestMetaData< T, L > *forest)
 
template<class T , class L >
std::string get_rf_json (const RandomForestMetaData< T, L > *forest)
 
template<class T , class L >
void build_treelite_forest (TreeliteModelHandle *model, const RandomForestMetaData< T, L > *forest, int num_features)
 
TreeliteModelHandle concatenate_trees (std::vector< TreeliteModelHandle > treelite_handles)
 
void fit (const raft::handle_t &user_handle, RandomForestClassifierF *&forest, float *input, int n_rows, int n_cols, int *labels, int n_unique_labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO)
 
void fit (const raft::handle_t &user_handle, RandomForestClassifierD *&forest, double *input, int n_rows, int n_cols, int *labels, int n_unique_labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO)
 
void predict (const raft::handle_t &user_handle, const RandomForestClassifierF *forest, const float *input, int n_rows, int n_cols, int *predictions, int verbosity=CUML_LEVEL_INFO)
 
void predict (const raft::handle_t &user_handle, const RandomForestClassifierD *forest, const double *input, int n_rows, int n_cols, int *predictions, int verbosity=CUML_LEVEL_INFO)
 
RF_metrics score (const raft::handle_t &user_handle, const RandomForestClassifierF *forest, const int *ref_labels, int n_rows, const int *predictions, int verbosity=CUML_LEVEL_INFO)
 
RF_metrics score (const raft::handle_t &user_handle, const RandomForestClassifierD *forest, const int *ref_labels, int n_rows, const int *predictions, int verbosity=CUML_LEVEL_INFO)
 
RF_params set_rf_params (int max_depth, int max_leaves, float max_features, int max_n_bins, int min_samples_leaf, int min_samples_split, float min_impurity_decrease, bool bootstrap, int n_trees, float max_samples, uint64_t seed, CRITERION split_criterion, int cfg_n_streams, int max_batch_size)
 
void fit (const raft::handle_t &user_handle, RandomForestRegressorF *&forest, float *input, int n_rows, int n_cols, float *labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO)
 
void fit (const raft::handle_t &user_handle, RandomForestRegressorD *&forest, double *input, int n_rows, int n_cols, double *labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO)
 
void predict (const raft::handle_t &user_handle, const RandomForestRegressorF *forest, const float *input, int n_rows, int n_cols, float *predictions, int verbosity=CUML_LEVEL_INFO)
 
void predict (const raft::handle_t &user_handle, const RandomForestRegressorD *forest, const double *input, int n_rows, int n_cols, double *predictions, int verbosity=CUML_LEVEL_INFO)
 
RF_metrics score (const raft::handle_t &user_handle, const RandomForestRegressorF *forest, const float *ref_labels, int n_rows, const float *predictions, int verbosity=CUML_LEVEL_INFO)
 
RF_metrics score (const raft::handle_t &user_handle, const RandomForestRegressorD *forest, const double *ref_labels, int n_rows, const double *predictions, int verbosity=CUML_LEVEL_INFO)
 
void TSNE_fit (const raft::handle_t &handle, float *X, float *Y, int n, int p, int64_t *knn_indices, float *knn_dists, TSNEParams &params, float *kl_div=nullptr)
 Dimensionality reduction via TSNE using Barnes-Hut, Fourier Interpolation, or naive (brute force O(N^2)) methods. More...
 
void TSNE_fit_sparse (const raft::handle_t &handle, int *indptr, int *indices, float *data, float *Y, int nnz, int n, int p, int *knn_indices, float *knn_dists, TSNEParams &params, float *kl_div=nullptr)
 Dimensionality reduction via TSNE using either Barnes Hut O(NlogN) or brute force O(N^2). More...
 
void brute_force_knn (const raft::handle_t &handle, std::vector< float * > &input, std::vector< int > &sizes, int D, float *search_items, int n, int64_t *res_I, float *res_D, int k, bool rowMajorIndex=false, bool rowMajorQuery=false, raft::distance::DistanceType metric=raft::distance::DistanceType::L2Expanded, float metric_arg=2.0f, std::vector< int64_t > *translations=nullptr)
 Flat C++ API function to perform a brute force knn on a series of input arrays and combine the results into a single output array for indexes and distances. More...
 
void rbc_build_index (const raft::handle_t &handle, raft::spatial::knn::BallCoverIndex< int64_t, float, uint32_t > &index)
 
void rbc_knn_query (const raft::handle_t &handle, raft::spatial::knn::BallCoverIndex< int64_t, float, uint32_t > &index, uint32_t k, const float *search_items, uint32_t n_search_items, int64_t *out_inds, float *out_dists)
 
void approx_knn_build_index (raft::handle_t &handle, knnIndex *index, knnIndexParam *params, raft::distance::DistanceType metric, float metricArg, float *index_array, int n, int D)
 Flat C++ API function to build an approximate nearest neighbors index from an index array and a set of parameters. More...
 
void approx_knn_search (raft::handle_t &handle, float *distances, int64_t *indices, knnIndex *index, int k, float *query_array, int n)
 Flat C++ API function to perform an approximate nearest neighbors search from previously built index and a query array. More...
 
void knn_classify (raft::handle_t &handle, int *out, int64_t *knn_indices, std::vector< int * > &y, size_t n_index_rows, size_t n_query_rows, int k)
 Flat C++ API function to perform a knn classification using a given vector of label arrays. This supports multilabel classification by classifying on multiple label arrays. Note that each label is classified independently, as is done in scikit-learn. More...
 
void knn_regress (raft::handle_t &handle, float *out, int64_t *knn_indices, std::vector< float * > &y, size_t n_index_rows, size_t n_query_rows, int k)
 Flat C++ API function to perform a knn regression using a given vector of label arrays. This supports multilabel regression by regressing on multiple label arrays. Note that each label is regressed independently, as is done in scikit-learn. More...
 
void knn_class_proba (raft::handle_t &handle, std::vector< float * > &out, int64_t *knn_indices, std::vector< int * > &y, size_t n_index_rows, size_t n_query_rows, int k)
 Flat C++ API function to compute knn class probabilities using a vector of device arrays containing discrete class labels. Note that the output is a vector. More...
 
template<typename math_t >
void RPROJfit (const raft::handle_t &handle, rand_mat< math_t > *random_matrix, paramsRPROJ *params)
 
template<typename math_t >
void RPROJtransform (const raft::handle_t &handle, math_t *input, rand_mat< math_t > *random_matrix, math_t *output, paramsRPROJ *params)
 
size_t johnson_lindenstrauss_min_dim (size_t n_samples, double eps)
 
int divide_by_mask_build_index (const raft::handle_t &handle, const bool *d_mask, int *d_index, int batch_size)
 
void divide_by_mask_execute (const raft::handle_t &handle, const float *d_in, const bool *d_mask, const int *d_index, float *d_out0, float *d_out1, int batch_size, int n_obs)
 
void divide_by_mask_execute (const raft::handle_t &handle, const double *d_in, const bool *d_mask, const int *d_index, double *d_out0, double *d_out1, int batch_size, int n_obs)
 
void divide_by_mask_execute (const raft::handle_t &handle, const int *d_in, const bool *d_mask, const int *d_index, int *d_out0, int *d_out1, int batch_size, int n_obs)
 
void divide_by_min_build_index (const raft::handle_t &handle, const float *d_matrix, int *d_batch, int *d_index, int *h_size, int batch_size, int n_sub)
 
void divide_by_min_build_index (const raft::handle_t &handle, const double *d_matrix, int *d_batch, int *d_index, int *h_size, int batch_size, int n_sub)
 
void divide_by_min_execute (const raft::handle_t &handle, const float *d_in, const int *d_batch, const int *d_index, float **hd_out, int batch_size, int n_sub, int n_obs)
 
void divide_by_min_execute (const raft::handle_t &handle, const double *d_in, const int *d_batch, const int *d_index, double **hd_out, int batch_size, int n_sub, int n_obs)
 
void divide_by_min_execute (const raft::handle_t &handle, const int *d_in, const int *d_batch, const int *d_index, int **hd_out, int batch_size, int n_sub, int n_obs)
 
void build_division_map (const raft::handle_t &handle, const int *const *hd_id, const int *h_size, int *d_id_to_pos, int *d_id_to_model, int batch_size, int n_sub)
 
void merge_series (const raft::handle_t &handle, const float *const *hd_in, const int *d_id_to_pos, const int *d_id_to_sub, float *d_out, int batch_size, int n_sub, int n_obs)
 
void merge_series (const raft::handle_t &handle, const double *const *hd_in, const int *d_id_to_pos, const int *d_id_to_sub, double *d_out, int batch_size, int n_sub, int n_obs)
 
void pack (raft::handle_t &handle, const ARIMAParams< double > &params, const ARIMAOrder &order, int batch_size, double *param_vec)
 
void unpack (raft::handle_t &handle, ARIMAParams< double > &params, const ARIMAOrder &order, int batch_size, const double *param_vec)
 
bool detect_missing (raft::handle_t &handle, const double *d_y, int n_elem)
 
void batched_diff (raft::handle_t &handle, double *d_y_diff, const double *d_y, int batch_size, int n_obs, const ARIMAOrder &order)
 
void batched_loglike (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, const double *d_params, double *loglike, bool trans=true, bool host_loglike=true, LoglikeMethod method=MLE, int truncate=0)
 
void batched_loglike (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, const ARIMAParams< double > &params, double *loglike, bool trans=true, bool host_loglike=true, LoglikeMethod method=MLE, int truncate=0, int fc_steps=0, double *d_fc=nullptr, const double *d_exog_fut=nullptr, double level=0, double *d_lower=nullptr, double *d_upper=nullptr)
 
void batched_loglike_grad (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, const double *d_x, double *d_grad, double h, bool trans=true, LoglikeMethod method=MLE, int truncate=0)
 
void predict (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, const double *d_exog_fut, int batch_size, int n_obs, int start, int end, const ARIMAOrder &order, const ARIMAParams< double > &params, double *d_y_p, bool pre_diff=true, double level=0, double *d_lower=nullptr, double *d_upper=nullptr)
 
void information_criterion (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, const ARIMAParams< double > &params, double *ic, int ic_type)
 
void estimate_x0 (raft::handle_t &handle, ARIMAParams< double > &params, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, bool missing)
 
void batched_kalman_filter (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_ys, const double *d_exog, int nobs, const ARIMAParams< double > &params, const ARIMAOrder &order, int batch_size, double *d_loglike, double *d_pred, int fc_steps=0, double *d_fc=nullptr, const double *d_exog_fut=nullptr, double level=0, double *d_lower=nullptr, double *d_upper=nullptr)
 
void batched_jones_transform (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const ARIMAOrder &order, int batch_size, bool isInv, const double *h_params, double *h_Tparams)
 
int convert_level_to_spdlog (int level)
 
void PUSH_RANGE (const char *name, cudaStream_t stream)
 Synchronize CUDA stream and push a named nvtx range. More...
 
void POP_RANGE (cudaStream_t stream)
 Synchronize CUDA stream and pop the latest nvtx range. More...
 
void PUSH_RANGE (const char *name)
 Push a named nvtx range. More...
 
void POP_RANGE ()
 
template<typename T >
void col_ref (const SimpleDenseMat< T > &mat, SimpleVec< T > &mask_vec, int c)
 
template<typename T >
void col_slice (const SimpleDenseMat< T > &mat, SimpleDenseMat< T > &mask_mat, int c_from, int c_to)
 
template<typename T >
dot (const SimpleVec< T > &u, const SimpleVec< T > &v, T *tmp_dev, cudaStream_t stream)
 
template<typename T >
squaredNorm (const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
 
template<typename T >
nrmMax (const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
 
template<typename T >
nrm2 (const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
 
template<typename T >
nrm1 (const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream)
 
template<typename T >
std::ostream & operator<< (std::ostream &os, const SimpleVec< T > &v)
 
template<typename T >
std::ostream & operator<< (std::ostream &os, const SimpleDenseMat< T > &mat)
 
template<typename T , typename I = int>
void check_csr (const SimpleSparseMat< T, I > &mat, cudaStream_t stream)
 
template<typename T , typename I = int>
std::ostream & operator<< (std::ostream &os, const SimpleSparseMat< T, I > &mat)
 
cumlError_t knn_search (const cumlHandle_t handle, float **input, int *sizes, int n_params, int D, float *search_items, int n, int64_t *res_I, float *res_D, int k, bool rowMajorIndex, bool rowMajorQuery, int metric_type, float metric_arg, bool expanded)
 Flat C API function to perform a brute force knn on a series of input arrays and combine the results into a single output array for indexes and distances. More...
 
int get_device (const void *ptr)
 
cudaMemoryType memory_type (const void *p)
 
bool is_device_or_managed_type (const void *p)
 

Variables

HandleMap handleMap
 Static handle map instance (see cumlHandle.cpp) More...
 

Typedef Documentation

◆ knn_indices_dense_t

typedef int64_t ML::knn_indices_dense_t

◆ knn_indices_sparse_t

◆ nn_index_params

using ML::nn_index_params = typedef raft::neighbors::experimental::nn_descent::index_params

◆ paramsPCA

◆ paramsPCAMG

◆ paramsTSVD

◆ paramsTSVDMG

◆ RandomForestClassifierD

◆ RandomForestClassifierF

◆ RandomForestRegressorD

◆ RandomForestRegressorF

Enumeration Type Documentation

◆ CRITERION

Enumerator
GINI 
ENTROPY 
MSE 
MAE 
POISSON 
GAMMA 
INVERSE_GAUSSIAN 
CRITERION_END 

◆ LoglikeMethod

Enumerator
CSS 
MLE 

◆ loss_funct

Enumerator
SQRD_LOSS 
HINGE 
LOG 

◆ lr_type

Enumerator
OPTIMAL 
CONSTANT 
INVSCALING 
ADAPTIVE 

◆ mg_solver

enum ML::mg_solver
strong
Enumerator
COV_EIG_DQ 
COV_EIG_JACOBI 
QR 

◆ Norm

enum ML::Norm
Enumerator
L0 
L1 
L2 
LINF 

◆ OptimCriterion

Enumerator
OPTIM_BFGS_ITER_LIMIT 
OPTIM_MIN_PARAM_DIFF 
OPTIM_MIN_ERROR_DIFF 
OPTIM_MIN_GRAD_NORM 

◆ penalty

Enumerator
NONE 
L1 
L2 
ELASTICNET 

◆ RF_type

Enumerator
CLASSIFICATION 
REGRESSION 

◆ SeasonalType

Enumerator
ADDITIVE 
MULTIPLICATIVE 

◆ solver

enum ML::solver : int
strong
Parameters
COV_EIG_DQ: covariance of input will be used along with eigen decomposition using divide and conquer method for symmetric matrices
COV_EIG_JACOBI: covariance of input will be used along with eigen decomposition using Jacobi method for symmetric matrices
Enumerator
COV_EIG_DQ 
COV_EIG_JACOBI 

◆ STORAGE_ORDER

Enumerator
COL_MAJOR 
ROW_MAJOR 

◆ task_category

Enumerator
REGRESSION_MODEL 
CLASSIFICATION_MODEL 

◆ TSNE_ALGORITHM

Enumerator
EXACT 
BARNES_HUT 
FFT 

◆ TSNE_INIT

Enumerator
RANDOM 
PCA 

Function Documentation

◆ _extract_clusters()

void ML::_extract_clusters ( const raft::handle_t &  handle,
size_t  n_leaves,
int  n_edges,
int *  parents,
int *  children,
float *  lambdas,
int *  sizes,
int *  labels,
float *  probabilities,
HDBSCAN::Common::CLUSTER_SELECTION_METHOD  cluster_selection_method,
bool  allow_single_cluster,
int  max_cluster_size,
float  cluster_selection_epsilon 
)

◆ approx_knn_build_index()

void ML::approx_knn_build_index ( raft::handle_t &  handle,
knnIndex *  index,
knnIndexParam *  params,
raft::distance::DistanceType  metric,
float  metricArg,
float *  index_array,
int  n,
int  D 
)

Flat C++ API function to build an approximate nearest neighbors index from an index array and a set of parameters.

Parameters
[in] handle: RAFT handle
[out] index: index to be built
[in] params: parametrization of the index to be built
[in] metric: distance metric to use. Euclidean (L2) is used by default
[in] metricArg: metric argument
[in] index_array: the index array to build the index with
[in] n: number of rows in the index array
[in] D: the dimensionality of the index array

◆ approx_knn_search()

void ML::approx_knn_search ( raft::handle_t &  handle,
float *  distances,
int64_t *  indices,
knnIndex *  index,
int  k,
float *  query_array,
int  n 
)

Flat C++ API function to perform an approximate nearest neighbors search from previously built index and a query array.

Parameters
[in] handle: RAFT handle
[out] distances: distances of the nearest neighbors toward their query point
[out] indices: indices of the nearest neighbors
[in] index: index to perform a search with
[in] k: the number of nearest neighbors to search for
[in] query_array: the query to perform a search with
[in] n: number of rows in the query array

◆ batched_diff()

void ML::batched_diff ( raft::handle_t &  handle,
double *  d_y_diff,
const double *  d_y,
int  batch_size,
int  n_obs,
const ARIMAOrder &  order 
)

Compute the differenced series (seasonal and/or non-seasonal differences)

Parameters
[in] handle: cuML handle
[out] d_y_diff: Differenced series
[in] d_y: Original series
[in] batch_size: Batch size
[in] n_obs: Number of observations
[in] order: ARIMA order

◆ batched_jones_transform()

void ML::batched_jones_transform ( raft::handle_t &  handle,
const ARIMAMemory< double > &  arima_mem,
const ARIMAOrder &  order,
int  batch_size,
bool  isInv,
const double *  h_params,
double *  h_Tparams 
)

Convenience function for batched "jones transform" used in ARIMA to ensure certain properties of the AR and MA parameters (takes host array and returns host array)

Parameters
[in] handle: cuML handle
[in] arima_mem: Pre-allocated temporary memory
[in] order: ARIMA hyper-parameters
[in] batch_size: Number of time series analyzed.
[in] isInv: Do the inverse transform?
[in] h_params: ARIMA parameters by batch (mu, ar, ma) (host)
[out] h_Tparams: Transformed ARIMA parameters (expects pre-allocated array of size (p+q)*batch_size) (host)

◆ batched_kalman_filter()

void ML::batched_kalman_filter ( raft::handle_t &  handle,
const ARIMAMemory< double > &  arima_mem,
const double *  d_ys,
const double *  d_exog,
int  nobs,
const ARIMAParams< double > &  params,
const ARIMAOrder &  order,
int  batch_size,
double *  d_loglike,
double *  d_pred,
int  fc_steps = 0,
double *  d_fc = nullptr,
const double *  d_exog_fut = nullptr,
double  level = 0,
double *  d_lower = nullptr,
double *  d_upper = nullptr 
)

An ARIMA specialized batched kalman filter to evaluate ARMA parameters and provide the resulting prediction as well as loglikelihood fit.

Parameters
[in] handle: cuML handle
[in] arima_mem: Pre-allocated temporary memory
[in] d_ys: Batched time series Shape (nobs, batch_size) (col-major, device)
[in] d_exog: Batched exogenous variables Shape (nobs, n_exog * batch_size) (col-major, device)
[in] nobs: Number of samples per time series
[in] params: ARIMA parameters (device)
[in] order: ARIMA hyper-parameters
[in] batch_size: Number of series making up the batch
[out] d_loglike: Resulting log-likelihood (per series) (device)
[out] d_pred: Predictions shape=(nobs-d-s*D, batch_size) (device)
[in] fc_steps: Number of steps to forecast
[in] d_fc: Array to store the forecast
[in] d_exog_fut: Future values of exogenous variables Shape (fc_steps, n_exog * batch_size) (col-major, device)
[in] level: Confidence level for prediction intervals. 0 to skip the computation. Else 0 < level < 1
[out] d_lower: Lower limit of the prediction interval
[out] d_upper: Upper limit of the prediction interval

◆ batched_loglike() [1/2]

void ML::batched_loglike ( raft::handle_t &  handle,
const ARIMAMemory< double > &  arima_mem,
const double *  d_y,
const double *  d_exog,
int  batch_size,
int  n_obs,
const ARIMAOrder &  order,
const ARIMAParams< double > &  params,
double *  loglike,
bool  trans = true,
bool  host_loglike = true,
LoglikeMethod  method = MLE,
int  truncate = 0,
int  fc_steps = 0,
double *  d_fc = nullptr,
const double *  d_exog_fut = nullptr,
double  level = 0,
double *  d_lower = nullptr,
double *  d_upper = nullptr 
)

Compute the loglikelihood of the given parameter on the given time series in a batched context.

Note
This overload should be used when the parameters are already unpacked, to avoid useless packing / unpacking.
Parameters
[in] handle: cuML handle
[in] arima_mem: Pre-allocated temporary memory
[in] d_y: Series to fit: shape = (n_obs, batch_size) and expects column major data layout. (device)
[in] d_exog: Exogenous variables: shape = (n_obs, n_exog * batch_size) and expects column major data layout. (device)
[in] batch_size: Number of time series
[in] n_obs: Number of observations in a time series
[in] order: ARIMA hyper-parameters
[in] params: ARIMA parameters (device)
[out] loglike: Log-Likelihood of the model per series
[in] trans: Run jones_transform on params.
[in] host_loglike: Whether loglike is a host pointer
[in] method: Whether to use sum-of-squares or Kalman filter
[in] truncate: For CSS, start the sum-of-squares after a given number of observations
[in] fc_steps: Number of steps to forecast
[in] d_fc: Array to store the forecast
[in] d_exog_fut: Future values of exogenous variables Shape (fc_steps, n_exog * batch_size) (col-major, device)
[in] level: Confidence level for prediction intervals. 0 to skip the computation. Else 0 < level < 1
[out] d_lower: Lower limit of the prediction interval
[out] d_upper: Upper limit of the prediction interval

◆ batched_loglike() [2/2]

void ML::batched_loglike ( raft::handle_t &  handle,
const ARIMAMemory< double > &  arima_mem,
const double *  d_y,
const double *  d_exog,
int  batch_size,
int  n_obs,
const ARIMAOrder &  order,
const double *  d_params,
double *  loglike,
bool  trans = true,
bool  host_loglike = true,
LoglikeMethod  method = MLE,
int  truncate = 0 
)

Compute the loglikelihood of the given parameter on the given time series in a batched context.

Parameters
[in] handle: cuML handle
[in] arima_mem: Pre-allocated temporary memory
[in] d_y: Series to fit: shape = (n_obs, batch_size) and expects column major data layout. (device)
[in] d_exog: Exogenous variables: shape = (n_obs, n_exog * batch_size) and expects column major data layout. (device)
[in] batch_size: Number of time series
[in] n_obs: Number of observations in a time series
[in] order: ARIMA hyper-parameters
[in] d_params: Parameters to evaluate grouped by series: [mu0, ar.., ma.., mu1, ..] (device)
[out] loglike: Log-Likelihood of the model per series
[in] trans: Run jones_transform on params.
[in] host_loglike: Whether loglike is a host pointer
[in] method: Whether to use sum-of-squares or Kalman filter
[in] truncate: For CSS, start the sum-of-squares after a given number of observations

◆ batched_loglike_grad()

void ML::batched_loglike_grad ( raft::handle_t &  handle,
const ARIMAMemory< double > &  arima_mem,
const double *  d_y,
const double *  d_exog,
int  batch_size,
int  n_obs,
const ARIMAOrder order,
const double *  d_x,
double *  d_grad,
double  h,
bool  trans = true,
LoglikeMethod  method = MLE,
int  truncate = 0 
)

Compute the gradient of the log-likelihood

Parameters
[in]handlecuML handle
[in]arima_memPre-allocated temporary memory
[in]d_ySeries to fit: shape = (n_obs, batch_size) and expects column major data layout. (device)
[in]d_exogExogenous variables: shape = (n_obs, n_exog * batch_size) and expects column major data layout. (device)
[in]batch_sizeNumber of time series
[in]n_obsNumber of observations in a time series
[in]orderARIMA hyper-parameters
[in]d_xParameters grouped by series
[out]d_gradGradient to compute
[in]hFinite-differencing step size
[in]transRun jones_transform on params
[in]methodWhether to use sum-of-squares or Kalman filter
[in]truncateFor CSS, start the sum-of-squares after a given number of observations

◆ brute_force_knn()

void ML::brute_force_knn ( const raft::handle_t &  handle,
std::vector< float * > &  input,
std::vector< int > &  sizes,
int  D,
float *  search_items,
int  n,
int64_t *  res_I,
float *  res_D,
int  k,
bool  rowMajorIndex = false,
bool  rowMajorQuery = false,
raft::distance::DistanceType  metric = raft::distance::DistanceType::L2Expanded,
float  metric_arg = 2.0f,
std::vector< int64_t > *  translations = nullptr 
)

Flat C++ API function to perform a brute force knn on a series of input arrays and combine the results into a single output array for indexes and distances.

Parameters
[in]handleRAFT handle
[in]inputvector of pointers to the input arrays
[in]sizesvector of sizes of input arrays
[in]Dthe dimensionality of the arrays
[in]search_itemsarray of items to search of dimensionality D
[in]nnumber of rows in search_items
[out]res_Ithe resulting index array of size n * k
[out]res_Dthe resulting distance array of size n * k
[in]kthe number of nearest neighbors to return
[in]rowMajorIndexare the index arrays in row-major order?
[in]rowMajorQueryare the query arrays in row-major order?
[in]metricdistance metric to use. Euclidean (L2) is used by default
[in]metric_argthe value of p for Minkowski (l-p) distances. This is ignored if the metric_type is not Minkowski.
[in]translationstranslation ids for indices when index rows represent non-contiguous partitions

◆ build_condensed_hierarchy()

void ML::build_condensed_hierarchy ( const raft::handle_t &  handle,
const int *  children,
const float *  delta,
const int *  sizes,
int  min_cluster_size,
int  n_leaves,
HDBSCAN::Common::CondensedHierarchy< int, float > &  condensed_tree 
)

◆ build_division_map()

void ML::build_division_map ( const raft::handle_t &  handle,
const int *const *  hd_id,
const int *  h_size,
int *  d_id_to_pos,
int *  d_id_to_model,
int  batch_size,
int  n_sub 
)

Build a map to associate each batch member with a model and index in the associated sub-batch

Parameters
[in]handlecuML handle
[in]hd_idHost array of pointers to device arrays containing the indices of the members of each sub-batch
[in]h_sizeHost array containing the size of each sub-batch
[out]d_id_to_posDevice array containing the position of each member in its new sub-batch
[out]d_id_to_modelDevice array associating each member with its sub-batch
[in]batch_sizeBatch size
[in]n_subNumber of sub-batches

◆ build_treelite_forest()

template<class T , class L >
void ML::build_treelite_forest ( TreeliteModelHandle model,
const RandomForestMetaData< T, L > *  forest,
int  num_features 
)

◆ check_csr()

template<typename T , typename I = int>
void ML::check_csr ( const SimpleSparseMat< T, I > &  mat,
cudaStream_t  stream 
)
inline

◆ col_ref()

template<typename T >
void ML::col_ref ( const SimpleDenseMat< T > &  mat,
SimpleVec< T > &  mask_vec,
int  c 
)
inline

◆ col_slice()

template<typename T >
void ML::col_slice ( const SimpleDenseMat< T > &  mat,
SimpleDenseMat< T > &  mask_mat,
int  c_from,
int  c_to 
)
inline

◆ compute_all_points_membership_vectors()

void ML::compute_all_points_membership_vectors ( const raft::handle_t &  handle,
HDBSCAN::Common::CondensedHierarchy< int, float > &  condensed_tree,
HDBSCAN::Common::PredictionData< int, float > &  prediction_data,
const float *  X,
raft::distance::DistanceType  metric,
float *  membership_vec,
size_t  batch_size = 4096 
)

◆ compute_membership_vector()

void ML::compute_membership_vector ( const raft::handle_t &  handle,
HDBSCAN::Common::CondensedHierarchy< int, float > &  condensed_tree,
HDBSCAN::Common::PredictionData< int, float > &  prediction_data,
const float *  X,
const float *  points_to_predict,
size_t  n_prediction_points,
int  min_samples,
raft::distance::DistanceType  metric,
float *  membership_vec,
size_t  batch_size = 4096 
)

◆ concatenate_trees()

TreeliteModelHandle ML::concatenate_trees ( std::vector< TreeliteModelHandle treelite_handles)

◆ convert_level_to_spdlog()

int ML::convert_level_to_spdlog ( int  level)

◆ delete_rf_metadata()

template<class T , class L >
void ML::delete_rf_metadata ( RandomForestMetaData< T, L > *  forest)

◆ detect_missing()

bool ML::detect_missing ( raft::handle_t &  handle,
const double *  d_y,
int  n_elem 
)

Detect missing observations in a time series

Parameters
[in]handlecuML handle
[in]d_yTime series
[in]n_elemTotal number of elements in the dataset

◆ divide_by_mask_build_index()

int ML::divide_by_mask_build_index ( const raft::handle_t &  handle,
const bool *  d_mask,
int *  d_index,
int  batch_size 
)

Batch division by mask step 1: build an index of the position of each series in its new batch and measure the size of each sub-batch

Parameters
[in]handlecuML handle
[in]d_maskBoolean mask
[out]d_indexIndex of each series in its new batch
[in]batch_sizeBatch size
Returns
The number of 'true' series in the mask

◆ divide_by_mask_execute() [1/3]

void ML::divide_by_mask_execute ( const raft::handle_t &  handle,
const double *  d_in,
const bool *  d_mask,
const int *  d_index,
double *  d_out0,
double *  d_out1,
int  batch_size,
int  n_obs 
)

◆ divide_by_mask_execute() [2/3]

void ML::divide_by_mask_execute ( const raft::handle_t &  handle,
const float *  d_in,
const bool *  d_mask,
const int *  d_index,
float *  d_out0,
float *  d_out1,
int  batch_size,
int  n_obs 
)

Batch division by mask step 2: create both sub-batches from the mask and index

Parameters
[in]handlecuML handle
[in]d_inInput batch. Each series is a contiguous chunk
[in]d_maskBoolean mask
[in]d_indexIndex of each series in its new batch
[out]d_out0The sub-batch for the 'false' members
[out]d_out1The sub-batch for the 'true' members
[in]batch_sizeBatch size
[in]n_obsNumber of data points per series

◆ divide_by_mask_execute() [3/3]

void ML::divide_by_mask_execute ( const raft::handle_t &  handle,
const int *  d_in,
const bool *  d_mask,
const int *  d_index,
int *  d_out0,
int *  d_out1,
int  batch_size,
int  n_obs 
)

◆ divide_by_min_build_index() [1/2]

void ML::divide_by_min_build_index ( const raft::handle_t &  handle,
const double *  d_matrix,
int *  d_batch,
int *  d_index,
int *  h_size,
int  batch_size,
int  n_sub 
)

◆ divide_by_min_build_index() [2/2]

void ML::divide_by_min_build_index ( const raft::handle_t &  handle,
const float *  d_matrix,
int *  d_batch,
int *  d_index,
int *  h_size,
int  batch_size,
int  n_sub 
)

Batch division by minimum value step 1: build an index of which sub-batch each series belongs to, an index of the position of each series in its new batch, and measure the size of each sub-batch

Parameters
[in]handlecuML handle
[in]d_matrixMatrix of the values to minimize Shape: (batch_size, n_sub)
[out]d_batchWhich sub-batch each series belongs to
[out]d_indexIndex of each series in its new batch
[out]h_sizeSize of each sub-batch (host)
[in]batch_sizeBatch size
[in]n_subNumber of sub-batches

◆ divide_by_min_execute() [1/3]

void ML::divide_by_min_execute ( const raft::handle_t &  handle,
const double *  d_in,
const int *  d_batch,
const int *  d_index,
double **  hd_out,
int  batch_size,
int  n_sub,
int  n_obs 
)

◆ divide_by_min_execute() [2/3]

void ML::divide_by_min_execute ( const raft::handle_t &  handle,
const float *  d_in,
const int *  d_batch,
const int *  d_index,
float **  hd_out,
int  batch_size,
int  n_sub,
int  n_obs 
)

Batch division by minimum value step 2: create all the sub-batches

Parameters
[in]handlecuML handle
[in]d_inInput batch. Each series is a contiguous chunk
[in]d_batchWhich sub-batch each series belongs to
[in]d_indexIndex of each series in its new sub-batch
[out]hd_outHost array of pointers to device arrays of each sub-batch
[in]batch_sizeBatch size
[in]n_subNumber of sub-batches
[in]n_obsNumber of data points per series

◆ divide_by_min_execute() [3/3]

void ML::divide_by_min_execute ( const raft::handle_t &  handle,
const int *  d_in,
const int *  d_batch,
const int *  d_index,
int **  hd_out,
int  batch_size,
int  n_sub,
int  n_obs 
)

◆ dot()

template<typename T >
T ML::dot ( const SimpleVec< T > &  u,
const SimpleVec< T > &  v,
T *  tmp_dev,
cudaStream_t  stream 
)
inline

◆ estimate_x0()

void ML::estimate_x0 ( raft::handle_t &  handle,
ARIMAParams< double > &  params,
const double *  d_y,
const double *  d_exog,
int  batch_size,
int  n_obs,
const ARIMAOrder order,
bool  missing 
)

Provide initial estimates to ARIMA parameters mu, AR, and MA

Parameters
[in]handlecuML handle
[out]paramsARIMA parameters (device)
[in]d_ySeries to fit: shape = (n_obs, batch_size) and expects column major data layout. (device)
[in]d_exogExogenous variables. Shape = (n_obs, n_exog * batch_size) (device)
[in]batch_sizeTotal number of batched time series
[in]n_obsNumber of samples per time series (all series must be identical)
[in]orderARIMA hyper-parameters
[in]missingAre there missing observations?

◆ fit() [1/4]

void ML::fit ( const raft::handle_t &  user_handle,
RandomForestClassifierD *&  forest,
double *  input,
int  n_rows,
int  n_cols,
int *  labels,
int  n_unique_labels,
RF_params  rf_params,
int  verbosity = CUML_LEVEL_INFO 
)

◆ fit() [2/4]

void ML::fit ( const raft::handle_t &  user_handle,
RandomForestClassifierF *&  forest,
float *  input,
int  n_rows,
int  n_cols,
int *  labels,
int  n_unique_labels,
RF_params  rf_params,
int  verbosity = CUML_LEVEL_INFO 
)

◆ fit() [3/4]

void ML::fit ( const raft::handle_t &  user_handle,
RandomForestRegressorD *&  forest,
double *  input,
int  n_rows,
int  n_cols,
double *  labels,
RF_params  rf_params,
int  verbosity = CUML_LEVEL_INFO 
)

◆ fit() [4/4]

void ML::fit ( const raft::handle_t &  user_handle,
RandomForestRegressorF *&  forest,
float *  input,
int  n_rows,
int  n_cols,
float *  labels,
RF_params  rf_params,
int  verbosity = CUML_LEVEL_INFO 
)

◆ get_device()

int ML::get_device ( const void *  ptr)
inline

◆ get_rf_detailed_text()

template<class T , class L >
std::string ML::get_rf_detailed_text ( const RandomForestMetaData< T, L > *  forest)

◆ get_rf_json()

template<class T , class L >
std::string ML::get_rf_json ( const RandomForestMetaData< T, L > *  forest)

◆ get_rf_summary_text()

template<class T , class L >
std::string ML::get_rf_summary_text ( const RandomForestMetaData< T, L > *  forest)

◆ hdbscan()

void ML::hdbscan ( const raft::handle_t &  handle,
const float *  X,
size_t  m,
size_t  n,
raft::distance::DistanceType  metric,
HDBSCAN::Common::HDBSCANParams params,
HDBSCAN::Common::hdbscan_output< int, float > &  out,
float *  core_dists 
)

Executes HDBSCAN clustering on an mxn-dimensional input array, X.

Note that while the algorithm is generally deterministic and should provide matching results between RAPIDS and the Scikit-learn Contrib versions, the construction of the k-nearest neighbors graph and minimum spanning tree can introduce differences between the two algorithms, especially when several nearest neighbors around a point might have the same distance. While the differences in the minimum spanning trees alone might be subtle, they can (and often will) lead to some points being assigned different cluster labels between the two implementations.

Parameters
[in]handleraft handle for resource reuse
[in]Xarray (size m, n) on device in row-major format
[in]mnumber of rows in X
[in]nnumber of columns in X
[in]metricdistance metric to use
[in]paramsstruct of configuration hyper-parameters
[out]outstruct of output data and arrays on device
[out]core_distsarray (size m, 1) of core distances

◆ information_criterion()

void ML::information_criterion ( raft::handle_t &  handle,
const ARIMAMemory< double > &  arima_mem,
const double *  d_y,
const double *  d_exog,
int  batch_size,
int  n_obs,
const ARIMAOrder order,
const ARIMAParams< double > &  params,
double *  ic,
int  ic_type 
)

Compute an information criterion (AIC, AICc, BIC)

Parameters
[in]handlecuML handle
[in]arima_memPre-allocated temporary memory
[in]d_ySeries to fit: shape = (n_obs, batch_size) and expects column major data layout. (device)
[in]d_exogExogenous variables. Shape = (n_obs, n_exog * batch_size) (device)
[in]batch_sizeTotal number of batched time series
[in]n_obsNumber of samples per time series (all series must be identical)
[in]orderARIMA hyper-parameters
[in]paramsARIMA parameters (device)
[out]icArray where to write the information criteria Shape: (batch_size) (device)
[in]ic_typeType of information criterion wanted. 0: AIC, 1: AICc, 2: BIC

◆ is_device_or_managed_type()

bool ML::is_device_or_managed_type ( const void *  p)
inline

◆ knn_class_proba()

void ML::knn_class_proba ( raft::handle_t &  handle,
std::vector< float * > &  out,
int64_t *  knn_indices,
std::vector< int * > &  y,
size_t  n_index_rows,
size_t  n_query_rows,
int  k 
)

Flat C++ API function to compute knn class probabilities using a vector of device arrays containing discrete class labels. Note that the output is a vector of device arrays (vector size = n_outputs).

Parameters
[in]handleRAFT handle
[out]outvector of output arrays on device. vector size = n_outputs. Each array should have size(n_samples, n_classes)
[in]knn_indicesarray on device of knn indices (size n_samples * k)
[in]yarray of labels on device (size n_samples)
[in]n_index_rowsnumber of labels in y
[in]n_query_rowsnumber of rows in knn_indices and out
[in]knumber of nearest neighbors in knn_indices

◆ knn_classify()

void ML::knn_classify ( raft::handle_t &  handle,
int *  out,
int64_t *  knn_indices,
std::vector< int * > &  y,
size_t  n_index_rows,
size_t  n_query_rows,
int  k 
)

Flat C++ API function to perform a knn classification using a given vector of label arrays. This supports multilabel classification by classifying on multiple label arrays. Note that each label is classified independently, as is done in scikit-learn.

Parameters
[in]handleRAFT handle
[out]outoutput array on device (size n_samples * size of y vector)
[in]knn_indicesindex array on device resulting from knn query (size n_samples * k)
[in]yvector of label arrays on device. Vector size is the number of label arrays (each of size n_samples)
[in]n_index_rowsnumber of vertices in index (eg. size of each y array)
[in]n_query_rowsnumber of samples in knn_indices
[in]knumber of nearest neighbors in knn_indices

◆ knn_regress()

void ML::knn_regress ( raft::handle_t &  handle,
float *  out,
int64_t *  knn_indices,
std::vector< float * > &  y,
size_t  n_index_rows,
size_t  n_query_rows,
int  k 
)

Flat C++ API function to perform a knn regression using a given vector of label arrays. This supports multilabel regression by regressing on multiple label arrays. Note that each label is regressed independently, as is done in scikit-learn.

Parameters
[in]handleRAFT handle
[out]outoutput array on device (size n_samples)
[in]knn_indicesarray on device of knn indices (size n_samples * k)
[in]yarray of labels on device (size n_samples)
[in]n_index_rowsnumber of vertices in index (eg. size of each y array)
[in]n_query_rowsnumber of samples in knn_indices and out
[in]knumber of nearest neighbors in knn_indices

◆ knn_search()

cumlError_t ML::knn_search ( const cumlHandle_t  handle,
float **  input,
int *  sizes,
int  n_params,
int  D,
float *  search_items,
int  n,
int64_t *  res_I,
float *  res_D,
int  k,
bool  rowMajorIndex,
bool  rowMajorQuery,
int  metric_type,
float  metric_arg,
bool  expanded 
)

Flat C API function to perform a brute force knn on a series of input arrays and combine the results into a single output array for indexes and distances.

Parameters
[in]handlethe cuml handle to use
[in]inputan array of pointers to the input arrays
[in]sizesan array of sizes of input arrays
[in]n_paramsarray size of input and sizes
[in]Dthe dimensionality of the arrays
[in]search_itemsarray of items to search of dimensionality D
[in]nnumber of rows in search_items
[out]res_Ithe resulting index array of size n * k
[out]res_Dthe resulting distance array of size n * k
[in]kthe number of nearest neighbors to return
[in]rowMajorIndexis the index array in row major layout?
[in]rowMajorQueryis the query array in row major layout?
[in]metric_typedistance metric to use. Specify the metric using the integer value of the enum ML::MetricType.
[in]metric_argthe value of p for Minkowski (l-p) distances. This is ignored if the metric_type is not Minkowski.
[in]expandedshould lp-based distances be returned in their expanded form (e.g., without raising to the 1/p power).

◆ memory_type()

cudaMemoryType ML::memory_type ( const void *  p)
inline

◆ merge_series() [1/2]

void ML::merge_series ( const raft::handle_t &  handle,
const double *const *  hd_in,
const int *  d_id_to_pos,
const int *  d_id_to_sub,
double *  d_out,
int  batch_size,
int  n_sub,
int  n_obs 
)

◆ merge_series() [2/2]

void ML::merge_series ( const raft::handle_t &  handle,
const float *const *  hd_in,
const int *  d_id_to_pos,
const int *  d_id_to_sub,
float *  d_out,
int  batch_size,
int  n_sub,
int  n_obs 
)

Merge multiple sub-batches into one batch according to the maps that associate each id in the unique batch to a sub-batch and a position in this sub-batch.

Parameters
[in]handlecuML handle
[in]hd_inHost array of pointers to device arrays containing the sub-batches
[in]d_id_to_posDevice array containing the position of each member in its new sub-batch
[in]d_id_to_subDevice array associating each member with its sub-batch
[out]d_outOutput merged batch
[in]batch_sizeBatch size
[in]n_subNumber of sub-batches
[in]n_obsNumber of observations (or forecasts) per series

◆ nrm1()

template<typename T >
T ML::nrm1 ( const SimpleVec< T > &  u,
T *  tmp_dev,
cudaStream_t  stream 
)
inline

◆ nrm2()

template<typename T >
T ML::nrm2 ( const SimpleVec< T > &  u,
T *  tmp_dev,
cudaStream_t  stream 
)
inline

◆ nrmMax()

template<typename T >
T ML::nrmMax ( const SimpleVec< T > &  u,
T *  tmp_dev,
cudaStream_t  stream 
)
inline

◆ operator<<() [1/3]

template<typename T >
std::ostream& ML::operator<< ( std::ostream &  os,
const SimpleDenseMat< T > &  mat 
)

◆ operator<<() [2/3]

template<typename T , typename I = int>
std::ostream& ML::operator<< ( std::ostream &  os,
const SimpleSparseMat< T, I > &  mat 
)

◆ operator<<() [3/3]

template<typename T >
std::ostream& ML::operator<< ( std::ostream &  os,
const SimpleVec< T > &  v 
)

◆ out_of_sample_predict()

void ML::out_of_sample_predict ( const raft::handle_t &  handle,
HDBSCAN::Common::CondensedHierarchy< int, float > &  condensed_tree,
HDBSCAN::Common::PredictionData< int, float > &  prediction_data,
const float *  X,
int *  labels,
const float *  points_to_predict,
size_t  n_prediction_points,
raft::distance::DistanceType  metric,
int  min_samples,
int *  out_labels,
float *  out_probabilities 
)

◆ pack()

void ML::pack ( raft::handle_t &  handle,
const ARIMAParams< double > &  params,
const ARIMAOrder order,
int  batch_size,
double *  param_vec 
)

Pack separate parameter arrays into a compact array

Parameters
[in]handlecuML handle
[in]paramsParameter structure
[in]orderARIMA order
[in]batch_sizeBatch size
[out]param_vecCompact parameter array

◆ pcaFit() [1/2]

void ML::pcaFit ( raft::handle_t &  handle,
double *  input,
double *  components,
double *  explained_var,
double *  explained_var_ratio,
double *  singular_vals,
double *  mu,
double *  noise_vars,
const paramsPCA prms 
)

◆ pcaFit() [2/2]

void ML::pcaFit ( raft::handle_t &  handle,
float *  input,
float *  components,
float *  explained_var,
float *  explained_var_ratio,
float *  singular_vals,
float *  mu,
float *  noise_vars,
const paramsPCA prms 
)

◆ pcaFitTransform() [1/2]

void ML::pcaFitTransform ( raft::handle_t &  handle,
double *  input,
double *  trans_input,
double *  components,
double *  explained_var,
double *  explained_var_ratio,
double *  singular_vals,
double *  mu,
double *  noise_vars,
const paramsPCA prms 
)

◆ pcaFitTransform() [2/2]

void ML::pcaFitTransform ( raft::handle_t &  handle,
float *  input,
float *  trans_input,
float *  components,
float *  explained_var,
float *  explained_var_ratio,
float *  singular_vals,
float *  mu,
float *  noise_vars,
const paramsPCA prms 
)

◆ pcaInverseTransform() [1/2]

void ML::pcaInverseTransform ( raft::handle_t &  handle,
double *  trans_input,
double *  components,
double *  singular_vals,
double *  mu,
double *  input,
const paramsPCA prms 
)

◆ pcaInverseTransform() [2/2]

void ML::pcaInverseTransform ( raft::handle_t &  handle,
float *  trans_input,
float *  components,
float *  singular_vals,
float *  mu,
float *  input,
const paramsPCA prms 
)

◆ pcaTransform() [1/2]

void ML::pcaTransform ( raft::handle_t &  handle,
double *  input,
double *  components,
double *  trans_input,
double *  singular_vals,
double *  mu,
const paramsPCA prms 
)

◆ pcaTransform() [2/2]

void ML::pcaTransform ( raft::handle_t &  handle,
float *  input,
float *  components,
float *  trans_input,
float *  singular_vals,
float *  mu,
const paramsPCA prms 
)

◆ POP_RANGE() [1/2]

void ML::POP_RANGE ( )
inline

Pop the latest range

◆ POP_RANGE() [2/2]

void ML::POP_RANGE ( cudaStream_t  stream)
inline

Synchronize CUDA stream and pop the latest nvtx range.

Parameters
streamstream to synchronize

◆ postprocess_labels()

void ML::postprocess_labels ( int  n_rows,
std::vector< int > &  labels,
std::map< int, int > &  labels_map,
int  verbosity = CUML_LEVEL_INFO 
)

◆ predict() [1/5]

void ML::predict ( const raft::handle_t &  user_handle,
const RandomForestClassifierD forest,
const double *  input,
int  n_rows,
int  n_cols,
int *  predictions,
int  verbosity = CUML_LEVEL_INFO 
)

◆ predict() [2/5]

void ML::predict ( const raft::handle_t &  user_handle,
const RandomForestClassifierF forest,
const float *  input,
int  n_rows,
int  n_cols,
int *  predictions,
int  verbosity = CUML_LEVEL_INFO 
)

◆ predict() [3/5]

void ML::predict ( const raft::handle_t &  user_handle,
const RandomForestRegressorD forest,
const double *  input,
int  n_rows,
int  n_cols,
double *  predictions,
int  verbosity = CUML_LEVEL_INFO 
)

◆ predict() [4/5]

void ML::predict ( const raft::handle_t &  user_handle,
const RandomForestRegressorF forest,
const float *  input,
int  n_rows,
int  n_cols,
float *  predictions,
int  verbosity = CUML_LEVEL_INFO 
)

◆ predict() [5/5]

void ML::predict ( raft::handle_t &  handle,
const ARIMAMemory< double > &  arima_mem,
const double *  d_y,
const double *  d_exog,
const double *  d_exog_fut,
int  batch_size,
int  n_obs,
int  start,
int  end,
const ARIMAOrder order,
const ARIMAParams< double > &  params,
double *  d_y_p,
bool  pre_diff = true,
double  level = 0,
double *  d_lower = nullptr,
double *  d_upper = nullptr 
)

Batched in-sample and out-of-sample prediction of a time-series given all the model parameters

Parameters
[in]handlecuML handle
[in]arima_memPre-allocated temporary memory
[in]d_yBatched Time series to predict. Shape: (num_samples, batch size) (device)
[in]d_exogExogenous variables. Shape = (n_obs, n_exog * batch_size) (device)
[in]d_exog_futFuture values of exogenous variables. Shape: (end - n_obs, n_exog * batch_size) (device)
[in]batch_sizeTotal number of batched time series
[in]n_obsNumber of samples per time series (all series must be identical)
[in]startIndex to start the prediction
[in]endIndex to end the prediction (excluded)
[in]orderARIMA hyper-parameters
[in]paramsARIMA parameters (device)
[out]d_y_pPrediction output (device)
[in]pre_diffWhether to use pre-differencing
[in]levelConfidence level for prediction intervals. 0 to skip the computation. Else 0 < level < 1
[out]d_lowerLower limit of the prediction interval
[out]d_upperUpper limit of the prediction interval

◆ preprocess_labels()

void ML::preprocess_labels ( int  n_rows,
std::vector< int > &  labels,
std::map< int, int > &  labels_map,
int  verbosity = CUML_LEVEL_INFO 
)

◆ print()

void ML::print ( const RF_metrics  rf_metrics)

◆ PUSH_RANGE() [1/2]

void ML::PUSH_RANGE ( const char *  name)
inline

Push a named nvtx range.

Parameters
namerange name

◆ PUSH_RANGE() [2/2]

void ML::PUSH_RANGE ( const char *  name,
cudaStream_t  stream 
)
inline

Synchronize CUDA stream and push a named nvtx range.

Parameters
namerange name
streamstream to synchronize

◆ rbc_build_index()

void ML::rbc_build_index ( const raft::handle_t &  handle,
raft::spatial::knn::BallCoverIndex< int64_t, float, uint32_t > &  index 
)

◆ rbc_knn_query()

void ML::rbc_knn_query ( const raft::handle_t &  handle,
raft::spatial::knn::BallCoverIndex< int64_t, float, uint32_t > &  index,
uint32_t  k,
const float *  search_items,
uint32_t  n_search_items,
int64_t *  out_inds,
float *  out_dists 
)

◆ score() [1/4]

RF_metrics ML::score ( const raft::handle_t &  user_handle,
const RandomForestClassifierD forest,
const int *  ref_labels,
int  n_rows,
const int *  predictions,
int  verbosity = CUML_LEVEL_INFO 
)

◆ score() [2/4]

RF_metrics ML::score ( const raft::handle_t &  user_handle,
const RandomForestClassifierF forest,
const int *  ref_labels,
int  n_rows,
const int *  predictions,
int  verbosity = CUML_LEVEL_INFO 
)

◆ score() [3/4]

RF_metrics ML::score ( const raft::handle_t &  user_handle,
const RandomForestRegressorD forest,
const double *  ref_labels,
int  n_rows,
const double *  predictions,
int  verbosity = CUML_LEVEL_INFO 
)

◆ score() [4/4]

RF_metrics ML::score ( const raft::handle_t &  user_handle,
const RandomForestRegressorF forest,
const float *  ref_labels,
int  n_rows,
const float *  predictions,
int  verbosity = CUML_LEVEL_INFO 
)

◆ set_all_rf_metrics()

RF_metrics ML::set_all_rf_metrics ( RF_type  rf_type,
float  accuracy,
double  mean_abs_error,
double  mean_squared_error,
double  median_abs_error 
)

◆ set_rf_metrics_classification()

RF_metrics ML::set_rf_metrics_classification ( float  accuracy)

◆ set_rf_metrics_regression()

RF_metrics ML::set_rf_metrics_regression ( double  mean_abs_error,
double  mean_squared_error,
double  median_abs_error 
)

◆ set_rf_params()

RF_params ML::set_rf_params ( int  max_depth,
int  max_leaves,
float  max_features,
int  max_n_bins,
int  min_samples_leaf,
int  min_samples_split,
float  min_impurity_decrease,
bool  bootstrap,
int  n_trees,
float  max_samples,
uint64_t  seed,
CRITERION  split_criterion,
int  cfg_n_streams,
int  max_batch_size 
)

◆ single_linkage_neighbors()

void ML::single_linkage_neighbors ( const raft::handle_t &  handle,
const float *  X,
size_t  m,
size_t  n,
raft::hierarchy::linkage_output< int > *  out,
raft::distance::DistanceType  metric = raft::distance::DistanceType::L2Unexpanded,
int  c = 15,
int  n_clusters = 5 
)

Computes single-linkage hierarchical clustering on a dense input feature matrix and outputs the labels, dendrogram, and minimum spanning tree. Connectivities are constructed using a k-nearest neighbors graph. While this strategy enables the algorithm to scale to much higher numbers of rows, it comes with the downside that additional knn steps may need to be executed to connect an otherwise unconnected k-nn graph.

Parameters
[in]handleraft handle to encapsulate expensive resources
[in]Xdense feature matrix on device
[in]mnumber of rows in X
[in]nnumber of columns in X
[in]metricdistance metric to use. Must be supported by the dense pairwise distances API.
[out]outcontainer object for output arrays
[in]cthe optimal value of k is guaranteed to be at least log(n) + c where c is some constant. This constant can usually be set to a fairly low value, like 15, and still maintain good performance.
[in]n_clustersnumber of clusters to cut from resulting dendrogram

◆ single_linkage_pairwise() [1/2]

void ML::single_linkage_pairwise ( const raft::handle_t &  handle,
const float *  X,
size_t  m,
size_t  n,
raft::hierarchy::linkage_output< int > *  out,
raft::distance::DistanceType  metric,
int  n_clusters = 5 
)

Computes single-linkage hierarchical clustering on a dense input feature matrix and outputs the labels, dendrogram, and minimum spanning tree. Connectivities are constructed using the full n^2 pairwise distance matrix. This can be very fast for smaller datasets when there is enough memory available.

Parameters
[in]handleraft handle to encapsulate expensive resources
[in]Xdense feature matrix on device
[in]mnumber of rows in X
[in]nnumber of columns in X
[in]metricdistance metric to use. Must be supported by the dense pairwise distances API.
[out]outcontainer object for output arrays
[in]n_clustersnumber of clusters to cut from resulting dendrogram

◆ single_linkage_pairwise() [2/2]

void ML::single_linkage_pairwise ( const raft::handle_t &  handle,
const float *  X,
size_t  m,
size_t  n,
raft::hierarchy::linkage_output< int64_t > *  out,
raft::distance::DistanceType  metric,
int  n_clusters = 5 
)

◆ squaredNorm()

template<typename T >
T ML::squaredNorm ( const SimpleVec< T > &  u,
T *  tmp_dev,
cudaStream_t  stream 
)
inline

◆ TSNE_fit()

void ML::TSNE_fit ( const raft::handle_t &  handle,
float *  X,
float *  Y,
int  n,
int  p,
int64_t *  knn_indices,
float *  knn_dists,
TSNEParams params,
float *  kl_div = nullptr 
)

Dimensionality reduction via TSNE using Barnes-Hut, Fourier Interpolation, or naive (brute force, O(N^2)) methods.

Parameters
[in]handleThe GPU handle.
[in]XThe row-major dataset in device memory.
[out]YThe column-major final embedding in device memory
[in]nNumber of rows in data X.
[in]pNumber of columns in data X.
[in]knn_indicesArray containing nearest neighbors indices.
[in]knn_distsArray containing nearest neighbors distances.
[in]paramsParameters for TSNE model
[out]kl_div(optional) KL divergence output

The CUDA implementation is derived from the excellent CannyLabs open source implementation here: https://github.com/CannyLab/tsne-cuda/. The CannyLabs code is licensed according to the conditions in cuml/cpp/src/tsne/cannylabs_tsne_license.txt. A full description of their approach is available in their article t-SNE-CUDA: GPU-Accelerated t-SNE and its Applications to Modern Data (https://arxiv.org/abs/1807.11824).

◆ TSNE_fit_sparse()

void ML::TSNE_fit_sparse ( const raft::handle_t &  handle,
int *  indptr,
int *  indices,
float *  data,
float *  Y,
int  nnz,
int  n,
int  p,
int *  knn_indices,
float *  knn_dists,
TSNEParams params,
float *  kl_div = nullptr 
)

Dimensionality reduction via TSNE using either Barnes Hut O(NlogN) or brute force O(N^2).

Parameters
[in]handleThe GPU handle.
[in]indptrindptr of CSR dataset.
[in]indicesindices of CSR dataset.
[in]datadata of CSR dataset.
[out]YThe final embedding.
[in]nnzThe number of non-zero entries in the CSR.
[in]nNumber of rows in data X.
[in]pNumber of columns in data X.
[in]knn_indicesArray containing nearest neighbors indices.
[in]knn_distsArray containing nearest neighbors distances.
[in]paramsParameters for TSNE model
[out]kl_div(optional) KL divergence output

The CUDA implementation is derived from the excellent CannyLabs open source implementation here: https://github.com/CannyLab/tsne-cuda/. The CannyLabs code is licensed according to the conditions in cuml/cpp/src/tsne/cannylabs_tsne_license.txt. A full description of their approach is available in their article t-SNE-CUDA: GPU-Accelerated t-SNE and its Applications to Modern Data (https://arxiv.org/abs/1807.11824).

◆ tsvdFit() [1/2]

void ML::tsvdFit ( raft::handle_t &  handle,
double *  input,
double *  components,
double *  singular_vals,
const paramsTSVD prms 
)

◆ tsvdFit() [2/2]

void ML::tsvdFit ( raft::handle_t &  handle,
float *  input,
float *  components,
float *  singular_vals,
const paramsTSVD prms 
)

◆ tsvdFitTransform() [1/2]

void ML::tsvdFitTransform ( raft::handle_t &  handle,
double *  input,
double *  trans_input,
double *  components,
double *  explained_var,
double *  explained_var_ratio,
double *  singular_vals,
const paramsTSVD prms 
)

◆ tsvdFitTransform() [2/2]

void ML::tsvdFitTransform ( raft::handle_t &  handle,
float *  input,
float *  trans_input,
float *  components,
float *  explained_var,
float *  explained_var_ratio,
float *  singular_vals,
const paramsTSVD prms 
)

◆ tsvdInverseTransform() [1/2]

void ML::tsvdInverseTransform ( raft::handle_t &  handle,
double *  trans_input,
double *  components,
double *  input,
const paramsTSVD prms 
)

◆ tsvdInverseTransform() [2/2]

void ML::tsvdInverseTransform ( raft::handle_t &  handle,
float *  trans_input,
float *  components,
float *  input,
const paramsTSVD prms 
)

◆ tsvdTransform() [1/2]

void ML::tsvdTransform ( raft::handle_t &  handle,
double *  input,
double *  components,
double *  trans_input,
const paramsTSVD prms 
)

◆ tsvdTransform() [2/2]

void ML::tsvdTransform ( raft::handle_t &  handle,
float *  input,
float *  components,
float *  trans_input,
const paramsTSVD prms 
)

◆ unpack()

void ML::unpack ( raft::handle_t &  handle,
ARIMAParams< double > &  params,
const ARIMAOrder order,
int  batch_size,
const double *  param_vec 
)

Unpack a compact array into separate parameter arrays

Parameters
[in]handlecuML handle
[out]paramsParameter structure
[in]orderARIMA order
[in]batch_sizeBatch size
[in]param_vecCompact parameter array

Variable Documentation

◆ handleMap

HandleMap ML::handleMap

Static handle map instance (see cumlHandle.cpp)