Namespaces | |
CD | |
Datasets | |
Dbscan | |
DT | |
experimental | |
Explainer | |
fil | |
GLM | |
HDBSCAN | |
HoltWinters | |
Internals | |
kmeans | |
KNN | |
Metrics | |
OLS | |
PCA | |
Ridge | |
Solver | |
Sparse | |
Spectral | |
Stationarity | |
SVM | |
TSVD | |
UMAP | |
Classes | |
class | Logger |
The main Logging class for cuML library. More... | |
class | PatternSetter |
RAII based pattern setter for Logger class. More... | |
class | pinned_host_vector |
class | params |
class | paramsSolver |
class | paramsTSVDTemplate |
class | paramsPCATemplate |
structure for pca parameters. Ref: http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html More... | |
struct | RF_metrics |
struct | RF_params |
struct | RandomForestMetaData |
struct | knn_graph |
struct | manifold_inputs_t |
struct | manifold_dense_inputs_t |
struct | manifold_sparse_inputs_t |
struct | manifold_precomputed_knn_inputs_t |
struct | TSNEParams |
class | UMAPParams |
struct | knnIndex |
struct | knnIndexParam |
struct | IVFParam |
struct | IVFFlatParam |
struct | IVFPQParam |
struct | paramsRPROJ |
struct | rand_mat |
struct | ARIMAOrder |
struct | ARIMAParams |
struct | ARIMAMemory |
struct | OptimParams |
class | HandleMap |
struct | SimpleDenseMat |
struct | SimpleMat |
struct | SimpleVec |
struct | SimpleVecOwning |
struct | SimpleMatOwning |
struct | SimpleSparseMat |
Typedefs | |
typedef paramsTSVDTemplate | paramsTSVD |
typedef paramsPCATemplate | paramsPCA |
typedef paramsPCATemplate< mg_solver > | paramsPCAMG |
typedef paramsTSVDTemplate< mg_solver > | paramsTSVDMG |
typedef RandomForestMetaData< float, int > | RandomForestClassifierF |
typedef RandomForestMetaData< double, int > | RandomForestClassifierD |
typedef RandomForestMetaData< float, float > | RandomForestRegressorF |
typedef RandomForestMetaData< double, double > | RandomForestRegressorD |
typedef int64_t | knn_indices_dense_t |
typedef int | knn_indices_sparse_t |
using | nn_index_params = raft::neighbors::experimental::nn_descent::index_params |
Enumerations | |
enum class | solver : int { COV_EIG_DQ , COV_EIG_JACOBI } |
enum class | mg_solver { COV_EIG_DQ , COV_EIG_JACOBI , QR } |
enum | RF_type { CLASSIFICATION , REGRESSION } |
enum | task_category { REGRESSION_MODEL = 1 , CLASSIFICATION_MODEL = 2 } |
enum | TSNE_ALGORITHM { EXACT , BARNES_HUT , FFT } |
enum | TSNE_INIT { RANDOM , PCA } |
enum | random_matrix_type { unset , dense , sparse } |
enum | lr_type { OPTIMAL , CONSTANT , INVSCALING , ADAPTIVE } |
enum | loss_funct { SQRD_LOSS , HINGE , LOG } |
enum | penalty { NONE , L1 , L2 , ELASTICNET } |
enum | CRITERION { GINI , ENTROPY , MSE , MAE , POISSON , GAMMA , INVERSE_GAUSSIAN , CRITERION_END } |
enum | LoglikeMethod { CSS , MLE } |
enum | SeasonalType { ADDITIVE , MULTIPLICATIVE } |
enum | OptimCriterion { OPTIM_BFGS_ITER_LIMIT = 0 , OPTIM_MIN_PARAM_DIFF = 1 , OPTIM_MIN_ERROR_DIFF = 2 , OPTIM_MIN_GRAD_NORM = 3 } |
enum | Norm { L0 , L1 , L2 , LINF } |
enum | STORAGE_ORDER { COL_MAJOR = 0 , ROW_MAJOR = 1 } |
Functions | |
void | hdbscan (const raft::handle_t &handle, const float *X, size_t m, size_t n, cuvs::distance::DistanceType metric, HDBSCAN::Common::HDBSCANParams &params, HDBSCAN::Common::hdbscan_output< int, float > &out, float *core_dists) |
void | build_condensed_hierarchy (const raft::handle_t &handle, const int *children, const float *delta, const int *sizes, int min_cluster_size, int n_leaves, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree) |
void | _extract_clusters (const raft::handle_t &handle, size_t n_leaves, int n_edges, int *parents, int *children, float *lambdas, int *sizes, int *labels, float *probabilities, HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method, bool allow_single_cluster, int max_cluster_size, float cluster_selection_epsilon) |
void | compute_all_points_membership_vectors (const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, cuvs::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096) |
void | compute_membership_vector (const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, const float *points_to_predict, size_t n_prediction_points, int min_samples, cuvs::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096) |
void | out_of_sample_predict (const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, int *labels, const float *points_to_predict, size_t n_prediction_points, cuvs::distance::DistanceType metric, int min_samples, int *out_labels, float *out_probabilities) |
void | single_linkage_pairwise (const raft::handle_t &handle, const float *X, size_t m, size_t n, raft::hierarchy::linkage_output< int > *out, cuvs::distance::DistanceType metric, int n_clusters=5) |
Computes single-linkage hierarchical clustering on a dense input feature matrix and outputs the labels, dendrogram, and minimum spanning tree. Connectivities are constructed using the full n^2 pairwise distance matrix. This can be very fast for smaller datasets when there is enough memory available. More... | |
void | single_linkage_neighbors (const raft::handle_t &handle, const float *X, size_t m, size_t n, raft::hierarchy::linkage_output< int > *out, cuvs::distance::DistanceType metric=cuvs::distance::DistanceType::L2Unexpanded, int c=15, int n_clusters=5) |
Computes single-linkage hierarchical clustering on a dense input feature matrix and outputs the labels, dendrogram, and minimum spanning tree. Connectivities are constructed using a k-nearest neighbors graph. While this strategy enables the algorithm to scale to much higher numbers of rows, it comes with the downside that additional knn steps may need to be executed to connect an otherwise unconnected k-nn graph. More... | |
void | single_linkage_pairwise (const raft::handle_t &handle, const float *X, size_t m, size_t n, raft::hierarchy::linkage_output< int64_t > *out, cuvs::distance::DistanceType metric, int n_clusters=5) |
std::string | format (const char *fmt, va_list &vl) |
std::string | format (const char *fmt,...) |
void | pcaFit (raft::handle_t &handle, float *input, float *components, float *explained_var, float *explained_var_ratio, float *singular_vals, float *mu, float *noise_vars, const paramsPCA &prms) |
void | pcaFit (raft::handle_t &handle, double *input, double *components, double *explained_var, double *explained_var_ratio, double *singular_vals, double *mu, double *noise_vars, const paramsPCA &prms) |
void | pcaFitTransform (raft::handle_t &handle, float *input, float *trans_input, float *components, float *explained_var, float *explained_var_ratio, float *singular_vals, float *mu, float *noise_vars, const paramsPCA &prms) |
void | pcaFitTransform (raft::handle_t &handle, double *input, double *trans_input, double *components, double *explained_var, double *explained_var_ratio, double *singular_vals, double *mu, double *noise_vars, const paramsPCA &prms) |
void | pcaInverseTransform (raft::handle_t &handle, float *trans_input, float *components, float *singular_vals, float *mu, float *input, const paramsPCA &prms) |
void | pcaInverseTransform (raft::handle_t &handle, double *trans_input, double *components, double *singular_vals, double *mu, double *input, const paramsPCA &prms) |
void | pcaTransform (raft::handle_t &handle, float *input, float *components, float *trans_input, float *singular_vals, float *mu, const paramsPCA &prms) |
void | pcaTransform (raft::handle_t &handle, double *input, double *components, double *trans_input, double *singular_vals, double *mu, const paramsPCA &prms) |
void | tsvdFit (raft::handle_t &handle, float *input, float *components, float *singular_vals, const paramsTSVD &prms) |
void | tsvdFit (raft::handle_t &handle, double *input, double *components, double *singular_vals, const paramsTSVD &prms) |
void | tsvdInverseTransform (raft::handle_t &handle, float *trans_input, float *components, float *input, const paramsTSVD &prms) |
void | tsvdInverseTransform (raft::handle_t &handle, double *trans_input, double *components, double *input, const paramsTSVD &prms) |
void | tsvdTransform (raft::handle_t &handle, float *input, float *components, float *trans_input, const paramsTSVD &prms) |
void | tsvdTransform (raft::handle_t &handle, double *input, double *components, double *trans_input, const paramsTSVD &prms) |
void | tsvdFitTransform (raft::handle_t &handle, float *input, float *trans_input, float *components, float *explained_var, float *explained_var_ratio, float *singular_vals, const paramsTSVD &prms) |
void | tsvdFitTransform (raft::handle_t &handle, double *input, double *trans_input, double *components, double *explained_var, double *explained_var_ratio, double *singular_vals, const paramsTSVD &prms) |
RF_metrics | set_all_rf_metrics (RF_type rf_type, float accuracy, double mean_abs_error, double mean_squared_error, double median_abs_error) |
RF_metrics | set_rf_metrics_classification (float accuracy) |
RF_metrics | set_rf_metrics_regression (double mean_abs_error, double mean_squared_error, double median_abs_error) |
void | print (const RF_metrics rf_metrics) |
void | preprocess_labels (int n_rows, std::vector< int > &labels, std::map< int, int > &labels_map, int verbosity=CUML_LEVEL_INFO) |
void | postprocess_labels (int n_rows, std::vector< int > &labels, std::map< int, int > &labels_map, int verbosity=CUML_LEVEL_INFO) |
template<class T , class L > | |
void | delete_rf_metadata (RandomForestMetaData< T, L > *forest) |
template<class T , class L > | |
std::string | get_rf_summary_text (const RandomForestMetaData< T, L > *forest) |
template<class T , class L > | |
std::string | get_rf_detailed_text (const RandomForestMetaData< T, L > *forest) |
template<class T , class L > | |
std::string | get_rf_json (const RandomForestMetaData< T, L > *forest) |
template<class T , class L > | |
void | build_treelite_forest (TreeliteModelHandle *model, const RandomForestMetaData< T, L > *forest, int num_features) |
TreeliteModelHandle | concatenate_trees (std::vector< TreeliteModelHandle > treelite_handles) |
void | fit (const raft::handle_t &user_handle, RandomForestClassifierF *&forest, float *input, int n_rows, int n_cols, int *labels, int n_unique_labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO) |
void | fit (const raft::handle_t &user_handle, RandomForestClassifierD *&forest, double *input, int n_rows, int n_cols, int *labels, int n_unique_labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO) |
void | predict (const raft::handle_t &user_handle, const RandomForestClassifierF *forest, const float *input, int n_rows, int n_cols, int *predictions, int verbosity=CUML_LEVEL_INFO) |
void | predict (const raft::handle_t &user_handle, const RandomForestClassifierD *forest, const double *input, int n_rows, int n_cols, int *predictions, int verbosity=CUML_LEVEL_INFO) |
RF_metrics | score (const raft::handle_t &user_handle, const RandomForestClassifierF *forest, const int *ref_labels, int n_rows, const int *predictions, int verbosity=CUML_LEVEL_INFO) |
RF_metrics | score (const raft::handle_t &user_handle, const RandomForestClassifierD *forest, const int *ref_labels, int n_rows, const int *predictions, int verbosity=CUML_LEVEL_INFO) |
RF_params | set_rf_params (int max_depth, int max_leaves, float max_features, int max_n_bins, int min_samples_leaf, int min_samples_split, float min_impurity_decrease, bool bootstrap, int n_trees, float max_samples, uint64_t seed, CRITERION split_criterion, int cfg_n_streams, int max_batch_size) |
void | fit (const raft::handle_t &user_handle, RandomForestRegressorF *&forest, float *input, int n_rows, int n_cols, float *labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO) |
void | fit (const raft::handle_t &user_handle, RandomForestRegressorD *&forest, double *input, int n_rows, int n_cols, double *labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO) |
void | predict (const raft::handle_t &user_handle, const RandomForestRegressorF *forest, const float *input, int n_rows, int n_cols, float *predictions, int verbosity=CUML_LEVEL_INFO) |
void | predict (const raft::handle_t &user_handle, const RandomForestRegressorD *forest, const double *input, int n_rows, int n_cols, double *predictions, int verbosity=CUML_LEVEL_INFO) |
RF_metrics | score (const raft::handle_t &user_handle, const RandomForestRegressorF *forest, const float *ref_labels, int n_rows, const float *predictions, int verbosity=CUML_LEVEL_INFO) |
RF_metrics | score (const raft::handle_t &user_handle, const RandomForestRegressorD *forest, const double *ref_labels, int n_rows, const double *predictions, int verbosity=CUML_LEVEL_INFO) |
void | TSNE_fit (const raft::handle_t &handle, float *X, float *Y, int n, int p, int64_t *knn_indices, float *knn_dists, TSNEParams &params, float *kl_div=nullptr) |
Dimensionality reduction via TSNE using Barnes-Hut, Fourier Interpolation, or naive (brute force, O(N^2)) methods. More... | |
void | TSNE_fit_sparse (const raft::handle_t &handle, int *indptr, int *indices, float *data, float *Y, int nnz, int n, int p, int *knn_indices, float *knn_dists, TSNEParams &params, float *kl_div=nullptr) |
Dimensionality reduction via TSNE using either Barnes Hut O(NlogN) or brute force O(N^2). More... | |
void | brute_force_knn (const raft::handle_t &handle, std::vector< float * > &input, std::vector< int > &sizes, int D, float *search_items, int n, int64_t *res_I, float *res_D, int k, bool rowMajorIndex=false, bool rowMajorQuery=false, cuvs::distance::DistanceType metric=cuvs::distance::DistanceType::L2Expanded, float metric_arg=2.0f, std::vector< int64_t > *translations=nullptr) |
Flat C++ API function to perform a brute force knn on a series of input arrays and combine the results into a single output array for indexes and distances. More... | |
void | rbc_build_index (const raft::handle_t &handle, raft::spatial::knn::BallCoverIndex< int64_t, float, uint32_t > &index) |
void | rbc_knn_query (const raft::handle_t &handle, raft::spatial::knn::BallCoverIndex< int64_t, float, uint32_t > &index, uint32_t k, const float *search_items, uint32_t n_search_items, int64_t *out_inds, float *out_dists) |
void | approx_knn_build_index (raft::handle_t &handle, knnIndex *index, knnIndexParam *params, cuvs::distance::DistanceType metric, float metricArg, float *index_array, int n, int D) |
Flat C++ API function to build an approximate nearest neighbors index from an index array and a set of parameters. More... | |
void | approx_knn_search (raft::handle_t &handle, float *distances, int64_t *indices, knnIndex *index, int k, float *query_array, int n) |
Flat C++ API function to perform an approximate nearest neighbors search from previously built index and a query array. More... | |
void | knn_classify (raft::handle_t &handle, int *out, int64_t *knn_indices, std::vector< int * > &y, size_t n_index_rows, size_t n_query_rows, int k) |
Flat C++ API function to perform a knn classification using a given vector of label arrays. This supports multilabel classification by classifying on multiple label arrays. Note that each label is classified independently, as is done in scikit-learn. More... | |
void | knn_regress (raft::handle_t &handle, float *out, int64_t *knn_indices, std::vector< float * > &y, size_t n_index_rows, size_t n_query_rows, int k) |
Flat C++ API function to perform a knn regression using a given vector of label arrays. This supports multilabel regression by classifying on multiple label arrays. Note that each label is classified independently, as is done in scikit-learn. More... | |
void | knn_class_proba (raft::handle_t &handle, std::vector< float * > &out, int64_t *knn_indices, std::vector< int * > &y, size_t n_index_rows, size_t n_query_rows, int k) |
Flat C++ API function to compute knn class probabilities using a vector of device arrays containing discrete class labels. Note that the output is a vector. More... | |
template<typename math_t > | |
void | RPROJfit (const raft::handle_t &handle, rand_mat< math_t > *random_matrix, paramsRPROJ *params) |
template<typename math_t > | |
void | RPROJtransform (const raft::handle_t &handle, math_t *input, rand_mat< math_t > *random_matrix, math_t *output, paramsRPROJ *params) |
size_t | johnson_lindenstrauss_min_dim (size_t n_samples, double eps) |
int | divide_by_mask_build_index (const raft::handle_t &handle, const bool *d_mask, int *d_index, int batch_size) |
void | divide_by_mask_execute (const raft::handle_t &handle, const float *d_in, const bool *d_mask, const int *d_index, float *d_out0, float *d_out1, int batch_size, int n_obs) |
void | divide_by_mask_execute (const raft::handle_t &handle, const double *d_in, const bool *d_mask, const int *d_index, double *d_out0, double *d_out1, int batch_size, int n_obs) |
void | divide_by_mask_execute (const raft::handle_t &handle, const int *d_in, const bool *d_mask, const int *d_index, int *d_out0, int *d_out1, int batch_size, int n_obs) |
void | divide_by_min_build_index (const raft::handle_t &handle, const float *d_matrix, int *d_batch, int *d_index, int *h_size, int batch_size, int n_sub) |
void | divide_by_min_build_index (const raft::handle_t &handle, const double *d_matrix, int *d_batch, int *d_index, int *h_size, int batch_size, int n_sub) |
void | divide_by_min_execute (const raft::handle_t &handle, const float *d_in, const int *d_batch, const int *d_index, float **hd_out, int batch_size, int n_sub, int n_obs) |
void | divide_by_min_execute (const raft::handle_t &handle, const double *d_in, const int *d_batch, const int *d_index, double **hd_out, int batch_size, int n_sub, int n_obs) |
void | divide_by_min_execute (const raft::handle_t &handle, const int *d_in, const int *d_batch, const int *d_index, int **hd_out, int batch_size, int n_sub, int n_obs) |
void | build_division_map (const raft::handle_t &handle, const int *const *hd_id, const int *h_size, int *d_id_to_pos, int *d_id_to_model, int batch_size, int n_sub) |
void | merge_series (const raft::handle_t &handle, const float *const *hd_in, const int *d_id_to_pos, const int *d_id_to_sub, float *d_out, int batch_size, int n_sub, int n_obs) |
void | merge_series (const raft::handle_t &handle, const double *const *hd_in, const int *d_id_to_pos, const int *d_id_to_sub, double *d_out, int batch_size, int n_sub, int n_obs) |
void | pack (raft::handle_t &handle, const ARIMAParams< double > &params, const ARIMAOrder &order, int batch_size, double *param_vec) |
void | unpack (raft::handle_t &handle, ARIMAParams< double > &params, const ARIMAOrder &order, int batch_size, const double *param_vec) |
bool | detect_missing (raft::handle_t &handle, const double *d_y, int n_elem) |
void | batched_diff (raft::handle_t &handle, double *d_y_diff, const double *d_y, int batch_size, int n_obs, const ARIMAOrder &order) |
void | batched_loglike (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, const double *d_params, double *loglike, bool trans=true, bool host_loglike=true, LoglikeMethod method=MLE, int truncate=0) |
void | batched_loglike (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, const ARIMAParams< double > &params, double *loglike, bool trans=true, bool host_loglike=true, LoglikeMethod method=MLE, int truncate=0, int fc_steps=0, double *d_fc=nullptr, const double *d_exog_fut=nullptr, double level=0, double *d_lower=nullptr, double *d_upper=nullptr) |
void | batched_loglike_grad (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, const double *d_x, double *d_grad, double h, bool trans=true, LoglikeMethod method=MLE, int truncate=0) |
void | predict (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, const double *d_exog_fut, int batch_size, int n_obs, int start, int end, const ARIMAOrder &order, const ARIMAParams< double > &params, double *d_y_p, bool pre_diff=true, double level=0, double *d_lower=nullptr, double *d_upper=nullptr) |
void | information_criterion (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, const ARIMAParams< double > &params, double *ic, int ic_type) |
void | estimate_x0 (raft::handle_t &handle, ARIMAParams< double > &params, const double *d_y, const double *d_exog, int batch_size, int n_obs, const ARIMAOrder &order, bool missing) |
void | batched_kalman_filter (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const double *d_ys, const double *d_exog, int nobs, const ARIMAParams< double > &params, const ARIMAOrder &order, int batch_size, double *d_loglike, double *d_pred, int fc_steps=0, double *d_fc=nullptr, const double *d_exog_fut=nullptr, double level=0, double *d_lower=nullptr, double *d_upper=nullptr) |
void | batched_jones_transform (raft::handle_t &handle, const ARIMAMemory< double > &arima_mem, const ARIMAOrder &order, int batch_size, bool isInv, const double *h_params, double *h_Tparams) |
int | convert_level_to_spdlog (int level) |
void | PUSH_RANGE (const char *name, cudaStream_t stream) |
Synchronize CUDA stream and push a named nvtx range. More... | |
void | POP_RANGE (cudaStream_t stream) |
Synchronize CUDA stream and pop the latest nvtx range. More... | |
void | PUSH_RANGE (const char *name) |
Push a named nvtx range. More... | |
void | POP_RANGE () |
template<typename T > | |
void | col_ref (const SimpleDenseMat< T > &mat, SimpleVec< T > &mask_vec, int c) |
template<typename T > | |
void | col_slice (const SimpleDenseMat< T > &mat, SimpleDenseMat< T > &mask_mat, int c_from, int c_to) |
template<typename T > | |
T | dot (const SimpleVec< T > &u, const SimpleVec< T > &v, T *tmp_dev, cudaStream_t stream) |
template<typename T > | |
T | squaredNorm (const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream) |
template<typename T > | |
T | nrmMax (const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream) |
template<typename T > | |
T | nrm2 (const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream) |
template<typename T > | |
T | nrm1 (const SimpleVec< T > &u, T *tmp_dev, cudaStream_t stream) |
template<typename T > | |
std::ostream & | operator<< (std::ostream &os, const SimpleVec< T > &v) |
template<typename T > | |
std::ostream & | operator<< (std::ostream &os, const SimpleDenseMat< T > &mat) |
template<typename T , typename I = int> | |
void | check_csr (const SimpleSparseMat< T, I > &mat, cudaStream_t stream) |
template<typename T , typename I = int> | |
std::ostream & | operator<< (std::ostream &os, const SimpleSparseMat< T, I > &mat) |
cumlError_t | knn_search (const cumlHandle_t handle, float **input, int *sizes, int n_params, int D, float *search_items, int n, int64_t *res_I, float *res_D, int k, bool rowMajorIndex, bool rowMajorQuery, int metric_type, float metric_arg, bool expanded) |
Flat C API function to perform a brute force knn on a series of input arrays and combine the results into a single output array for indexes and distances. More... | |
int | get_device (const void *ptr) |
cudaMemoryType | memory_type (const void *p) |
bool | is_device_or_managed_type (const void *p) |
Variables | |
HandleMap | handleMap |
Static handle map instance (see cumlHandle.cpp) More... | |
typedef int64_t ML::knn_indices_dense_t |
typedef int ML::knn_indices_sparse_t |
using ML::nn_index_params = raft::neighbors::experimental::nn_descent::index_params |
typedef paramsPCATemplate ML::paramsPCA |
typedef paramsPCATemplate<mg_solver> ML::paramsPCAMG |
typedef paramsTSVDTemplate ML::paramsTSVD |
typedef paramsTSVDTemplate<mg_solver> ML::paramsTSVDMG |
typedef RandomForestMetaData<double, int> ML::RandomForestClassifierD |
typedef RandomForestMetaData<float, int> ML::RandomForestClassifierF |
typedef RandomForestMetaData<double, double> ML::RandomForestRegressorD |
typedef RandomForestMetaData<float, float> ML::RandomForestRegressorF |
enum ML::CRITERION |
enum ML::LoglikeMethod |
enum ML::loss_funct |
enum ML::lr_type |
enum class ML::mg_solver |
enum ML::Norm |
enum ML::OptimCriterion |
enum ML::penalty |
enum ML::RF_type |
enum ML::SeasonalType |
enum class ML::solver |
enum ML::STORAGE_ORDER |
enum ML::task_category |
enum ML::TSNE_ALGORITHM |
enum ML::TSNE_INIT |
void ML::_extract_clusters | ( | const raft::handle_t & | handle, |
size_t | n_leaves, | ||
int | n_edges, | ||
int * | parents, | ||
int * | children, | ||
float * | lambdas, | ||
int * | sizes, | ||
int * | labels, | ||
float * | probabilities, | ||
HDBSCAN::Common::CLUSTER_SELECTION_METHOD | cluster_selection_method, | ||
bool | allow_single_cluster, | ||
int | max_cluster_size, | ||
float | cluster_selection_epsilon | ||
) |
void ML::approx_knn_build_index | ( | raft::handle_t & | handle, |
knnIndex * | index, | ||
knnIndexParam * | params, | ||
cuvs::distance::DistanceType | metric, | ||
float | metricArg, | ||
float * | index_array, | ||
int | n, | ||
int | D | ||
) |
Flat C++ API function to build an approximate nearest neighbors index from an index array and a set of parameters.
[in] | handle | RAFT handle |
[out] | index | index to be built |
[in] | params | parametrization of the index to be built |
[in] | metric | distance metric to use. Euclidean (L2) is used by default |
[in] | metricArg | metric argument |
[in] | index_array | the index array to build the index with |
[in] | n | number of rows in the index array |
[in] | D | the dimensionality of the index array |
void ML::approx_knn_search | ( | raft::handle_t & | handle, |
float * | distances, | ||
int64_t * | indices, | ||
knnIndex * | index, | ||
int | k, | ||
float * | query_array, | ||
int | n | ||
) |
Flat C++ API function to perform an approximate nearest neighbors search from previously built index and a query array.
[in] | handle | RAFT handle |
[out] | distances | distances of the nearest neighbors toward their query point |
[out] | indices | indices of the nearest neighbors |
[in] | index | index to perform a search with |
[in] | k | the number of nearest neighbors to search for |
[in] | query_array | the query to perform a search with |
[in] | n | number of rows in the query array |
void ML::batched_diff | ( | raft::handle_t & | handle, |
double * | d_y_diff, | ||
const double * | d_y, | ||
int | batch_size, | ||
int | n_obs, | ||
const ARIMAOrder & | order | ||
) |
Compute the differenced series (seasonal and/or non-seasonal differences)
[in] | handle | cuML handle |
[out] | d_y_diff | Differenced series |
[in] | d_y | Original series |
[in] | batch_size | Batch size |
[in] | n_obs | Number of observations |
[in] | order | ARIMA order |
void ML::batched_jones_transform | ( | raft::handle_t & | handle, |
const ARIMAMemory< double > & | arima_mem, | ||
const ARIMAOrder & | order, | ||
int | batch_size, | ||
bool | isInv, | ||
const double * | h_params, | ||
double * | h_Tparams | ||
) |
Convenience function for batched "jones transform" used in ARIMA to ensure certain properties of the AR and MA parameters (takes host array and returns host array)
[in] | handle | cuML handle |
[in] | arima_mem | Pre-allocated temporary memory |
[in] | order | ARIMA hyper-parameters |
[in] | batch_size | Number of time series analyzed. |
[in] | isInv | Do the inverse transform? |
[in] | h_params | ARIMA parameters by batch (mu, ar, ma) (host) |
[out] | h_Tparams | Transformed ARIMA parameters (expects pre-allocated array of size (p+q)*batch_size) (host) |
void ML::batched_kalman_filter | ( | raft::handle_t & | handle, |
const ARIMAMemory< double > & | arima_mem, | ||
const double * | d_ys, | ||
const double * | d_exog, | ||
int | nobs, | ||
const ARIMAParams< double > & | params, | ||
const ARIMAOrder & | order, | ||
int | batch_size, | ||
double * | d_loglike, | ||
double * | d_pred, | ||
int | fc_steps = 0 , |
||
double * | d_fc = nullptr , |
||
const double * | d_exog_fut = nullptr , |
||
double | level = 0 , |
||
double * | d_lower = nullptr , |
||
double * | d_upper = nullptr |
||
) |
An ARIMA specialized batched kalman filter to evaluate ARMA parameters and provide the resulting prediction as well as loglikelihood fit.
[in] | handle | cuML handle |
[in] | arima_mem | Pre-allocated temporary memory |
[in] | d_ys | Batched time series Shape (nobs, batch_size) (col-major, device) |
[in] | d_exog | Batched exogenous variables Shape (nobs, n_exog * batch_size) (col-major, device) |
[in] | nobs | Number of samples per time series |
[in] | params | ARIMA parameters (device) |
[in] | order | ARIMA hyper-parameters |
[in] | batch_size | Number of series making up the batch |
[out] | d_loglike | Resulting log-likelihood (per series) (device) |
[out] | d_pred | Predictions shape=(nobs-d-s*D, batch_size) (device) |
[in] | fc_steps | Number of steps to forecast |
[in] | d_fc | Array to store the forecast |
[in] | d_exog_fut | Future values of exogenous variables Shape (fc_steps, n_exog * batch_size) (col-major, device) |
[in] | level | Confidence level for prediction intervals. 0 to skip the computation. Else 0 < level < 1 |
[out] | d_lower | Lower limit of the prediction interval |
[out] | d_upper | Upper limit of the prediction interval |
void ML::batched_loglike | ( | raft::handle_t & | handle, |
const ARIMAMemory< double > & | arima_mem, | ||
const double * | d_y, | ||
const double * | d_exog, | ||
int | batch_size, | ||
int | n_obs, | ||
const ARIMAOrder & | order, | ||
const ARIMAParams< double > & | params, | ||
double * | loglike, | ||
bool | trans = true , |
||
bool | host_loglike = true , |
||
LoglikeMethod | method = MLE , |
||
int | truncate = 0 , |
||
int | fc_steps = 0 , |
||
double * | d_fc = nullptr , |
||
const double * | d_exog_fut = nullptr , |
||
double | level = 0 , |
||
double * | d_lower = nullptr , |
||
double * | d_upper = nullptr |
||
) |
Compute the loglikelihood of the given parameter on the given time series in a batched context.
[in] | handle | cuML handle |
[in] | arima_mem | Pre-allocated temporary memory |
[in] | d_y | Series to fit: shape = (n_obs, batch_size) and expects column major data layout. (device) |
[in] | d_exog | Exogenous variables: shape = (n_obs, n_exog * batch_size) and expects column major data layout. (device) |
[in] | batch_size | Number of time series |
[in] | n_obs | Number of observations in a time series |
[in] | order | ARIMA hyper-parameters |
[in] | params | ARIMA parameters (device) |
[out] | loglike | Log-Likelihood of the model per series |
[in] | trans | Run jones_transform on params. |
[in] | host_loglike | Whether loglike is a host pointer |
[in] | method | Whether to use sum-of-squares or Kalman filter |
[in] | truncate | For CSS, start the sum-of-squares after a given number of observations |
[in] | fc_steps | Number of steps to forecast |
[in] | d_fc | Array to store the forecast |
[in] | d_exog_fut | Future values of exogenous variables Shape (fc_steps, n_exog * batch_size) (col-major, device) |
[in] | level | Confidence level for prediction intervals. 0 to skip the computation. Else 0 < level < 1 |
[out] | d_lower | Lower limit of the prediction interval |
[out] | d_upper | Upper limit of the prediction interval |
void ML::batched_loglike | ( | raft::handle_t & | handle, |
const ARIMAMemory< double > & | arima_mem, | ||
const double * | d_y, | ||
const double * | d_exog, | ||
int | batch_size, | ||
int | n_obs, | ||
const ARIMAOrder & | order, | ||
const double * | d_params, | ||
double * | loglike, | ||
bool | trans = true , |
||
bool | host_loglike = true , |
||
LoglikeMethod | method = MLE , |
||
int | truncate = 0 |
||
) |
Compute the loglikelihood of the given parameter on the given time series in a batched context.
[in] | handle | cuML handle |
[in] | arima_mem | Pre-allocated temporary memory |
[in] | d_y | Series to fit: shape = (n_obs, batch_size) and expects column major data layout. (device) |
[in] | d_exog | Exogenous variables: shape = (n_obs, n_exog * batch_size) and expects column major data layout. (device) |
[in] | batch_size | Number of time series |
[in] | n_obs | Number of observations in a time series |
[in] | order | ARIMA hyper-parameters |
[in] | d_params | Parameters to evaluate grouped by series: [mu0, ar.., ma.., mu1, ..] (device) |
[out] | loglike | Log-Likelihood of the model per series |
[in] | trans | Run jones_transform on params. |
[in] | host_loglike | Whether loglike is a host pointer |
[in] | method | Whether to use sum-of-squares or Kalman filter |
[in] | truncate | For CSS, start the sum-of-squares after a given number of observations |
void ML::batched_loglike_grad | ( | raft::handle_t & | handle, |
const ARIMAMemory< double > & | arima_mem, | ||
const double * | d_y, | ||
const double * | d_exog, | ||
int | batch_size, | ||
int | n_obs, | ||
const ARIMAOrder & | order, | ||
const double * | d_x, | ||
double * | d_grad, | ||
double | h, | ||
bool | trans = true , |
||
LoglikeMethod | method = MLE , |
||
int | truncate = 0 |
||
) |
Compute the gradient of the log-likelihood
[in] | handle | cuML handle |
[in] | arima_mem | Pre-allocated temporary memory |
[in] | d_y | Series to fit: shape = (n_obs, batch_size) and expects column major data layout. (device) |
[in] | d_exog | Exogenous variables: shape = (n_obs, n_exog * batch_size) and expects column major data layout. (device) |
[in] | batch_size | Number of time series |
[in] | n_obs | Number of observations in a time series |
[in] | order | ARIMA hyper-parameters |
[in] | d_x | Parameters grouped by series |
[out] | d_grad | Gradient to compute |
[in] | h | Finite-differencing step size |
[in] | trans | Run jones_transform on params |
[in] | method | Whether to use sum-of-squares or Kalman filter |
[in] | truncate | For CSS, start the sum-of-squares after a given number of observations |
void ML::brute_force_knn | ( | const raft::handle_t & | handle, |
std::vector< float * > & | input, | ||
std::vector< int > & | sizes, | ||
int | D, | ||
float * | search_items, | ||
int | n, | ||
int64_t * | res_I, | ||
float * | res_D, | ||
int | k, | ||
bool | rowMajorIndex = false , |
||
bool | rowMajorQuery = false , |
||
cuvs::distance::DistanceType | metric = cuvs::distance::DistanceType::L2Expanded , |
||
float | metric_arg = 2.0f , |
||
std::vector< int64_t > * | translations = nullptr |
||
) |
Flat C++ API function to perform a brute force knn on a series of input arrays and combine the results into a single output array for indexes and distances.
[in] | handle | RAFT handle |
[in] | input | vector of pointers to the input arrays |
[in] | sizes | vector of sizes of input arrays |
[in] | D | the dimensionality of the arrays |
[in] | search_items | array of items to search of dimensionality D |
[in] | n | number of rows in search_items |
[out] | res_I | the resulting index array of size n * k |
[out] | res_D | the resulting distance array of size n * k |
[in] | k | the number of nearest neighbors to return |
[in] | rowMajorIndex | are the index arrays in row-major order? |
[in] | rowMajorQuery | are the query arrays in row-major order? |
[in] | metric | distance metric to use. Euclidean (L2) is used by default |
[in] | metric_arg | the value of p for Minkowski (l-p) distances. This is ignored if the metric is not Minkowski. |
[in] | translations | translation ids for indices when index rows represent non-contiguous partitions |
void ML::build_condensed_hierarchy | ( | const raft::handle_t & | handle, |
const int * | children, | ||
const float * | delta, | ||
const int * | sizes, | ||
int | min_cluster_size, | ||
int | n_leaves, | ||
HDBSCAN::Common::CondensedHierarchy< int, float > & | condensed_tree | ||
) |
void ML::build_division_map | ( | const raft::handle_t & | handle, |
const int *const * | hd_id, | ||
const int * | h_size, | ||
int * | d_id_to_pos, | ||
int * | d_id_to_model, | ||
int | batch_size, | ||
int | n_sub | ||
) |
Build a map to associate each batch member with a model and index in the associated sub-batch
[in] | handle | cuML handle |
[in] | hd_id | Host array of pointers to device arrays containing the indices of the members of each sub-batch |
[in] | h_size | Host array containing the size of each sub-batch |
[out] | d_id_to_pos | Device array containing the position of each member in its new sub-batch |
[out] | d_id_to_model | Device array associating each member with its sub-batch |
[in] | batch_size | Batch size |
[in] | n_sub | Number of sub-batches |
void ML::build_treelite_forest | ( | TreeliteModelHandle * | model, |
const RandomForestMetaData< T, L > * | forest, | ||
int | num_features | ||
) |
|
inline |
|
inline |
|
inline |
void ML::compute_all_points_membership_vectors | ( | const raft::handle_t & | handle, |
HDBSCAN::Common::CondensedHierarchy< int, float > & | condensed_tree, | ||
HDBSCAN::Common::PredictionData< int, float > & | prediction_data, | ||
const float * | X, | ||
cuvs::distance::DistanceType | metric, | ||
float * | membership_vec, | ||
size_t | batch_size = 4096 |
||
) |
void ML::compute_membership_vector | ( | const raft::handle_t & | handle, |
HDBSCAN::Common::CondensedHierarchy< int, float > & | condensed_tree, | ||
HDBSCAN::Common::PredictionData< int, float > & | prediction_data, | ||
const float * | X, | ||
const float * | points_to_predict, | ||
size_t | n_prediction_points, | ||
int | min_samples, | ||
cuvs::distance::DistanceType | metric, | ||
float * | membership_vec, | ||
size_t | batch_size = 4096 |
||
) |
TreeliteModelHandle ML::concatenate_trees | ( | std::vector< TreeliteModelHandle > | treelite_handles | ) |
int ML::convert_level_to_spdlog | ( | int | level | ) |
void ML::delete_rf_metadata | ( | RandomForestMetaData< T, L > * | forest | ) |
bool ML::detect_missing | ( | raft::handle_t & | handle, |
const double * | d_y, | ||
int | n_elem | ||
) |
Detect missing observations in a time series
[in] | handle | cuML handle |
[in] | d_y | Time series |
[in] | n_elem | Total number of elements in the dataset |
int ML::divide_by_mask_build_index | ( | const raft::handle_t & | handle, |
const bool * | d_mask, | ||
int * | d_index, | ||
int | batch_size | ||
) |
Batch division by mask step 1: build an index of the position of each series in its new batch and measure the size of each sub-batch
[in] | handle | cuML handle |
[in] | d_mask | Boolean mask |
[out] | d_index | Index of each series in its new batch |
[in] | batch_size | Batch size |
void ML::divide_by_mask_execute | ( | const raft::handle_t & | handle, |
const double * | d_in, | ||
const bool * | d_mask, | ||
const int * | d_index, | ||
double * | d_out0, | ||
double * | d_out1, | ||
int | batch_size, | ||
int | n_obs | ||
) |
void ML::divide_by_mask_execute | ( | const raft::handle_t & | handle, |
const float * | d_in, | ||
const bool * | d_mask, | ||
const int * | d_index, | ||
float * | d_out0, | ||
float * | d_out1, | ||
int | batch_size, | ||
int | n_obs | ||
) |
Batch division by mask step 2: create both sub-batches from the mask and index
[in] | handle | cuML handle |
[in] | d_in | Input batch. Each series is a contiguous chunk |
[in] | d_mask | Boolean mask |
[in] | d_index | Index of each series in its new batch |
[out] | d_out0 | The sub-batch for the 'false' members |
[out] | d_out1 | The sub-batch for the 'true' members |
[in] | batch_size | Batch size |
[in] | n_obs | Number of data points per series |
void ML::divide_by_mask_execute | ( | const raft::handle_t & | handle, |
const int * | d_in, | ||
const bool * | d_mask, | ||
const int * | d_index, | ||
int * | d_out0, | ||
int * | d_out1, | ||
int | batch_size, | ||
int | n_obs | ||
) |
void ML::divide_by_min_build_index | ( | const raft::handle_t & | handle, |
const double * | d_matrix, | ||
int * | d_batch, | ||
int * | d_index, | ||
int * | h_size, | ||
int | batch_size, | ||
int | n_sub | ||
) |
void ML::divide_by_min_build_index | ( | const raft::handle_t & | handle, |
const float * | d_matrix, | ||
int * | d_batch, | ||
int * | d_index, | ||
int * | h_size, | ||
int | batch_size, | ||
int | n_sub | ||
) |
Batch division by minimum value step 1: build an index of which sub-batch each series belongs to, an index of the position of each series in its new batch, and measure the size of each sub-batch
[in] | handle | cuML handle |
[in] | d_matrix | Matrix of the values to minimize. Shape: (batch_size, n_sub) |
[out] | d_batch | Which sub-batch each series belongs to |
[out] | d_index | Index of each series in its new batch |
[out] | h_size | Size of each sub-batch (host) |
[in] | batch_size | Batch size |
[in] | n_sub | Number of sub-batches |
void ML::divide_by_min_execute | ( | const raft::handle_t & | handle, |
const double * | d_in, | ||
const int * | d_batch, | ||
const int * | d_index, | ||
double ** | hd_out, | ||
int | batch_size, | ||
int | n_sub, | ||
int | n_obs | ||
) |
void ML::divide_by_min_execute | ( | const raft::handle_t & | handle, |
const float * | d_in, | ||
const int * | d_batch, | ||
const int * | d_index, | ||
float ** | hd_out, | ||
int | batch_size, | ||
int | n_sub, | ||
int | n_obs | ||
) |
Batch division by minimum value step 2: create all the sub-batches
[in] | handle | cuML handle |
[in] | d_in | Input batch. Each series is a contiguous chunk |
[in] | d_batch | Which sub-batch each series belongs to |
[in] | d_index | Index of each series in its new sub-batch |
[out] | hd_out | Host array of pointers to device arrays of each sub-batch |
[in] | batch_size | Batch size |
[in] | n_sub | Number of sub-batches |
[in] | n_obs | Number of data points per series |
void ML::divide_by_min_execute | ( | const raft::handle_t & | handle, |
const int * | d_in, | ||
const int * | d_batch, | ||
const int * | d_index, | ||
int ** | hd_out, | ||
int | batch_size, | ||
int | n_sub, | ||
int | n_obs | ||
) |
|
inline |
void ML::estimate_x0 | ( | raft::handle_t & | handle, |
ARIMAParams< double > & | params, | ||
const double * | d_y, | ||
const double * | d_exog, | ||
int | batch_size, | ||
int | n_obs, | ||
const ARIMAOrder & | order, | ||
bool | missing | ||
) |
Provide initial estimates to ARIMA parameters mu, AR, and MA
[in] | handle | cuML handle |
[out] | params | ARIMA parameters (device) |
[in] | d_y | Series to fit: shape = (n_obs, batch_size) and expects column major data layout. (device) |
[in] | d_exog | Exogenous variables. Shape = (n_obs, n_exog * batch_size) (device) |
[in] | batch_size | Total number of batched time series |
[in] | n_obs | Number of samples per time series (all series must be identical) |
[in] | order | ARIMA hyper-parameters |
[in] | missing | Are there missing observations? |
void ML::fit | ( | const raft::handle_t & | user_handle, |
RandomForestClassifierD *& | forest, | ||
double * | input, | ||
int | n_rows, | ||
int | n_cols, | ||
int * | labels, | ||
int | n_unique_labels, | ||
RF_params | rf_params, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
void ML::fit | ( | const raft::handle_t & | user_handle, |
RandomForestClassifierF *& | forest, | ||
float * | input, | ||
int | n_rows, | ||
int | n_cols, | ||
int * | labels, | ||
int | n_unique_labels, | ||
RF_params | rf_params, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
void ML::fit | ( | const raft::handle_t & | user_handle, |
RandomForestRegressorD *& | forest, | ||
double * | input, | ||
int | n_rows, | ||
int | n_cols, | ||
double * | labels, | ||
RF_params | rf_params, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
void ML::fit | ( | const raft::handle_t & | user_handle, |
RandomForestRegressorF *& | forest, | ||
float * | input, | ||
int | n_rows, | ||
int | n_cols, | ||
float * | labels, | ||
RF_params | rf_params, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
|
inline |
std::string ML::get_rf_detailed_text | ( | const RandomForestMetaData< T, L > * | forest | ) |
std::string ML::get_rf_json | ( | const RandomForestMetaData< T, L > * | forest | ) |
std::string ML::get_rf_summary_text | ( | const RandomForestMetaData< T, L > * | forest | ) |
void ML::hdbscan | ( | const raft::handle_t & | handle, |
const float * | X, | ||
size_t | m, | ||
size_t | n, | ||
cuvs::distance::DistanceType | metric, | ||
HDBSCAN::Common::HDBSCANParams & | params, | ||
HDBSCAN::Common::hdbscan_output< int, float > & | out, | ||
float * | core_dists | ||
) |
Executes HDBSCAN clustering on an mxn-dimensional input array, X.
Note that while the algorithm is generally deterministic and should provide matching results between RAPIDS and the Scikit-learn Contrib versions, the construction of the k-nearest neighbors graph and minimum spanning tree can introduce differences between the two algorithms, especially when several nearest neighbors around a point might have the same distance. While the differences in the minimum spanning trees alone might be subtle, they can (and often will) lead to some points being assigned different cluster labels between the two implementations.
[in] | handle | raft handle for resource reuse |
[in] | X | array (size m, n) on device in row-major format |
m | number of rows in X | |
n | number of columns in X | |
metric | distance metric to use | |
params | struct of configuration hyper-parameters | |
out | struct of output data and arrays on device | |
core_dists | array (size m, 1) of core distances |
void ML::information_criterion | ( | raft::handle_t & | handle, |
const ARIMAMemory< double > & | arima_mem, | ||
const double * | d_y, | ||
const double * | d_exog, | ||
int | batch_size, | ||
int | n_obs, | ||
const ARIMAOrder & | order, | ||
const ARIMAParams< double > & | params, | ||
double * | ic, | ||
int | ic_type | ||
) |
Compute an information criterion (AIC, AICc, BIC)
[in] | handle | cuML handle |
[in] | arima_mem | Pre-allocated temporary memory |
[in] | d_y | Series to fit: shape = (n_obs, batch_size) and expects column major data layout. (device) |
[in] | d_exog | Exogenous variables. Shape = (n_obs, n_exog * batch_size) (device) |
[in] | batch_size | Total number of batched time series |
[in] | n_obs | Number of samples per time series (all series must be identical) |
[in] | order | ARIMA hyper-parameters |
[in] | params | ARIMA parameters (device) |
[out] | ic | Array where to write the information criteria. Shape: (batch_size) (device) |
[in] | ic_type | Type of information criterion wanted. 0: AIC, 1: AICc, 2: BIC |
|
inline |
void ML::knn_class_proba | ( | raft::handle_t & | handle, |
std::vector< float * > & | out, | ||
int64_t * | knn_indices, | ||
std::vector< int * > & | y, | ||
size_t | n_index_rows, | ||
size_t | n_query_rows, | ||
int | k | ||
) |
Flat C++ API function to compute knn class probabilities using a vector of device arrays containing discrete class labels. Note that the output is a vector of device arrays, one per output.
[in] | handle | RAFT handle |
[out] | out | vector of output arrays on device. vector size = n_outputs. Each array should have size(n_samples, n_classes) |
[in] | knn_indices | array on device of knn indices (size n_samples * k) |
[in] | y | array of labels on device (size n_samples) |
[in] | n_index_rows | number of labels in y |
[in] | n_query_rows | number of rows in knn_indices and out |
[in] | k | number of nearest neighbors in knn_indices |
void ML::knn_classify | ( | raft::handle_t & | handle, |
int * | out, | ||
int64_t * | knn_indices, | ||
std::vector< int * > & | y, | ||
size_t | n_index_rows, | ||
size_t | n_query_rows, | ||
int | k | ||
) |
Flat C++ API function to perform a knn classification using a given vector of label arrays. This supports multilabel classification by classifying on multiple label arrays. Note that each label is classified independently, as is done in scikit-learn.
[in] | handle | RAFT handle |
[out] | out | output array on device (size n_samples * size of y vector) |
[in] | knn_indices | index array on device resulting from knn query (size n_samples * k) |
[in] | y | vector of label arrays on device, one per output (each of size n_samples) |
[in] | n_index_rows | number of vertices in index (eg. size of each y array) |
[in] | n_query_rows | number of samples in knn_indices |
[in] | k | number of nearest neighbors in knn_indices |
void ML::knn_regress | ( | raft::handle_t & | handle, |
float * | out, | ||
int64_t * | knn_indices, | ||
std::vector< float * > & | y, | ||
size_t | n_index_rows, | ||
size_t | n_query_rows, | ||
int | k | ||
) |
Flat C++ API function to perform a knn regression using a given vector of label arrays. This supports multilabel regression by regressing on multiple label arrays. Note that each label is regressed independently, as is done in scikit-learn.
[in] | handle | RAFT handle |
[out] | out | output array on device (size n_samples) |
[in] | knn_indices | array on device of knn indices (size n_samples * k) |
[in] | y | array of labels on device (size n_samples) |
[in] | n_index_rows | number of vertices in index (eg. size of each y array) |
[in] | n_query_rows | number of samples in knn_indices and out |
[in] | k | number of nearest neighbors in knn_indices |
cumlError_t ML::knn_search | ( | const cumlHandle_t | handle, |
float ** | input, | ||
int * | sizes, | ||
int | n_params, | ||
int | D, | ||
float * | search_items, | ||
int | n, | ||
int64_t * | res_I, | ||
float * | res_D, | ||
int | k, | ||
bool | rowMajorIndex, | ||
bool | rowMajorQuery, | ||
int | metric_type, | ||
float | metric_arg, | ||
bool | expanded | ||
) |
Flat C API function to perform a brute force knn on a series of input arrays and combine the results into a single output array for indexes and distances.
[in] | handle | the cuml handle to use |
[in] | input | an array of pointers to the input arrays |
[in] | sizes | an array of sizes of input arrays |
[in] | n_params | array size of input and sizes |
[in] | D | the dimensionality of the arrays |
[in] | search_items | array of items to search of dimensionality D |
[in] | n | number of rows in search_items |
[out] | res_I | the resulting index array of size n * k |
[out] | res_D | the resulting distance array of size n * k |
[in] | k | the number of nearest neighbors to return |
[in] | rowMajorIndex | is the index array in row major layout? |
[in] | rowMajorQuery | is the query array in row major layout? |
[in] | metric_type | distance metric to use. Specify the metric using the integer value of the enum ML::MetricType. |
[in] | metric_arg | the value of p for Minkowski (l-p) distances. This is ignored if the metric_type is not Minkowski. |
[in] | expanded | should lp-based distances be returned in their expanded form (e.g., without raising to the 1/p power). |
|
inline |
void ML::merge_series | ( | const raft::handle_t & | handle, |
const double *const * | hd_in, | ||
const int * | d_id_to_pos, | ||
const int * | d_id_to_sub, | ||
double * | d_out, | ||
int | batch_size, | ||
int | n_sub, | ||
int | n_obs | ||
) |
void ML::merge_series | ( | const raft::handle_t & | handle, |
const float *const * | hd_in, | ||
const int * | d_id_to_pos, | ||
const int * | d_id_to_sub, | ||
float * | d_out, | ||
int | batch_size, | ||
int | n_sub, | ||
int | n_obs | ||
) |
Merge multiple sub-batches into one batch according to the maps that associate each id in the unique batch to a sub-batch and a position in this sub-batch.
[in] | handle | cuML handle |
[in] | hd_in | Host array of pointers to device arrays containing the sub-batches |
[in] | d_id_to_pos | Device array containing the position of each member in its new sub-batch |
[in] | d_id_to_sub | Device array associating each member with its sub-batch |
[out] | d_out | Output merged batch |
[in] | batch_size | Batch size |
[in] | n_sub | Number of sub-batches |
[in] | n_obs | Number of observations (or forecasts) per series |
|
inline |
|
inline |
|
inline |
std::ostream& ML::operator<< | ( | std::ostream & | os, |
const SimpleDenseMat< T > & | mat | ||
) |
std::ostream& ML::operator<< | ( | std::ostream & | os, |
const SimpleSparseMat< T, I > & | mat | ||
) |
std::ostream& ML::operator<< | ( | std::ostream & | os, |
const SimpleVec< T > & | v | ||
) |
void ML::out_of_sample_predict | ( | const raft::handle_t & | handle, |
HDBSCAN::Common::CondensedHierarchy< int, float > & | condensed_tree, | ||
HDBSCAN::Common::PredictionData< int, float > & | prediction_data, | ||
const float * | X, | ||
int * | labels, | ||
const float * | points_to_predict, | ||
size_t | n_prediction_points, | ||
cuvs::distance::DistanceType | metric, | ||
int | min_samples, | ||
int * | out_labels, | ||
float * | out_probabilities | ||
) |
void ML::pack | ( | raft::handle_t & | handle, |
const ARIMAParams< double > & | params, | ||
const ARIMAOrder & | order, | ||
int | batch_size, | ||
double * | param_vec | ||
) |
Pack separate parameter arrays into a compact array
[in] | handle | cuML handle |
[in] | params | Parameter structure |
[in] | order | ARIMA order |
[in] | batch_size | Batch size |
[out] | param_vec | Compact parameter array |
void ML::pcaFit | ( | raft::handle_t & | handle, |
double * | input, | ||
double * | components, | ||
double * | explained_var, | ||
double * | explained_var_ratio, | ||
double * | singular_vals, | ||
double * | mu, | ||
double * | noise_vars, | ||
const paramsPCA & | prms | ||
) |
void ML::pcaFit | ( | raft::handle_t & | handle, |
float * | input, | ||
float * | components, | ||
float * | explained_var, | ||
float * | explained_var_ratio, | ||
float * | singular_vals, | ||
float * | mu, | ||
float * | noise_vars, | ||
const paramsPCA & | prms | ||
) |
void ML::pcaFitTransform | ( | raft::handle_t & | handle, |
double * | input, | ||
double * | trans_input, | ||
double * | components, | ||
double * | explained_var, | ||
double * | explained_var_ratio, | ||
double * | singular_vals, | ||
double * | mu, | ||
double * | noise_vars, | ||
const paramsPCA & | prms | ||
) |
void ML::pcaFitTransform | ( | raft::handle_t & | handle, |
float * | input, | ||
float * | trans_input, | ||
float * | components, | ||
float * | explained_var, | ||
float * | explained_var_ratio, | ||
float * | singular_vals, | ||
float * | mu, | ||
float * | noise_vars, | ||
const paramsPCA & | prms | ||
) |
void ML::pcaInverseTransform | ( | raft::handle_t & | handle, |
double * | trans_input, | ||
double * | components, | ||
double * | singular_vals, | ||
double * | mu, | ||
double * | input, | ||
const paramsPCA & | prms | ||
) |
void ML::pcaInverseTransform | ( | raft::handle_t & | handle, |
float * | trans_input, | ||
float * | components, | ||
float * | singular_vals, | ||
float * | mu, | ||
float * | input, | ||
const paramsPCA & | prms | ||
) |
void ML::pcaTransform | ( | raft::handle_t & | handle, |
double * | input, | ||
double * | components, | ||
double * | trans_input, | ||
double * | singular_vals, | ||
double * | mu, | ||
const paramsPCA & | prms | ||
) |
void ML::pcaTransform | ( | raft::handle_t & | handle, |
float * | input, | ||
float * | components, | ||
float * | trans_input, | ||
float * | singular_vals, | ||
float * | mu, | ||
const paramsPCA & | prms | ||
) |
|
inline |
Pop the latest range
|
inline |
Synchronize CUDA stream and pop the latest nvtx range.
stream | stream to synchronize |
void ML::postprocess_labels | ( | int | n_rows, |
std::vector< int > & | labels, | ||
std::map< int, int > & | labels_map, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
void ML::predict | ( | const raft::handle_t & | user_handle, |
const RandomForestClassifierD * | forest, | ||
const double * | input, | ||
int | n_rows, | ||
int | n_cols, | ||
int * | predictions, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
void ML::predict | ( | const raft::handle_t & | user_handle, |
const RandomForestClassifierF * | forest, | ||
const float * | input, | ||
int | n_rows, | ||
int | n_cols, | ||
int * | predictions, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
void ML::predict | ( | const raft::handle_t & | user_handle, |
const RandomForestRegressorD * | forest, | ||
const double * | input, | ||
int | n_rows, | ||
int | n_cols, | ||
double * | predictions, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
void ML::predict | ( | const raft::handle_t & | user_handle, |
const RandomForestRegressorF * | forest, | ||
const float * | input, | ||
int | n_rows, | ||
int | n_cols, | ||
float * | predictions, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
void ML::predict | ( | raft::handle_t & | handle, |
const ARIMAMemory< double > & | arima_mem, | ||
const double * | d_y, | ||
const double * | d_exog, | ||
const double * | d_exog_fut, | ||
int | batch_size, | ||
int | n_obs, | ||
int | start, | ||
int | end, | ||
const ARIMAOrder & | order, | ||
const ARIMAParams< double > & | params, | ||
double * | d_y_p, | ||
bool | pre_diff = true , |
||
double | level = 0 , |
||
double * | d_lower = nullptr , |
||
double * | d_upper = nullptr |
||
) |
Batched in-sample and out-of-sample prediction of a time-series given all the model parameters
[in] | handle | cuML handle |
[in] | arima_mem | Pre-allocated temporary memory |
[in] | d_y | Batched Time series to predict. Shape: (num_samples, batch size) (device) |
[in] | d_exog | Exogenous variables. Shape = (n_obs, n_exog * batch_size) (device) |
[in] | d_exog_fut | Future values of exogenous variables. Shape: (end - n_obs, batch_size) (device) |
[in] | batch_size | Total number of batched time series |
[in] | n_obs | Number of samples per time series (all series must be identical) |
[in] | start | Index to start the prediction |
[in] | end | Index to end the prediction (excluded) |
[in] | order | ARIMA hyper-parameters |
[in] | params | ARIMA parameters (device) |
[out] | d_y_p | Prediction output (device) |
[in] | pre_diff | Whether to use pre-differencing |
[in] | level | Confidence level for prediction intervals. 0 to skip the computation. Else 0 < level < 1 |
[out] | d_lower | Lower limit of the prediction interval |
[out] | d_upper | Upper limit of the prediction interval |
void ML::preprocess_labels | ( | int | n_rows, |
std::vector< int > & | labels, | ||
std::map< int, int > & | labels_map, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
void ML::print | ( | const RF_metrics | rf_metrics | ) |
|
inline |
Push a named nvtx range.
name | range name |
|
inline |
Synchronize CUDA stream and push a named nvtx range.
name | range name |
stream | stream to synchronize |
void ML::rbc_build_index | ( | const raft::handle_t & | handle, |
raft::spatial::knn::BallCoverIndex< int64_t, float, uint32_t > & | index | ||
) |
void ML::rbc_knn_query | ( | const raft::handle_t & | handle, |
raft::spatial::knn::BallCoverIndex< int64_t, float, uint32_t > & | index, | ||
uint32_t | k, | ||
const float * | search_items, | ||
uint32_t | n_search_items, | ||
int64_t * | out_inds, | ||
float * | out_dists | ||
) |
RF_metrics ML::score | ( | const raft::handle_t & | user_handle, |
const RandomForestClassifierD * | forest, | ||
const int * | ref_labels, | ||
int | n_rows, | ||
const int * | predictions, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
RF_metrics ML::score | ( | const raft::handle_t & | user_handle, |
const RandomForestClassifierF * | forest, | ||
const int * | ref_labels, | ||
int | n_rows, | ||
const int * | predictions, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
RF_metrics ML::score | ( | const raft::handle_t & | user_handle, |
const RandomForestRegressorD * | forest, | ||
const double * | ref_labels, | ||
int | n_rows, | ||
const double * | predictions, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
RF_metrics ML::score | ( | const raft::handle_t & | user_handle, |
const RandomForestRegressorF * | forest, | ||
const float * | ref_labels, | ||
int | n_rows, | ||
const float * | predictions, | ||
int | verbosity = CUML_LEVEL_INFO |
||
) |
RF_metrics ML::set_all_rf_metrics | ( | RF_type | rf_type, |
float | accuracy, | ||
double | mean_abs_error, | ||
double | mean_squared_error, | ||
double | median_abs_error | ||
) |
RF_metrics ML::set_rf_metrics_classification | ( | float | accuracy | ) |
RF_metrics ML::set_rf_metrics_regression | ( | double | mean_abs_error, |
double | mean_squared_error, | ||
double | median_abs_error | ||
) |
RF_params ML::set_rf_params | ( | int | max_depth, |
int | max_leaves, | ||
float | max_features, | ||
int | max_n_bins, | ||
int | min_samples_leaf, | ||
int | min_samples_split, | ||
float | min_impurity_decrease, | ||
bool | bootstrap, | ||
int | n_trees, | ||
float | max_samples, | ||
uint64_t | seed, | ||
CRITERION | split_criterion, | ||
int | cfg_n_streams, | ||
int | max_batch_size | ||
) |
void ML::single_linkage_neighbors | ( | const raft::handle_t & | handle, |
const float * | X, | ||
size_t | m, | ||
size_t | n, | ||
raft::hierarchy::linkage_output< int > * | out, | ||
cuvs::distance::DistanceType | metric = cuvs::distance::DistanceType::L2Unexpanded , |
||
int | c = 15 , |
||
int | n_clusters = 5 |
||
) |
Computes single-linkage hierarchical clustering on a dense input feature matrix and outputs the labels, dendrogram, and minimum spanning tree. Connectivities are constructed using a k-nearest neighbors graph. While this strategy enables the algorithm to scale to much higher numbers of rows, it comes with the downside that additional knn steps may need to be executed to connect an otherwise unconnected k-nn graph.
[in] | handle | raft handle to encapsulate expensive resources |
[in] | X | dense feature matrix on device |
[in] | m | number of rows in X |
[in] | n | number of columns in X |
[in] | metric | distance metric to use. Must be supported by the dense pairwise distances API. |
[out] | out | container object for output arrays |
[in] | c | the optimal value of k is guaranteed to be at least log(n) + c where c is some constant. This constant can usually be set to a fairly low value, like 15, and still maintain good performance. |
[in] | n_clusters | number of clusters to cut from resulting dendrogram |
void ML::single_linkage_pairwise | ( | const raft::handle_t & | handle, |
const float * | X, | ||
size_t | m, | ||
size_t | n, | ||
raft::hierarchy::linkage_output< int > * | out, | ||
cuvs::distance::DistanceType | metric, | ||
int | n_clusters = 5 |
||
) |
Computes single-linkage hierarchical clustering on a dense input feature matrix and outputs the labels, dendrogram, and minimum spanning tree. Connectivities are constructed using the full n^2 pairwise distance matrix. This can be very fast for smaller datasets when there is enough memory available.
[in] | handle | raft handle to encapsulate expensive resources |
[in] | X | dense feature matrix on device |
[in] | m | number of rows in X |
[in] | n | number of columns in X |
[in] | metric | distance metric to use. Must be supported by the dense pairwise distances API. |
[out] | out | container object for output arrays |
[in] | n_clusters | number of clusters to cut from resulting dendrogram |
void ML::single_linkage_pairwise | ( | const raft::handle_t & | handle, |
const float * | X, | ||
size_t | m, | ||
size_t | n, | ||
raft::hierarchy::linkage_output< int64_t > * | out, | ||
cuvs::distance::DistanceType | metric, | ||
int | n_clusters = 5 |
||
) |
|
inline |
void ML::TSNE_fit | ( | const raft::handle_t & | handle, |
float * | X, | ||
float * | Y, | ||
int | n, | ||
int | p, | ||
int64_t * | knn_indices, | ||
float * | knn_dists, | ||
TSNEParams & | params, | ||
float * | kl_div = nullptr |
||
) |
Dimensionality reduction via TSNE using Barnes-Hut, Fourier Interpolation, or naive (brute force, O(N^2)) methods.
[in] | handle | The GPU handle. |
[in] | X | The row-major dataset in device memory. |
[out] | Y | The column-major final embedding in device memory |
[in] | n | Number of rows in data X. |
[in] | p | Number of columns in data X. |
[in] | knn_indices | Array containing nearest neighbors indices. |
[in] | knn_dists | Array containing nearest neighbors distances. |
[in] | params | Parameters for TSNE model |
[out] | kl_div | (optional) KL divergence output |
The CUDA implementation is derived from the excellent CannyLabs open source implementation here: https://github.com/CannyLab/tsne-cuda/. The CannyLabs code is licensed according to the conditions in cuml/cpp/src/tsne/cannylabs_tsne_license.txt. A full description of their approach is available in their article t-SNE-CUDA: GPU-Accelerated t-SNE and its Applications to Modern Data (https://arxiv.org/abs/1807.11824).
void ML::TSNE_fit_sparse | ( | const raft::handle_t & | handle, |
int * | indptr, | ||
int * | indices, | ||
float * | data, | ||
float * | Y, | ||
int | nnz, | ||
int | n, | ||
int | p, | ||
int * | knn_indices, | ||
float * | knn_dists, | ||
TSNEParams & | params, | ||
float * | kl_div = nullptr |
||
) |
Dimensionality reduction via TSNE using either Barnes-Hut O(NlogN) or brute force O(N^2).
[in] | handle | The GPU handle. |
[in] | indptr | indptr of CSR dataset. |
[in] | indices | indices of CSR dataset. |
[in] | data | data of CSR dataset. |
[out] | Y | The final embedding. |
[in] | nnz | The number of non-zero entries in the CSR. |
[in] | n | Number of rows in the CSR dataset. |
[in] | p | Number of columns in the CSR dataset. |
[in] | knn_indices | Array containing nearest neighbors indices. |
[in] | knn_dists | Array containing nearest neighbors distances. |
[in] | params | Parameters for TSNE model |
[out] | kl_div | (optional) KL divergence output |
The CUDA implementation is derived from the excellent CannyLab open source implementation here: https://github.com/CannyLab/tsne-cuda/. The CannyLab code is licensed according to the conditions in cuml/cpp/src/tsne/cannylabs_tsne_license.txt. A full description of their approach is available in their article "t-SNE-CUDA: GPU-Accelerated t-SNE and its Applications to Modern Data" (https://arxiv.org/abs/1807.11824).
void ML::tsvdFit | ( | raft::handle_t & | handle, |
double * | input, | ||
double * | components, | ||
double * | singular_vals, | ||
const paramsTSVD & | prms | ||
) |
void ML::tsvdFit | ( | raft::handle_t & | handle, |
float * | input, | ||
float * | components, | ||
float * | singular_vals, | ||
const paramsTSVD & | prms | ||
) |
void ML::tsvdFitTransform | ( | raft::handle_t & | handle, |
double * | input, | ||
double * | trans_input, | ||
double * | components, | ||
double * | explained_var, | ||
double * | explained_var_ratio, | ||
double * | singular_vals, | ||
const paramsTSVD & | prms | ||
) |
void ML::tsvdFitTransform | ( | raft::handle_t & | handle, |
float * | input, | ||
float * | trans_input, | ||
float * | components, | ||
float * | explained_var, | ||
float * | explained_var_ratio, | ||
float * | singular_vals, | ||
const paramsTSVD & | prms | ||
) |
void ML::tsvdInverseTransform | ( | raft::handle_t & | handle, |
double * | trans_input, | ||
double * | components, | ||
double * | input, | ||
const paramsTSVD & | prms | ||
) |
void ML::tsvdInverseTransform | ( | raft::handle_t & | handle, |
float * | trans_input, | ||
float * | components, | ||
float * | input, | ||
const paramsTSVD & | prms | ||
) |
void ML::tsvdTransform | ( | raft::handle_t & | handle, |
double * | input, | ||
double * | components, | ||
double * | trans_input, | ||
const paramsTSVD & | prms | ||
) |
void ML::tsvdTransform | ( | raft::handle_t & | handle, |
float * | input, | ||
float * | components, | ||
float * | trans_input, | ||
const paramsTSVD & | prms | ||
) |
void ML::unpack | ( | raft::handle_t & | handle, |
ARIMAParams< double > & | params, | ||
const ARIMAOrder & | order, | ||
int | batch_size, | ||
const double * | param_vec | ||
) |
Unpack a compact array into separate parameter arrays
[in] | handle | cuML handle |
[out] | params | Parameter structure |
[in] | order | ARIMA order |
[in] | batch_size | Batch size |
[in] | param_vec | Compact parameter array |
HandleMap ML::handleMap |
Static handle map instance (see cumlHandle.cpp)