10 #include <raft/core/handle.hpp>
12 #include <rmm/device_uvector.hpp>
28 template <typename value_idx, typename value_t>
74 rmm::device_uvector<value_idx>&& parents_,
75 rmm::device_uvector<value_idx>&& children_,
76 rmm::device_uvector<value_t>&& lambdas_,
77 rmm::device_uvector<value_idx>&& sizes_);
97 value_idx* full_children,
98 value_t* full_lambdas,
99 value_idx* full_sizes,
100 value_idx size = -1);
113 const raft::handle_t& handle;
115 rmm::device_uvector<value_idx> parents;
116 rmm::device_uvector<value_idx> children;
117 rmm::device_uvector<value_t> lambdas;
118 rmm::device_uvector<value_idx> sizes;
123 value_idx root_cluster;
208 template <typename value_idx, typename value_t>
227 value_idx* children_,
232 value_t* mst_weights_)
298 template <typename value_idx, typename value_t>
304 value_t* probabilities_,
305 value_idx* children_,
310 value_t* mst_weights_)
312 handle_, n_leaves_, labels_, children_, sizes_, deltas_, mst_src_, mst_dst_, mst_weights_),
313 probabilities(probabilities_),
314 stabilities(0, handle_.get_stream()),
315 condensed_tree(handle_, n_leaves_),
316 inverse_label_map(0, handle_.get_stream())
337 stabilities.resize(n_clusters_,
344 value_t* probabilities;
347 rmm::device_uvector<value_idx> inverse_label_map;
351 rmm::device_uvector<value_t> stabilities;
359 template class CondensedHierarchy<int64_t, float>;
368 template <typename value_idx, typename value_t>
371 PredictionData(
const raft::handle_t& handle_, value_idx m, value_idx n, value_t* core_dists_)
373 exemplar_idx(0, handle.get_stream()),
374 exemplar_label_offsets(0, handle.get_stream()),
375 n_selected_clusters(0),
376 selected_clusters(0, handle.get_stream()),
377 deaths(0, handle.get_stream()),
378 core_dists(core_dists_),
379 index_into_children(0, handle.get_stream()),
409 value_idx n_exemplars_,
410 value_idx n_selected_clusters_,
420 deaths.resize(n_clusters_, handle.get_stream());
424 const raft::handle_t& handle;
425 rmm::device_uvector<value_idx> exemplar_idx;
426 rmm::device_uvector<value_idx> exemplar_label_offsets;
427 value_idx n_exemplars;
428 value_idx n_selected_clusters;
429 rmm::device_uvector<value_idx> selected_clusters;
430 rmm::device_uvector<value_t> deaths;
432 rmm::device_uvector<value_idx> index_into_children;
435 template class PredictionData<int64_t, float>;
440 int64_t* inverse_label_map,
441 int n_selected_clusters,
479 const int64_t* children,
481 const int64_t* sizes,
482 int min_cluster_size,
494 float* probabilities,
496 bool allow_single_cluster,
497 int64_t max_cluster_size,
498 float cluster_selection_epsilon);
501 const raft::handle_t& handle,
506 float* membership_vec,
507 size_t batch_size = 4096);
513 const float* points_to_predict,
514 size_t n_prediction_points,
517 float* membership_vec,
518 size_t batch_size = 4096);
525 const float* points_to_predict,
526 size_t n_prediction_points,
530 float* out_probabilities);
532 namespace HDBSCAN::HELPER {
570 rmm::device_uvector<int64_t>& inverse_label_map,
571 bool allow_single_cluster,
572 int64_t max_cluster_size,
573 float cluster_selection_epsilon);
Definition: hdbscan.hpp:29
value_idx * get_sizes()
Definition: hdbscan.hpp:107
value_t * get_lambdas()
Definition: hdbscan.hpp:106
value_idx get_n_leaves() const
Definition: hdbscan.hpp:110
value_idx get_n_edges()
Definition: hdbscan.hpp:108
value_idx * get_children()
Definition: hdbscan.hpp:105
int get_n_clusters()
Definition: hdbscan.hpp:109
CondensedHierarchy(const raft::handle_t &handle_, size_t n_leaves_)
CondensedHierarchy(const raft::handle_t &handle_, size_t n_leaves_, int n_edges_, int n_clusters_, rmm::device_uvector< value_idx > &&parents_, rmm::device_uvector< value_idx > &&children_, rmm::device_uvector< value_t > &&lambdas_, rmm::device_uvector< value_idx > &&sizes_)
value_idx * get_parents()
Definition: hdbscan.hpp:104
CondensedHierarchy(const raft::handle_t &handle_, size_t n_leaves_, int n_edges_, value_idx *parents_, value_idx *children_, value_t *lambdas_, value_idx *sizes_)
void condense(value_idx *full_parents, value_idx *full_children, value_t *full_lambdas, value_idx *full_sizes, value_idx size=-1)
value_idx get_cluster_tree_edges()
Definition: hdbscan.hpp:195
CLUSTER_SELECTION_METHOD cluster_selection_method
Definition: hdbscan.hpp:197
graph_build_params::graph_build_params build_params
Definition: hdbscan.hpp:199
GRAPH_BUILD_ALGO build_algo
Definition: hdbscan.hpp:198
Definition: hdbscan.hpp:369
value_t * get_core_dists()
Definition: hdbscan.hpp:397
void allocate(const raft::handle_t &handle, value_idx n_exemplars_, value_idx n_selected_clusters_, value_idx n_edges_)
value_t * get_deaths()
Definition: hdbscan.hpp:396
value_idx get_n_selected_clusters()
Definition: hdbscan.hpp:392
PredictionData(const raft::handle_t &handle_, value_idx m, value_idx n, value_t *core_dists_)
Definition: hdbscan.hpp:371
value_idx * get_exemplar_label_offsets()
Definition: hdbscan.hpp:394
value_idx * get_index_into_children()
Definition: hdbscan.hpp:398
void set_n_clusters(const raft::handle_t &handle, value_idx n_clusters_)
Definition: hdbscan.hpp:418
value_idx * get_exemplar_idx()
Definition: hdbscan.hpp:393
value_idx * get_selected_clusters()
Definition: hdbscan.hpp:395
value_idx get_n_exemplars()
Definition: hdbscan.hpp:391
size_t n_rows
Definition: hdbscan.hpp:385
size_t n_cols
Definition: hdbscan.hpp:386
Definition: hdbscan.hpp:129
float alpha
Definition: hdbscan.hpp:139
float cluster_selection_epsilon
Definition: hdbscan.hpp:135
int max_cluster_size
Definition: hdbscan.hpp:133
bool allow_single_cluster
Definition: hdbscan.hpp:137
int min_cluster_size
Definition: hdbscan.hpp:132
int min_samples
Definition: hdbscan.hpp:131
Definition: hdbscan.hpp:299
CondensedHierarchy< value_idx, value_t > & get_condensed_tree()
Definition: hdbscan.hpp:341
value_t * get_stabilities()
Definition: hdbscan.hpp:324
rmm::device_uvector< value_idx > & _get_inverse_label_map()
Definition: hdbscan.hpp:327
void set_n_clusters(int n_clusters_)
Definition: hdbscan.hpp:334
value_t * get_probabilities()
Definition: hdbscan.hpp:323
hdbscan_output(const raft::handle_t &handle_, int n_leaves_, value_idx *labels_, value_t *probabilities_, value_idx *children_, value_idx *sizes_, value_t *deltas_, value_idx *mst_src_, value_idx *mst_dst_, value_t *mst_weights_)
Definition: hdbscan.hpp:301
value_idx * get_inverse_label_map()
Definition: hdbscan.hpp:325
Definition: hdbscan.hpp:209
const raft::handle_t & get_handle()
Definition: hdbscan.hpp:263
value_idx * children
Definition: hdbscan.hpp:273
value_idx * labels
Definition: hdbscan.hpp:270
int get_n_leaves() const
Definition: hdbscan.hpp:246
robust_single_linkage_output(const raft::handle_t &handle_, int n_leaves_, value_idx *labels_, value_idx *children_, value_idx *sizes_, value_t *deltas_, value_idx *mst_src_, value_idx *mst_dst_, value_t *mst_weights_)
Definition: hdbscan.hpp:224
int n_leaves
Definition: hdbscan.hpp:267
int get_n_clusters() const
Definition: hdbscan.hpp:247
void set_n_clusters(int n_clusters_)
Definition: hdbscan.hpp:260
value_t * mst_weights
Definition: hdbscan.hpp:280
value_idx * get_sizes()
Definition: hdbscan.hpp:250
value_t * deltas
Definition: hdbscan.hpp:275
const raft::handle_t & handle
Definition: hdbscan.hpp:265
value_idx * get_mst_src()
Definition: hdbscan.hpp:252
value_t * get_mst_weights()
Definition: hdbscan.hpp:254
value_idx * get_mst_dst()
Definition: hdbscan.hpp:253
value_idx * get_labels()
Definition: hdbscan.hpp:248
value_idx * sizes
Definition: hdbscan.hpp:274
value_t * get_deltas()
Definition: hdbscan.hpp:251
int n_clusters
Definition: hdbscan.hpp:268
value_idx * mst_src
Definition: hdbscan.hpp:278
value_idx * mst_dst
Definition: hdbscan.hpp:279
value_idx * get_children()
Definition: hdbscan.hpp:249
Definition: params.hpp:23
CLUSTER_SELECTION_METHOD
Definition: hdbscan.hpp:126
@ EOM
Definition: hdbscan.hpp:126
@ LEAF
Definition: hdbscan.hpp:126
void generate_prediction_data(const raft::handle_t &handle, CondensedHierarchy< int64_t, float > &condensed_tree, int64_t *labels, int64_t *inverse_label_map, int n_selected_clusters, PredictionData< int64_t, float > &prediction_data)
GRAPH_BUILD_ALGO
Definition: hdbscan.hpp:127
@ NN_DESCENT
Definition: hdbscan.hpp:127
@ BRUTE_FORCE_KNN
Definition: hdbscan.hpp:127
void compute_inverse_label_map(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int64_t, float > &condensed_tree, size_t n_leaves, HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method, rmm::device_uvector< int64_t > &inverse_label_map, bool allow_single_cluster, int64_t max_cluster_size, float cluster_selection_epsilon)
Compute the map from final, normalized labels to the labels in the CondensedHierarchy.
void compute_core_dists(const raft::handle_t &handle, const float *X, float *core_dists, size_t m, size_t n, ML::distance::DistanceType metric, int min_samples)
Compute the core distances for each point in the training matrix.
DistanceType
Definition: distance_type.hpp:10
Definition: dbscan.hpp:18
void hdbscan(const raft::handle_t &handle, const float *X, size_t m, size_t n, ML::distance::DistanceType metric, HDBSCAN::Common::HDBSCANParams &params, HDBSCAN::Common::hdbscan_output< int64_t, float > &out, float *core_dists)
void build_condensed_hierarchy(const raft::handle_t &handle, const int64_t *children, const float *delta, const int64_t *sizes, int min_cluster_size, int n_leaves, HDBSCAN::Common::CondensedHierarchy< int64_t, float > &condensed_tree)
void compute_membership_vector(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int64_t, float > &condensed_tree, HDBSCAN::Common::PredictionData< int64_t, float > &prediction_data, const float *X, const float *points_to_predict, size_t n_prediction_points, int min_samples, ML::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096)
void _extract_clusters(const raft::handle_t &handle, size_t n_leaves, int n_edges, int64_t *parents, int64_t *children, float *lambdas, int64_t *sizes, int64_t *labels, float *probabilities, HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method, bool allow_single_cluster, int64_t max_cluster_size, float cluster_selection_epsilon)
void out_of_sample_predict(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int64_t, float > &condensed_tree, HDBSCAN::Common::PredictionData< int64_t, float > &prediction_data, const float *X, int64_t *labels, const float *points_to_predict, size_t n_prediction_points, ML::distance::DistanceType metric, int min_samples, int64_t *out_labels, float *out_probabilities)
void compute_all_points_membership_vectors(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int64_t, float > &condensed_tree, HDBSCAN::Common::PredictionData< int64_t, float > &prediction_data, const float *X, ML::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096)
Definition: hdbscan.hpp:179
size_t overlap_factor
Definition: hdbscan.hpp:183
nn_descent_params_hdbscan nn_descent_params
Definition: hdbscan.hpp:191
size_t n_clusters
Definition: hdbscan.hpp:190
Definition: hdbscan.hpp:151
size_t max_iterations
Definition: hdbscan.hpp:156
size_t intermediate_graph_degree
Definition: hdbscan.hpp:155
size_t graph_degree
Definition: hdbscan.hpp:154
float termination_threshold
Definition: hdbscan.hpp:157