19 #include <raft/core/handle.hpp>
21 #include <rmm/device_uvector.hpp>
23 #include <cuvs/distance/distance.hpp>
39 template <typename value_idx, typename value_t>
85 rmm::device_uvector<value_idx>&& parents_,
86 rmm::device_uvector<value_idx>&& children_,
87 rmm::device_uvector<value_t>&& lambdas_,
88 rmm::device_uvector<value_idx>&& sizes_);
108 value_idx* full_children,
109 value_t* full_lambdas,
110 value_idx* full_sizes,
111 value_idx size = -1);
124 const raft::handle_t& handle;
126 rmm::device_uvector<value_idx> parents;
127 rmm::device_uvector<value_idx> children;
128 rmm::device_uvector<value_t> lambdas;
129 rmm::device_uvector<value_idx> sizes;
134 value_idx root_cluster;
163 template <typename value_idx, typename value_t>
182 value_idx* children_,
187 value_t* mst_weights_)
253 template <typename value_idx, typename value_t>
259 value_t* probabilities_,
260 value_idx* children_,
265 value_t* mst_weights_)
267 handle_, n_leaves_, labels_, children_, sizes_, deltas_, mst_src_, mst_dst_, mst_weights_),
268 probabilities(probabilities_),
269 stabilities(0, handle_.get_stream()),
270 condensed_tree(handle_, n_leaves_),
271 inverse_label_map(0, handle_.get_stream())
292 stabilities.resize(n_clusters_,
299 value_t* probabilities;
302 rmm::device_uvector<value_idx> inverse_label_map;
306 rmm::device_uvector<value_t> stabilities;
314 template class CondensedHierarchy<int, float>;
323 template <typename value_idx, typename value_t>
326 PredictionData(const raft::handle_t& handle_, value_idx m, value_idx n, value_t* core_dists_)
328 exemplar_idx(0, handle.get_stream()),
329 exemplar_label_offsets(0, handle.get_stream()),
330 n_selected_clusters(0),
331 selected_clusters(0, handle.get_stream()),
332 deaths(0, handle.get_stream()),
333 core_dists(core_dists_),
334 index_into_children(0, handle.get_stream()),
364 value_idx n_exemplars_,
365 value_idx n_selected_clusters_,
375 deaths.resize(n_clusters_, handle.get_stream());
379 const raft::handle_t& handle;
380 rmm::device_uvector<value_idx> exemplar_idx;
381 rmm::device_uvector<value_idx> exemplar_label_offsets;
382 value_idx n_exemplars;
383 value_idx n_selected_clusters;
384 rmm::device_uvector<value_idx> selected_clusters;
385 rmm::device_uvector<value_t> deaths;
387 rmm::device_uvector<value_idx> index_into_children;
390 template class PredictionData<int, float>;
395 int* inverse_label_map,
396 int n_selected_clusters,
428 cuvs::distance::DistanceType metric,
437 int min_cluster_size,
449 float* probabilities,
451 bool allow_single_cluster,
452 int max_cluster_size,
453 float cluster_selection_epsilon);
456 const raft::handle_t& handle,
460 cuvs::distance::DistanceType metric,
461 float* membership_vec,
462 size_t batch_size = 4096);
468 const float* points_to_predict,
469 size_t n_prediction_points,
471 cuvs::distance::DistanceType metric,
472 float* membership_vec,
473 size_t batch_size = 4096);
480 const float* points_to_predict,
481 size_t n_prediction_points,
482 cuvs::distance::DistanceType metric,
485 float* out_probabilities);
487 namespace HDBSCAN::HELPER {
505 cuvs::distance::DistanceType metric,
525 rmm::device_uvector<int>& inverse_label_map,
526 bool allow_single_cluster,
527 int max_cluster_size,
528 float cluster_selection_epsilon);
Definition: hdbscan.hpp:40
value_idx * get_sizes()
Definition: hdbscan.hpp:118
value_t * get_lambdas()
Definition: hdbscan.hpp:117
value_idx get_n_leaves() const
Definition: hdbscan.hpp:121
value_idx get_n_edges()
Definition: hdbscan.hpp:119
value_idx * get_children()
Definition: hdbscan.hpp:116
int get_n_clusters()
Definition: hdbscan.hpp:120
CondensedHierarchy(const raft::handle_t &handle_, size_t n_leaves_)
CondensedHierarchy(const raft::handle_t &handle_, size_t n_leaves_, int n_edges_, int n_clusters_, rmm::device_uvector< value_idx > &&parents_, rmm::device_uvector< value_idx > &&children_, rmm::device_uvector< value_t > &&lambdas_, rmm::device_uvector< value_idx > &&sizes_)
value_idx * get_parents()
Definition: hdbscan.hpp:115
CondensedHierarchy(const raft::handle_t &handle_, size_t n_leaves_, int n_edges_, value_idx *parents_, value_idx *children_, value_t *lambdas_, value_idx *sizes_)
void condense(value_idx *full_parents, value_idx *full_children, value_t *full_lambdas, value_idx *full_sizes, value_idx size=-1)
value_idx get_cluster_tree_edges()
Definition: hdbscan.hpp:152
CLUSTER_SELECTION_METHOD cluster_selection_method
Definition: hdbscan.hpp:154
Definition: hdbscan.hpp:324
value_t * get_core_dists()
Definition: hdbscan.hpp:352
void allocate(const raft::handle_t &handle, value_idx n_exemplars_, value_idx n_selected_clusters_, value_idx n_edges_)
value_t * get_deaths()
Definition: hdbscan.hpp:351
value_idx get_n_selected_clusters()
Definition: hdbscan.hpp:347
PredictionData(const raft::handle_t &handle_, value_idx m, value_idx n, value_t *core_dists_)
Definition: hdbscan.hpp:326
value_idx * get_exemplar_label_offsets()
Definition: hdbscan.hpp:349
value_idx * get_index_into_children()
Definition: hdbscan.hpp:353
void set_n_clusters(const raft::handle_t &handle, value_idx n_clusters_)
Definition: hdbscan.hpp:373
value_idx * get_exemplar_idx()
Definition: hdbscan.hpp:348
value_idx * get_selected_clusters()
Definition: hdbscan.hpp:350
value_idx get_n_exemplars()
Definition: hdbscan.hpp:346
size_t n_rows
Definition: hdbscan.hpp:340
size_t n_cols
Definition: hdbscan.hpp:341
Definition: hdbscan.hpp:139
float alpha
Definition: hdbscan.hpp:149
float cluster_selection_epsilon
Definition: hdbscan.hpp:145
int max_cluster_size
Definition: hdbscan.hpp:143
bool allow_single_cluster
Definition: hdbscan.hpp:147
int min_cluster_size
Definition: hdbscan.hpp:142
int min_samples
Definition: hdbscan.hpp:141
Definition: hdbscan.hpp:254
CondensedHierarchy< value_idx, value_t > & get_condensed_tree()
Definition: hdbscan.hpp:296
value_t * get_stabilities()
Definition: hdbscan.hpp:279
rmm::device_uvector< value_idx > & _get_inverse_label_map()
Definition: hdbscan.hpp:282
void set_n_clusters(int n_clusters_)
Definition: hdbscan.hpp:289
value_t * get_probabilities()
Definition: hdbscan.hpp:278
hdbscan_output(const raft::handle_t &handle_, int n_leaves_, value_idx *labels_, value_t *probabilities_, value_idx *children_, value_idx *sizes_, value_t *deltas_, value_idx *mst_src_, value_idx *mst_dst_, value_t *mst_weights_)
Definition: hdbscan.hpp:256
value_idx * get_inverse_label_map()
Definition: hdbscan.hpp:280
Definition: hdbscan.hpp:164
const raft::handle_t & get_handle()
Definition: hdbscan.hpp:218
value_idx * children
Definition: hdbscan.hpp:228
value_idx * labels
Definition: hdbscan.hpp:225
int get_n_leaves() const
Definition: hdbscan.hpp:201
robust_single_linkage_output(const raft::handle_t &handle_, int n_leaves_, value_idx *labels_, value_idx *children_, value_idx *sizes_, value_t *deltas_, value_idx *mst_src_, value_idx *mst_dst_, value_t *mst_weights_)
Definition: hdbscan.hpp:179
int n_leaves
Definition: hdbscan.hpp:222
int get_n_clusters() const
Definition: hdbscan.hpp:202
void set_n_clusters(int n_clusters_)
Definition: hdbscan.hpp:215
value_t * mst_weights
Definition: hdbscan.hpp:235
value_idx * get_sizes()
Definition: hdbscan.hpp:205
value_t * deltas
Definition: hdbscan.hpp:230
const raft::handle_t & handle
Definition: hdbscan.hpp:220
value_idx * get_mst_src()
Definition: hdbscan.hpp:207
value_t * get_mst_weights()
Definition: hdbscan.hpp:209
value_idx * get_mst_dst()
Definition: hdbscan.hpp:208
value_idx * get_labels()
Definition: hdbscan.hpp:203
value_idx * sizes
Definition: hdbscan.hpp:229
value_t * get_deltas()
Definition: hdbscan.hpp:206
int n_clusters
Definition: hdbscan.hpp:223
value_idx * mst_src
Definition: hdbscan.hpp:233
value_idx * mst_dst
Definition: hdbscan.hpp:234
value_idx * get_children()
Definition: hdbscan.hpp:204
Definition: params.hpp:34
void generate_prediction_data(const raft::handle_t &handle, CondensedHierarchy< int, float > &condensed_tree, int *labels, int *inverse_label_map, int n_selected_clusters, PredictionData< int, float > &prediction_data)
CLUSTER_SELECTION_METHOD
Definition: hdbscan.hpp:137
@ EOM
Definition: hdbscan.hpp:137
@ LEAF
Definition: hdbscan.hpp:137
void compute_inverse_label_map(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, size_t n_leaves, HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method, rmm::device_uvector< int > &inverse_label_map, bool allow_single_cluster, int max_cluster_size, float cluster_selection_epsilon)
Compute the map from final, normalized labels to the labels in the CondensedHierarchy.
void compute_core_dists(const raft::handle_t &handle, const float *X, float *core_dists, size_t m, size_t n, cuvs::distance::DistanceType metric, int min_samples)
Compute the core distances for each point in the training matrix.
Definition: dbscan.hpp:30
void build_condensed_hierarchy(const raft::handle_t &handle, const int *children, const float *delta, const int *sizes, int min_cluster_size, int n_leaves, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree)
void compute_membership_vector(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, const float *points_to_predict, size_t n_prediction_points, int min_samples, cuvs::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096)
void out_of_sample_predict(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, int *labels, const float *points_to_predict, size_t n_prediction_points, cuvs::distance::DistanceType metric, int min_samples, int *out_labels, float *out_probabilities)
void compute_all_points_membership_vectors(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, cuvs::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096)
void _extract_clusters(const raft::handle_t &handle, size_t n_leaves, int n_edges, int *parents, int *children, float *lambdas, int *sizes, int *labels, float *probabilities, HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method, bool allow_single_cluster, int max_cluster_size, float cluster_selection_epsilon)
void hdbscan(const raft::handle_t &handle, const float *X, size_t m, size_t n, cuvs::distance::DistanceType metric, HDBSCAN::Common::HDBSCANParams &params, HDBSCAN::Common::hdbscan_output< int, float > &out, float *core_dists)