19 #include <raft/core/handle.hpp>
20 #include <raft/distance/distance_types.hpp>
22 #include <rmm/device_uvector.hpp>
38 template <typename value_idx, typename value_t>
84 rmm::device_uvector<value_idx>&& parents_,
85 rmm::device_uvector<value_idx>&& children_,
86 rmm::device_uvector<value_t>&& lambdas_,
87 rmm::device_uvector<value_idx>&& sizes_);
107 value_idx* full_children,
108 value_t* full_lambdas,
109 value_idx* full_sizes,
110 value_idx size = -1);
123 const raft::handle_t& handle;
125 rmm::device_uvector<value_idx> parents;
126 rmm::device_uvector<value_idx> children;
127 rmm::device_uvector<value_t> lambdas;
128 rmm::device_uvector<value_idx> sizes;
133 value_idx root_cluster;
162 template <typename value_idx, typename value_t>
181 value_idx* children_,
186 value_t* mst_weights_)
252 template <typename value_idx, typename value_t>
258 value_t* probabilities_,
259 value_idx* children_,
264 value_t* mst_weights_)
266 handle_, n_leaves_, labels_, children_, sizes_, deltas_, mst_src_, mst_dst_, mst_weights_),
267 probabilities(probabilities_),
268 stabilities(0, handle_.get_stream()),
269 condensed_tree(handle_, n_leaves_),
270 inverse_label_map(0, handle_.get_stream())
291 stabilities.resize(n_clusters_,
298 value_t* probabilities;
301 rmm::device_uvector<value_idx> inverse_label_map;
305 rmm::device_uvector<value_t> stabilities;
313 template class CondensedHierarchy<int, float>;
322 template <typename value_idx, typename value_t>
325 PredictionData(
const raft::handle_t& handle_, value_idx m, value_idx n, value_t* core_dists_)
327 exemplar_idx(0, handle.get_stream()),
328 exemplar_label_offsets(0, handle.get_stream()),
329 n_selected_clusters(0),
330 selected_clusters(0, handle.get_stream()),
331 deaths(0, handle.get_stream()),
332 core_dists(core_dists_),
333 index_into_children(0, handle.get_stream()),
363 value_idx n_exemplars_,
364 value_idx n_selected_clusters_,
374 deaths.resize(n_clusters_, handle.get_stream());
378 const raft::handle_t& handle;
379 rmm::device_uvector<value_idx> exemplar_idx;
380 rmm::device_uvector<value_idx> exemplar_label_offsets;
381 value_idx n_exemplars;
382 value_idx n_selected_clusters;
383 rmm::device_uvector<value_idx> selected_clusters;
384 rmm::device_uvector<value_t> deaths;
386 rmm::device_uvector<value_idx> index_into_children;
389 template class PredictionData<int, float>;
394 int* inverse_label_map,
395 int n_selected_clusters,
427 raft::distance::DistanceType metric,
436 int min_cluster_size,
448 float* probabilities,
450 bool allow_single_cluster,
451 int max_cluster_size,
452 float cluster_selection_epsilon);
455 const raft::handle_t& handle,
459 raft::distance::DistanceType metric,
460 float* membership_vec,
461 size_t batch_size = 4096);
467 const float* points_to_predict,
468 size_t n_prediction_points,
470 raft::distance::DistanceType metric,
471 float* membership_vec,
472 size_t batch_size = 4096);
479 const float* points_to_predict,
480 size_t n_prediction_points,
481 raft::distance::DistanceType metric,
484 float* out_probabilities);
486 namespace HDBSCAN::HELPER {
504 raft::distance::DistanceType metric,
524 rmm::device_uvector<int>& inverse_label_map,
525 bool allow_single_cluster,
526 int max_cluster_size,
527 float cluster_selection_epsilon);
Definition: hdbscan.hpp:39
value_idx * get_sizes()
Definition: hdbscan.hpp:117
value_t * get_lambdas()
Definition: hdbscan.hpp:116
value_idx get_n_leaves() const
Definition: hdbscan.hpp:120
value_idx get_n_edges()
Definition: hdbscan.hpp:118
value_idx * get_children()
Definition: hdbscan.hpp:115
int get_n_clusters()
Definition: hdbscan.hpp:119
CondensedHierarchy(const raft::handle_t &handle_, size_t n_leaves_)
CondensedHierarchy(const raft::handle_t &handle_, size_t n_leaves_, int n_edges_, int n_clusters_, rmm::device_uvector< value_idx > &&parents_, rmm::device_uvector< value_idx > &&children_, rmm::device_uvector< value_t > &&lambdas_, rmm::device_uvector< value_idx > &&sizes_)
value_idx * get_parents()
Definition: hdbscan.hpp:114
CondensedHierarchy(const raft::handle_t &handle_, size_t n_leaves_, int n_edges_, value_idx *parents_, value_idx *children_, value_t *lambdas_, value_idx *sizes_)
void condense(value_idx *full_parents, value_idx *full_children, value_t *full_lambdas, value_idx *full_sizes, value_idx size=-1)
value_idx get_cluster_tree_edges()
Definition: hdbscan.hpp:151
CLUSTER_SELECTION_METHOD cluster_selection_method
Definition: hdbscan.hpp:153
Definition: hdbscan.hpp:323
value_t * get_core_dists()
Definition: hdbscan.hpp:351
void allocate(const raft::handle_t &handle, value_idx n_exemplars_, value_idx n_selected_clusters_, value_idx n_edges_)
value_t * get_deaths()
Definition: hdbscan.hpp:350
value_idx get_n_selected_clusters()
Definition: hdbscan.hpp:346
PredictionData(const raft::handle_t &handle_, value_idx m, value_idx n, value_t *core_dists_)
Definition: hdbscan.hpp:325
value_idx * get_exemplar_label_offsets()
Definition: hdbscan.hpp:348
value_idx * get_index_into_children()
Definition: hdbscan.hpp:352
void set_n_clusters(const raft::handle_t &handle, value_idx n_clusters_)
Definition: hdbscan.hpp:372
value_idx * get_exemplar_idx()
Definition: hdbscan.hpp:347
value_idx * get_selected_clusters()
Definition: hdbscan.hpp:349
value_idx get_n_exemplars()
Definition: hdbscan.hpp:345
size_t n_rows
Definition: hdbscan.hpp:339
size_t n_cols
Definition: hdbscan.hpp:340
Definition: hdbscan.hpp:138
float alpha
Definition: hdbscan.hpp:148
float cluster_selection_epsilon
Definition: hdbscan.hpp:144
int max_cluster_size
Definition: hdbscan.hpp:142
bool allow_single_cluster
Definition: hdbscan.hpp:146
int min_cluster_size
Definition: hdbscan.hpp:141
int min_samples
Definition: hdbscan.hpp:140
Definition: hdbscan.hpp:253
CondensedHierarchy< value_idx, value_t > & get_condensed_tree()
Definition: hdbscan.hpp:295
value_t * get_stabilities()
Definition: hdbscan.hpp:278
rmm::device_uvector< value_idx > & _get_inverse_label_map()
Definition: hdbscan.hpp:281
void set_n_clusters(int n_clusters_)
Definition: hdbscan.hpp:288
value_t * get_probabilities()
Definition: hdbscan.hpp:277
hdbscan_output(const raft::handle_t &handle_, int n_leaves_, value_idx *labels_, value_t *probabilities_, value_idx *children_, value_idx *sizes_, value_t *deltas_, value_idx *mst_src_, value_idx *mst_dst_, value_t *mst_weights_)
Definition: hdbscan.hpp:255
value_idx * get_inverse_label_map()
Definition: hdbscan.hpp:279
Definition: hdbscan.hpp:163
const raft::handle_t & get_handle()
Definition: hdbscan.hpp:217
value_idx * children
Definition: hdbscan.hpp:227
value_idx * labels
Definition: hdbscan.hpp:224
int get_n_leaves() const
Definition: hdbscan.hpp:200
robust_single_linkage_output(const raft::handle_t &handle_, int n_leaves_, value_idx *labels_, value_idx *children_, value_idx *sizes_, value_t *deltas_, value_idx *mst_src_, value_idx *mst_dst_, value_t *mst_weights_)
Definition: hdbscan.hpp:178
int n_leaves
Definition: hdbscan.hpp:221
int get_n_clusters() const
Definition: hdbscan.hpp:201
void set_n_clusters(int n_clusters_)
Definition: hdbscan.hpp:214
value_t * mst_weights
Definition: hdbscan.hpp:234
value_idx * get_sizes()
Definition: hdbscan.hpp:204
value_t * deltas
Definition: hdbscan.hpp:229
const raft::handle_t & handle
Definition: hdbscan.hpp:219
value_idx * get_mst_src()
Definition: hdbscan.hpp:206
value_t * get_mst_weights()
Definition: hdbscan.hpp:208
value_idx * get_mst_dst()
Definition: hdbscan.hpp:207
value_idx * get_labels()
Definition: hdbscan.hpp:202
value_idx * sizes
Definition: hdbscan.hpp:228
value_t * get_deltas()
Definition: hdbscan.hpp:205
int n_clusters
Definition: hdbscan.hpp:222
value_idx * mst_src
Definition: hdbscan.hpp:232
value_idx * mst_dst
Definition: hdbscan.hpp:233
value_idx * get_children()
Definition: hdbscan.hpp:203
Definition: params.hpp:34
void generate_prediction_data(const raft::handle_t &handle, CondensedHierarchy< int, float > &condensed_tree, int *labels, int *inverse_label_map, int n_selected_clusters, PredictionData< int, float > &prediction_data)
CLUSTER_SELECTION_METHOD
Definition: hdbscan.hpp:136
@ EOM
Definition: hdbscan.hpp:136
@ LEAF
Definition: hdbscan.hpp:136
void compute_inverse_label_map(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, size_t n_leaves, HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method, rmm::device_uvector< int > &inverse_label_map, bool allow_single_cluster, int max_cluster_size, float cluster_selection_epsilon)
Compute the map from the final, normalized labels to the labels in the CondensedHierarchy.
void compute_core_dists(const raft::handle_t &handle, const float *X, float *core_dists, size_t m, size_t n, raft::distance::DistanceType metric, int min_samples)
Compute the core distances for each point in the training matrix.
Definition: dbscan.hpp:30
void build_condensed_hierarchy(const raft::handle_t &handle, const int *children, const float *delta, const int *sizes, int min_cluster_size, int n_leaves, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree)
void _extract_clusters(const raft::handle_t &handle, size_t n_leaves, int n_edges, int *parents, int *children, float *lambdas, int *sizes, int *labels, float *probabilities, HDBSCAN::Common::CLUSTER_SELECTION_METHOD cluster_selection_method, bool allow_single_cluster, int max_cluster_size, float cluster_selection_epsilon)
void compute_all_points_membership_vectors(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, raft::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096)
void compute_membership_vector(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, const float *points_to_predict, size_t n_prediction_points, int min_samples, raft::distance::DistanceType metric, float *membership_vec, size_t batch_size=4096)
void out_of_sample_predict(const raft::handle_t &handle, HDBSCAN::Common::CondensedHierarchy< int, float > &condensed_tree, HDBSCAN::Common::PredictionData< int, float > &prediction_data, const float *X, int *labels, const float *points_to_predict, size_t n_prediction_points, raft::distance::DistanceType metric, int min_samples, int *out_labels, float *out_probabilities)
void hdbscan(const raft::handle_t &handle, const float *X, size_t m, size_t n, raft::distance::DistanceType metric, HDBSCAN::Common::HDBSCANParams &params, HDBSCAN::Common::hdbscan_output< int, float > &out, float *core_dists)