#include <hdbscan.hpp>
Public Member Functions | |
CondensedHierarchy (const raft::handle_t &handle_, size_t n_leaves_) | |
CondensedHierarchy (const raft::handle_t &handle_, size_t n_leaves_, int n_edges_, value_idx *parents_, value_idx *children_, value_t *lambdas_, value_idx *sizes_) | |
CondensedHierarchy (const raft::handle_t &handle_, size_t n_leaves_, int n_edges_, int n_clusters_, rmm::device_uvector< value_idx > &&parents_, rmm::device_uvector< value_idx > &&children_, rmm::device_uvector< value_t > &&lambdas_, rmm::device_uvector< value_idx > &&sizes_) | |
void | condense (value_idx *full_parents, value_idx *full_children, value_t *full_lambdas, value_idx *full_sizes, value_idx size=-1) |
value_idx | get_cluster_tree_edges () |
value_idx * | get_parents () |
value_idx * | get_children () |
value_t * | get_lambdas () |
value_idx * | get_sizes () |
value_idx | get_n_edges () |
int | get_n_clusters () |
value_idx | get_n_leaves () const |
The Condensed hierarchicy is represented by an edge list with parents as the source vertices, children as the destination, with attributes for the cluster size and lambda value.
value_idx | |
value_t |
ML::HDBSCAN::Common::CondensedHierarchy< value_idx, value_t >::CondensedHierarchy | ( | const raft::handle_t & | handle_, |
size_t | n_leaves_ | ||
) |
Constructs an empty condensed hierarchy object which requires condense() to be called in order to populate the state.
handle_ | |
n_leaves_ |
ML::HDBSCAN::Common::CondensedHierarchy< value_idx, value_t >::CondensedHierarchy | ( | const raft::handle_t & | handle_, |
size_t | n_leaves_, | ||
int | n_edges_, | ||
value_idx * | parents_, | ||
value_idx * | children_, | ||
value_t * | lambdas_, | ||
value_idx * | sizes_ | ||
) |
Constructs a condensed hierarchy object with existing arrays which already contain a condensed hierarchy.
handle_ | |
n_leaves_ | |
n_edges_ | |
parents_ | |
children_ | |
lambdas_ | |
sizes_ |
ML::HDBSCAN::Common::CondensedHierarchy< value_idx, value_t >::CondensedHierarchy | ( | const raft::handle_t & | handle_, |
size_t | n_leaves_, | ||
int | n_edges_, | ||
int | n_clusters_, | ||
rmm::device_uvector< value_idx > && | parents_, | ||
rmm::device_uvector< value_idx > && | children_, | ||
rmm::device_uvector< value_t > && | lambdas_, | ||
rmm::device_uvector< value_idx > && | sizes_ | ||
) |
Constructs a condensed hierarchy object by moving rmm::device_uvector. Used to construct cluster trees
handle_ | |
n_leaves_ | |
n_edges_ | |
n_clusters_ | |
parents_ | |
children_ | |
lambdas_ | |
sizes_ |
void ML::HDBSCAN::Common::CondensedHierarchy< value_idx, value_t >::condense | ( | value_idx * | full_parents, |
value_idx * | full_children, | ||
value_t * | full_lambdas, | ||
value_idx * | full_sizes, | ||
value_idx | size = -1 |
||
) |
To maintain a high level of parallelism, the output from Condense::build_condensed_hierarchy() is sparse (the cluster nodes inside any collapsed subtrees will be 0).
This function converts the sparse form to a dense form and renumbers the cluster nodes into a topological sort order. The renumbering reverses the values in the parent array since root has the largest value in the single-linkage tree. Then, it makes the combined parent and children arrays monotonic. Finally all of the arrays of the dendrogram are sorted by parent->children->sizes (e.g. topological). The root node will always have an id of 0 and the largest cluster size.
Ths single-linkage tree dendrogram is a binary tree and parents/children can be found with simple indexing arithmetic but the condensed tree no longer has this property and so the tree now relies on either special indexing or the topological ordering for efficient traversal.
|
inline |
value_idx ML::HDBSCAN::Common::CondensedHierarchy< value_idx, value_t >::get_cluster_tree_edges | ( | ) |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |