17 #include <raft/core/device_coo_matrix.hpp>
18 #include <raft/core/device_mdspan.hpp>
19 #include <raft/core/handle.hpp>
20 #include <raft/core/kvp.hpp>
21 #include <raft/core/resource/thrust_policy.hpp>
22 #include <raft/sparse/coo.hpp>
23 #include <raft/util/cudart_utils.hpp>
25 #include <rmm/device_uvector.hpp>
27 #include <thrust/device_ptr.h>
28 #include <thrust/extrema.h>
29 #include <thrust/gather.h>
30 #include <thrust/scatter.h>
31 #include <thrust/transform.h>
33 #include <cuvs/cluster/agglomerative.hpp>
54 template <
typename value_
idx =
int64_t,
typename value_t =
float>
64 auto stream = handle.get_stream();
65 size_t n_edges = m - 1;
66 cuvs::cluster::agglomerative::helpers::linkage_graph_params::mutual_reachability_params
70 if (
static_cast<size_t>(
params.min_samples + 1) > m) {
72 "min_samples (%d) must be less than the number of samples in X (%zu), setting min_samples to "
77 linkage_params.min_samples = m;
79 linkage_params.min_samples =
params.min_samples + 1;
81 linkage_params.alpha =
params.alpha;
83 linkage_params.all_neighbors_params.overlap_factor =
params.build_params.overlap_factor;
84 linkage_params.all_neighbors_params.n_clusters =
params.build_params.n_clusters;
87 auto brute_force_params = cuvs::neighbors::graph_build_params::brute_force_params{};
89 linkage_params.all_neighbors_params.graph_build_params = brute_force_params;
91 auto nn_descent_params = cuvs::neighbors::graph_build_params::nn_descent_params{};
92 nn_descent_params.max_iterations =
params.build_params.nn_descent_params.max_iterations;
93 nn_descent_params.graph_degree =
params.build_params.nn_descent_params.graph_degree;
94 nn_descent_params.intermediate_graph_degree =
95 params.build_params.nn_descent_params.intermediate_graph_degree;
96 nn_descent_params.termination_threshold =
97 params.build_params.nn_descent_params.termination_threshold;
98 if (
static_cast<int>(nn_descent_params.graph_degree) < linkage_params.min_samples) {
101 "NN Descent graph_degree (%d) must be larger than or equal to min_samples + 1 (%zu), "
102 "setting graph_degree to min_samples + 1 and scaling intermediate_graph_degree accordingly "
104 static_cast<int>(nn_descent_params.graph_degree),
105 static_cast<int>(linkage_params.min_samples));
106 nn_descent_params.graph_degree = linkage_params.min_samples;
107 nn_descent_params.intermediate_graph_degree = nn_descent_params.graph_degree * 2;
110 linkage_params.all_neighbors_params.graph_build_params = nn_descent_params;
112 RAFT_FAIL(
"Unsupported build algo. build algo must be either brute force or nn descent");
115 cudaPointerAttributes attr;
116 RAFT_CUDA_TRY(cudaPointerGetAttributes(&attr, X));
117 bool data_on_device = attr.type == cudaMemoryTypeDevice;
119 if (data_on_device) {
122 raft::make_device_matrix_view<const value_t, value_idx>(X, m, n),
125 raft::make_device_coo_matrix_view<value_t, value_idx, value_idx, size_t>(
127 raft::make_device_coordinate_structure_view(
129 raft::make_device_matrix_view<value_idx, value_idx>(out.
get_children(), n_edges, 2),
130 raft::make_device_vector_view<value_t, value_idx>(out.
get_deltas(), n_edges),
131 raft::make_device_vector_view<value_idx, value_idx>(out.
get_sizes(), n_edges),
132 std::make_optional<raft::device_vector_view<value_t, value_idx>>(
133 raft::make_device_vector_view<value_t, value_idx>(core_dists, m)));
137 raft::make_host_matrix_view<const value_t, value_idx>(X, m, n),
140 raft::make_device_coo_matrix_view<value_t, value_idx, value_idx, size_t>(
142 raft::make_device_coordinate_structure_view(
144 raft::make_device_matrix_view<value_idx, value_idx>(out.
get_children(), n_edges, 2),
145 raft::make_device_vector_view<value_t, value_idx>(out.
get_deltas(), n_edges),
146 raft::make_device_vector_view<value_idx, value_idx>(out.
get_sizes(), n_edges),
147 std::make_optional<raft::device_vector_view<value_t, value_idx>>(
148 raft::make_device_vector_view<value_t, value_idx>(core_dists, m)));
152 template <
typename value_
idx =
int64_t,
typename value_t =
float>
163 auto stream = handle.get_stream();
164 auto exec_policy = handle.get_thrust_policy();
166 int min_cluster_size =
params.min_cluster_size;
168 RAFT_EXPECTS(
params.min_samples <= m,
"min_samples must be at most the number of samples in X");
187 rmm::device_uvector<value_t> tree_stabilities(out.
get_condensed_tree().get_n_clusters(),
188 handle.get_stream());
191 handle.get_stream());
192 value_idx n_selected_clusters =
193 detail::Extract::extract_clusters(handle,
197 tree_stabilities.data(),
200 params.cluster_selection_method,
202 params.allow_single_cluster,
203 static_cast<value_idx
>(
params.max_cluster_size),
204 params.cluster_selection_epsilon);
208 auto lambdas_ptr = thrust::device_pointer_cast(out.
get_condensed_tree().get_lambdas());
209 value_t max_lambda = *(thrust::max_element(
212 detail::Stability::get_stability_scores(handle,
214 tree_stabilities.data(),
230 [label_map = label_map.data()] __device__(value_idx label) {
231 if (label != -1)
return label_map[label];
232 return static_cast<value_idx
>(-1);
Definition: hdbscan.hpp:195
Definition: hdbscan.hpp:299
CondensedHierarchy< value_idx, value_t > & get_condensed_tree()
Definition: hdbscan.hpp:341
value_t * get_stabilities()
Definition: hdbscan.hpp:324
rmm::device_uvector< value_idx > & _get_inverse_label_map()
Definition: hdbscan.hpp:327
void set_n_clusters(int n_clusters_)
Definition: hdbscan.hpp:334
value_t * get_probabilities()
Definition: hdbscan.hpp:323
Definition: hdbscan.hpp:209
value_idx * get_sizes()
Definition: hdbscan.hpp:250
value_idx * get_mst_src()
Definition: hdbscan.hpp:252
value_t * get_mst_weights()
Definition: hdbscan.hpp:254
value_idx * get_mst_dst()
Definition: hdbscan.hpp:253
value_idx * get_labels()
Definition: hdbscan.hpp:248
value_t * get_deltas()
Definition: hdbscan.hpp:251
value_idx * get_children()
Definition: hdbscan.hpp:249
Definition: params.hpp:23
@ NN_DESCENT
Definition: hdbscan.hpp:127
@ BRUTE_FORCE_KNN
Definition: hdbscan.hpp:127
void _fit_hdbscan(const raft::handle_t &handle, const value_t *X, size_t m, size_t n, ML::distance::DistanceType metric, Common::HDBSCANParams ¶ms, value_idx *labels, value_t *core_dists, Common::hdbscan_output< value_idx, value_t > &out)
Definition: runner.h:153
void build_linkage(const raft::handle_t &handle, const value_t *X, size_t m, size_t n, ML::distance::DistanceType metric, Common::HDBSCANParams ¶ms, value_t *core_dists, Common::robust_single_linkage_output< value_idx, value_t > &out)
Definition: runner.h:55
DistanceType
Definition: distance_type.hpp:10
void transform(const raft::handle_t &handle, const KMeansParams ¶ms, const float *centroids, const float *X, int n_samples, int n_features, float *X_new)
Transform X to a cluster-distance space.
Definition: dbscan.hpp:18
void build_condensed_hierarchy(const raft::handle_t &handle, const int64_t *children, const float *delta, const int64_t *sizes, int min_cluster_size, int n_leaves, HDBSCAN::Common::CondensedHierarchy< int64_t, float > &condensed_tree)