utils.h
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include "../condensed_hierarchy.cu"
9 
10 #include <common/fast_int_div.cuh>
11 
12 #include <cuml/cluster/hdbscan.hpp>
13 
14 #include <raft/core/device_mdspan.hpp>
15 #include <raft/label/classlabels.cuh>
16 #include <raft/linalg/matrix_vector_op.cuh>
17 #include <raft/linalg/norm.cuh>
18 #include <raft/sparse/convert/csr.cuh>
19 #include <raft/sparse/op/sort.cuh>
20 #include <raft/util/cudart_utils.hpp>
21 
22 #include <rmm/device_uvector.hpp>
23 #include <rmm/exec_policy.hpp>
24 
25 #include <cub/cub.cuh>
26 #include <cuda/functional>
27 #include <thrust/copy.h>
28 #include <thrust/execution_policy.h>
29 #include <thrust/for_each.h>
30 #include <thrust/functional.h>
31 #include <thrust/iterator/zip_iterator.h>
32 #include <thrust/reduce.h>
33 #include <thrust/sort.h>
34 #include <thrust/transform.h>
35 #include <thrust/transform_reduce.h>
36 #include <thrust/tuple.h>
37 
38 #include <algorithm>
39 
40 namespace ML {
41 namespace HDBSCAN {
42 namespace detail {
43 namespace Utils {
44 
58 template <typename value_idx, typename value_t, typename CUBReduceFunc>
59 void cub_segmented_reduce(const value_t* in,
60  value_t* out,
61  int n_segments,
62  const value_idx* offsets,
63  cudaStream_t stream,
64  CUBReduceFunc cub_reduce_func)
65 {
66  rmm::device_uvector<char> d_temp_storage(0, stream);
67  size_t temp_storage_bytes = 0;
68  cub_reduce_func(nullptr, temp_storage_bytes, in, out, n_segments, offsets, offsets + 1, stream);
69  d_temp_storage.resize(temp_storage_bytes, stream);
70 
71  cub_reduce_func(
72  d_temp_storage.data(), temp_storage_bytes, in, out, n_segments, offsets, offsets + 1, stream);
73 }
74 
84 template <typename value_idx, typename value_t>
86  const raft::handle_t& handle, Common::CondensedHierarchy<value_idx, value_t>& condensed_tree)
87 {
88  auto stream = handle.get_stream();
89  auto thrust_policy = handle.get_thrust_policy();
90  auto parents = condensed_tree.get_parents();
91  auto children = condensed_tree.get_children();
92  auto lambdas = condensed_tree.get_lambdas();
93  auto sizes = condensed_tree.get_sizes();
94 
95  value_idx cluster_tree_edges = thrust::transform_reduce(
96  thrust_policy,
97  sizes,
98  sizes + condensed_tree.get_n_edges(),
99  cuda::proclaim_return_type<value_idx>(
100  [=] __device__(value_idx a) -> value_idx { return static_cast<value_idx>(a > 1); }),
101  static_cast<value_idx>(0),
102  cuda::std::plus<value_idx>());
103 
104  // remove leaves from condensed tree
105  rmm::device_uvector<value_idx> cluster_parents(cluster_tree_edges, stream);
106  rmm::device_uvector<value_idx> cluster_children(cluster_tree_edges, stream);
107  rmm::device_uvector<value_t> cluster_lambdas(cluster_tree_edges, stream);
108  rmm::device_uvector<value_idx> cluster_sizes(cluster_tree_edges, stream);
109 
110  auto in = thrust::make_zip_iterator(thrust::make_tuple(parents, children, lambdas, sizes));
111 
112  auto out = thrust::make_zip_iterator(thrust::make_tuple(
113  cluster_parents.data(), cluster_children.data(), cluster_lambdas.data(), cluster_sizes.data()));
114 
115  thrust::copy_if(thrust_policy,
116  in,
117  in + (condensed_tree.get_n_edges()),
118  sizes,
119  out,
120  [=] __device__(value_idx a) { return a > 1; });
121 
122  auto n_leaves = condensed_tree.get_n_leaves();
123  thrust::transform(thrust_policy,
124  cluster_parents.begin(),
125  cluster_parents.end(),
126  cluster_parents.begin(),
127  [n_leaves] __device__(value_idx a) { return a - n_leaves; });
128  thrust::transform(thrust_policy,
129  cluster_children.begin(),
130  cluster_children.end(),
131  cluster_children.begin(),
132  [n_leaves] __device__(value_idx a) { return a - n_leaves; });
133 
135  condensed_tree.get_n_leaves(),
136  cluster_tree_edges,
137  condensed_tree.get_n_clusters(),
138  std::move(cluster_parents),
139  std::move(cluster_children),
140  std::move(cluster_lambdas),
141  std::move(cluster_sizes));
142 }
143 
153 template <typename value_idx, typename value_t>
154 void parent_csr(const raft::handle_t& handle,
156  value_idx* sorted_parents,
157  value_idx* indptr)
158 {
159  auto stream = handle.get_stream();
160  auto thrust_policy = handle.get_thrust_policy();
161 
162  auto children = condensed_tree.get_children();
163  auto sizes = condensed_tree.get_sizes();
164  auto n_edges = condensed_tree.get_n_edges();
165  auto n_leaves = condensed_tree.get_n_leaves();
166  auto n_clusters = condensed_tree.get_n_clusters();
167 
168  // 0-index sorted parents by subtracting n_leaves for offsets and birth/stability indexing
169  auto index_op = [n_leaves] __device__(const auto& x) { return x - n_leaves; };
171  thrust_policy, sorted_parents, sorted_parents + n_edges, sorted_parents, index_op);
172 
173  raft::sparse::convert::sorted_coo_to_csr(sorted_parents, n_edges, indptr, n_clusters + 1, stream);
174 }
175 
176 template <typename value_idx, typename value_t>
177 void normalize(value_t* data, value_idx n, size_t m, cudaStream_t stream)
178 {
179  rmm::device_uvector<value_t> sums(m, stream);
180 
181  // Compute row sums
182  raft::linalg::rowNorm<raft::linalg::NormType::L1Norm, true, value_t, size_t>(
183  sums.data(), data, (size_t)n, m, stream);
184 
185  // Divide vector by row sums (modify in place)
186  raft::linalg::matrixVectorOp<true, false>(
187  data,
188  const_cast<value_t*>(data),
189  sums.data(),
190  n,
191  (value_idx)m,
192  [] __device__(value_t mat_in, value_t vec_in) { return mat_in / vec_in; },
193  stream);
194 }
195 
206 template <typename value_idx, typename value_t>
207 void softmax(const raft::handle_t& handle, value_t* data, value_idx n, size_t m)
208 {
209  rmm::device_uvector<value_t> linf_norm(m, handle.get_stream());
210 
211  auto data_const_view =
212  raft::make_device_matrix_view<const value_t, value_idx, raft::row_major>(data, (int)m, n);
213  auto data_view =
214  raft::make_device_matrix_view<value_t, value_idx, raft::row_major>(data, (int)m, n);
215  auto linf_norm_const_view =
216  raft::make_device_vector_view<const value_t, value_idx>(linf_norm.data(), (int)m);
217  auto linf_norm_view = raft::make_device_vector_view<value_t, value_idx>(linf_norm.data(), (int)m);
218 
219  raft::linalg::norm<raft::linalg::NormType::LinfNorm, raft::Apply::ALONG_ROWS>(
220  handle, data_const_view, linf_norm_view);
221 
222  raft::linalg::matrix_vector_op<raft::Apply::ALONG_COLUMNS>(
223  handle,
224  data_const_view,
225  linf_norm_const_view,
226  data_view,
227  [] __device__(value_t mat_in, value_t vec_in) { return exp(mat_in - vec_in); });
228 }
229 
230 }; // namespace Utils
231 }; // namespace detail
232 }; // namespace HDBSCAN
233 }; // namespace ML
Definition: hdbscan.hpp:29
value_idx * get_sizes()
Definition: hdbscan.hpp:107
value_t * get_lambdas()
Definition: hdbscan.hpp:106
value_idx get_n_leaves() const
Definition: hdbscan.hpp:110
value_idx get_n_edges()
Definition: hdbscan.hpp:108
value_idx * get_children()
Definition: hdbscan.hpp:105
int get_n_clusters()
Definition: hdbscan.hpp:109
value_idx * get_parents()
Definition: hdbscan.hpp:104
Common::CondensedHierarchy< value_idx, value_t > make_cluster_tree(const raft::handle_t &handle, Common::CondensedHierarchy< value_idx, value_t > &condensed_tree)
Definition: utils.h:85
void softmax(const raft::handle_t &handle, value_t *data, value_idx n, size_t m)
Definition: utils.h:207
void normalize(value_t *data, value_idx n, size_t m, cudaStream_t stream)
Definition: utils.h:177
void cub_segmented_reduce(const value_t *in, value_t *out, int n_segments, const value_idx *offsets, cudaStream_t stream, CUBReduceFunc cub_reduce_func)
Definition: utils.h:59
void parent_csr(const raft::handle_t &handle, Common::CondensedHierarchy< value_idx, value_t > &condensed_tree, value_idx *sorted_parents, value_idx *indptr)
Definition: utils.h:154
void transform(const raft::handle_t &handle, const KMeansParams ¶ms, const float *centroids, const float *X, int n_samples, int n_features, float *X_new)
Transform X to a cluster-distance space.
Definition: dbscan.hpp:18