utils.h
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include "../condensed_hierarchy.cu"
9 
10 #include <common/fast_int_div.cuh>
11 
12 #include <cuml/cluster/hdbscan.hpp>
13 
14 #include <raft/core/device_mdspan.hpp>
15 #include <raft/label/classlabels.cuh>
16 #include <raft/linalg/matrix_vector_op.cuh>
17 #include <raft/linalg/norm.cuh>
18 #include <raft/sparse/convert/csr.cuh>
19 #include <raft/sparse/op/sort.cuh>
20 #include <raft/util/cudart_utils.hpp>
21 
22 #include <rmm/device_uvector.hpp>
23 #include <rmm/exec_policy.hpp>
24 
25 #include <cub/cub.cuh>
26 #include <cuda/functional>
27 #include <cuda/std/tuple>
28 #include <thrust/copy.h>
29 #include <thrust/execution_policy.h>
30 #include <thrust/for_each.h>
31 #include <thrust/iterator/zip_iterator.h>
32 #include <thrust/reduce.h>
33 #include <thrust/sort.h>
34 #include <thrust/transform.h>
35 #include <thrust/transform_reduce.h>
36 
37 #include <algorithm>
38 
39 namespace ML {
40 namespace HDBSCAN {
41 namespace detail {
42 namespace Utils {
43 
/**
 * Run a CUB device-wide segmented reduction using the standard two-pass
 * CUB protocol: a first call with a null workspace pointer only queries the
 * required temporary-storage size; the second call performs the reduction.
 *
 * @param[in]  in              device array of input values
 * @param[out] out             device array receiving one result per segment
 * @param[in]  n_segments      number of segments to reduce
 * @param[in]  offsets         device array of n_segments + 1 offsets;
 *                             segment i spans [offsets[i], offsets[i + 1])
 * @param[in]  stream          CUDA stream all work is ordered on
 * @param[in]  cub_reduce_func callable forwarding to a
 *                             cub::DeviceSegmentedReduce entry point
 */
template <typename value_idx, typename value_t, typename CUBReduceFunc>
void cub_segmented_reduce(const value_t* in,
                          value_t* out,
                          int n_segments,
                          const value_idx* offsets,
                          cudaStream_t stream,
                          CUBReduceFunc cub_reduce_func)
{
  // Pass 1: with a null workspace pointer, CUB only reports the bytes needed.
  size_t workspace_bytes = 0;
  cub_reduce_func(nullptr, workspace_bytes, in, out, n_segments, offsets, offsets + 1, stream);

  // Allocate scratch space stream-ordered, then run the reduction for real.
  rmm::device_uvector<char> workspace(workspace_bytes, stream);
  cub_reduce_func(
    workspace.data(), workspace_bytes, in, out, n_segments, offsets, offsets + 1, stream);
}
73 
/**
 * Build the "cluster tree" from a condensed hierarchy by keeping only the
 * edges whose size is > 1 (i.e. dropping all leaf/point edges), and
 * re-indexing parent/child ids to start at 0 by subtracting n_leaves.
 *
 * NOTE(review): the documentation extraction this file was recovered from
 * dropped the function signature and the return statement's opening line;
 * both are restored here to match the declared symbol
 * `Common::CondensedHierarchy<value_idx, value_t> make_cluster_tree(
 *    const raft::handle_t&, Common::CondensedHierarchy<value_idx, value_t>&)`.
 *
 * @param[in] handle         raft handle supplying the stream / thrust policy
 * @param[in] condensed_tree condensed hierarchy to filter
 * @return a new CondensedHierarchy containing only cluster edges
 */
template <typename value_idx, typename value_t>
Common::CondensedHierarchy<value_idx, value_t> make_cluster_tree(
  const raft::handle_t& handle, Common::CondensedHierarchy<value_idx, value_t>& condensed_tree)
{
  auto stream        = handle.get_stream();
  auto thrust_policy = handle.get_thrust_policy();
  auto parents       = condensed_tree.get_parents();
  auto children      = condensed_tree.get_children();
  auto lambdas       = condensed_tree.get_lambdas();
  auto sizes         = condensed_tree.get_sizes();

  // Count edges whose size > 1: these are the cluster (non-leaf) edges.
  value_idx cluster_tree_edges = thrust::transform_reduce(
    thrust_policy,
    sizes,
    sizes + condensed_tree.get_n_edges(),
    cuda::proclaim_return_type<value_idx>(
      [=] __device__(value_idx a) -> value_idx { return static_cast<value_idx>(a > 1); }),
    static_cast<value_idx>(0),
    cuda::std::plus<value_idx>());

  // remove leaves from condensed tree
  rmm::device_uvector<value_idx> cluster_parents(cluster_tree_edges, stream);
  rmm::device_uvector<value_idx> cluster_children(cluster_tree_edges, stream);
  rmm::device_uvector<value_t> cluster_lambdas(cluster_tree_edges, stream);
  rmm::device_uvector<value_idx> cluster_sizes(cluster_tree_edges, stream);

  auto in = thrust::make_zip_iterator(cuda::std::make_tuple(parents, children, lambdas, sizes));

  auto out = thrust::make_zip_iterator(cuda::std::make_tuple(
    cluster_parents.data(), cluster_children.data(), cluster_lambdas.data(), cluster_sizes.data()));

  // Stencil on `sizes`: copy only the edges whose size > 1.
  thrust::copy_if(thrust_policy,
                  in,
                  in + (condensed_tree.get_n_edges()),
                  sizes,
                  out,
                  [=] __device__(value_idx a) { return a > 1; });

  // Cluster ids in the condensed tree start at n_leaves; shift both parents
  // and children down so the cluster tree is 0-indexed.
  auto n_leaves = condensed_tree.get_n_leaves();
  thrust::transform(thrust_policy,
                    cluster_parents.begin(),
                    cluster_parents.end(),
                    cluster_parents.begin(),
                    [n_leaves] __device__(value_idx a) { return a - n_leaves; });
  thrust::transform(thrust_policy,
                    cluster_children.begin(),
                    cluster_children.end(),
                    cluster_children.begin(),
                    [n_leaves] __device__(value_idx a) { return a - n_leaves; });

  return Common::CondensedHierarchy<value_idx, value_t>(handle,
                                                        condensed_tree.get_n_leaves(),
                                                        cluster_tree_edges,
                                                        condensed_tree.get_n_clusters(),
                                                        std::move(cluster_parents),
                                                        std::move(cluster_children),
                                                        std::move(cluster_lambdas),
                                                        std::move(cluster_sizes));
}
142 
/**
 * Convert a parent-sorted condensed-tree edge list into CSR row offsets over
 * clusters. `sorted_parents` is re-indexed IN PLACE (n_leaves is subtracted
 * from every entry) so cluster ids start at 0, then a sorted-COO-to-CSR
 * conversion fills `indptr`.
 *
 * NOTE(review): the documentation extraction this file was recovered from
 * dropped the `condensed_tree` parameter line and the `thrust::transform(`
 * call line; both are restored here to match the declared symbol
 * `void parent_csr(const raft::handle_t&,
 *                  Common::CondensedHierarchy<value_idx, value_t>&,
 *                  value_idx*, value_idx*)`.
 * Unused locals (`children`, `sizes`) from the original were removed.
 *
 * @param[in]    handle         raft handle supplying the stream / thrust policy
 * @param[in]    condensed_tree hierarchy providing n_edges / n_leaves / n_clusters
 * @param[inout] sorted_parents parent ids sorted ascending; shifted in place
 * @param[out]   indptr         CSR offsets, length n_clusters + 1
 */
template <typename value_idx, typename value_t>
void parent_csr(const raft::handle_t& handle,
                Common::CondensedHierarchy<value_idx, value_t>& condensed_tree,
                value_idx* sorted_parents,
                value_idx* indptr)
{
  auto stream        = handle.get_stream();
  auto thrust_policy = handle.get_thrust_policy();

  auto n_edges    = condensed_tree.get_n_edges();
  auto n_leaves   = condensed_tree.get_n_leaves();
  auto n_clusters = condensed_tree.get_n_clusters();

  // 0-index sorted parents by subtracting n_leaves for offsets and birth/stability indexing
  auto index_op = [n_leaves] __device__(const auto& x) { return x - n_leaves; };
  thrust::transform(
    thrust_policy, sorted_parents, sorted_parents + n_edges, sorted_parents, index_op);

  raft::sparse::convert::sorted_coo_to_csr(sorted_parents, n_edges, indptr, n_clusters + 1, stream);
}
174 
/**
 * Row-normalize a dense row-major m x n matrix in place so each row sums to 1
 * (L1 normalization): compute per-row L1 norms, then divide every element by
 * its row's norm.
 *
 * @param[inout] data   device pointer to the m x n row-major matrix
 * @param[in]    n      number of columns
 * @param[in]    m      number of rows
 * @param[in]    stream CUDA stream all work is ordered on
 */
template <typename value_idx, typename value_t>
void normalize(value_t* data, value_idx n, size_t m, cudaStream_t stream)
{
  rmm::device_uvector<value_t> row_sums(m, stream);

  // Per-row L1 norms (sums of absolute values).
  raft::linalg::rowNorm<raft::linalg::NormType::L1Norm, true, value_t, size_t>(
    row_sums.data(), data, (size_t)n, m, stream);

  // Divide each row by its sum, writing back into `data` in place.
  raft::linalg::matrixVectorOp<true, false>(
    data,
    data,
    row_sums.data(),
    n,
    (value_idx)m,
    [] __device__(value_t element, value_t row_sum) { return element / row_sum; },
    stream);
}
194 
/**
 * Apply a numerically-stabilized element-wise exponential to each row of a
 * dense row-major m x n matrix, in place: every element is replaced by
 * exp(x - row_max), where row_max is the row's L-infinity norm. Note this is
 * the exponentiation step of a softmax; no division by the row sum happens
 * here.
 *
 * @param[in]    handle raft handle supplying stream / resources
 * @param[inout] data   device pointer to the m x n row-major matrix
 * @param[in]    n      number of columns
 * @param[in]    m      number of rows
 */
template <typename value_idx, typename value_t>
void softmax(const raft::handle_t& handle, value_t* data, value_idx n, size_t m)
{
  rmm::device_uvector<value_t> row_max(m, handle.get_stream());

  // Read-only and writable views over the same buffers.
  auto matrix_in = raft::make_device_matrix_view<const value_t, value_idx, raft::row_major>(
    data, (int)m, n);
  auto matrix_out =
    raft::make_device_matrix_view<value_t, value_idx, raft::row_major>(data, (int)m, n);
  auto row_max_out = raft::make_device_vector_view<value_t, value_idx>(row_max.data(), (int)m);
  auto row_max_in =
    raft::make_device_vector_view<const value_t, value_idx>(row_max.data(), (int)m);

  // Per-row maximum magnitude (L-inf norm), used to stabilize exp().
  raft::linalg::norm<raft::linalg::NormType::LinfNorm, raft::Apply::ALONG_ROWS>(
    handle, matrix_in, row_max_out);

  // element <- exp(element - row_max), broadcast along columns, in place.
  raft::linalg::matrix_vector_op<raft::Apply::ALONG_COLUMNS>(
    handle,
    matrix_in,
    row_max_in,
    matrix_out,
    [] __device__(value_t element, value_t max_val) { return exp(element - max_val); });
}
228 
229 }; // namespace Utils
230 }; // namespace detail
231 }; // namespace HDBSCAN
232 }; // namespace ML
Definition: hdbscan.hpp:29
value_idx * get_sizes()
Definition: hdbscan.hpp:107
value_t * get_lambdas()
Definition: hdbscan.hpp:106
value_idx get_n_leaves() const
Definition: hdbscan.hpp:110
value_idx get_n_edges()
Definition: hdbscan.hpp:108
value_idx * get_children()
Definition: hdbscan.hpp:105
int get_n_clusters()
Definition: hdbscan.hpp:109
value_idx * get_parents()
Definition: hdbscan.hpp:104
Common::CondensedHierarchy< value_idx, value_t > make_cluster_tree(const raft::handle_t &handle, Common::CondensedHierarchy< value_idx, value_t > &condensed_tree)
Definition: utils.h:84
void softmax(const raft::handle_t &handle, value_t *data, value_idx n, size_t m)
Definition: utils.h:206
void normalize(value_t *data, value_idx n, size_t m, cudaStream_t stream)
Definition: utils.h:176
void cub_segmented_reduce(const value_t *in, value_t *out, int n_segments, const value_idx *offsets, cudaStream_t stream, CUBReduceFunc cub_reduce_func)
Definition: utils.h:58
void parent_csr(const raft::handle_t &handle, Common::CondensedHierarchy< value_idx, value_t > &condensed_tree, value_idx *sorted_parents, value_idx *indptr)
Definition: utils.h:153
void transform(const raft::handle_t &handle, const KMeansParams ¶ms, const float *centroids, const float *X, int n_samples, int n_features, float *X_new)
Transform X to a cluster-distance space.
Definition: dbscan.hpp:18