utils.h
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include "../condensed_hierarchy.cu"
9 
10 #include <common/fast_int_div.cuh>
11 
12 #include <cuml/cluster/hdbscan.hpp>
13 
14 #include <raft/core/device_mdspan.hpp>
15 #include <raft/label/classlabels.cuh>
16 #include <raft/linalg/matrix_vector_op.cuh>
17 #include <raft/linalg/norm.cuh>
18 #include <raft/sparse/convert/csr.cuh>
19 #include <raft/sparse/op/sort.cuh>
20 #include <raft/util/cudart_utils.hpp>
21 
22 #include <rmm/device_uvector.hpp>
23 #include <rmm/exec_policy.hpp>
24 
25 #include <cub/cub.cuh>
26 #include <cuda/functional>
27 #include <cuda/std/tuple>
28 #include <thrust/copy.h>
29 #include <thrust/execution_policy.h>
30 #include <thrust/for_each.h>
31 #include <thrust/iterator/zip_iterator.h>
32 #include <thrust/reduce.h>
33 #include <thrust/sort.h>
34 #include <thrust/transform.h>
35 #include <thrust/transform_reduce.h>
36 
37 #include <algorithm>
38 
39 namespace ML {
40 namespace HDBSCAN {
41 namespace detail {
42 namespace Utils {
43 
/**
 * Run a CUB device-wide segmented reduction using the standard two-pass
 * CUB protocol: a first call with a null workspace pointer only queries the
 * required temporary-storage size; the second call performs the reduction.
 *
 * @param[in]  in              device array of input values
 * @param[out] out             device array receiving one result per segment
 * @param[in]  n_segments      number of segments to reduce
 * @param[in]  offsets         device array of n_segments + 1 offsets;
 *                             segment i spans [offsets[i], offsets[i + 1])
 * @param[in]  stream          CUDA stream all work is ordered on
 * @param[in]  cub_reduce_func callable forwarding to a
 *                             cub::DeviceSegmentedReduce entry point
 */
template <typename value_idx, typename value_t, typename CUBReduceFunc>
void cub_segmented_reduce(const value_t* in,
                          value_t* out,
                          int n_segments,
                          const value_idx* offsets,
                          cudaStream_t stream,
                          CUBReduceFunc cub_reduce_func)
{
  // Pass 1: with a null workspace pointer, CUB only reports the bytes needed.
  size_t workspace_bytes = 0;
  cub_reduce_func(nullptr, workspace_bytes, in, out, n_segments, offsets, offsets + 1, stream);

  // Allocate scratch space stream-ordered, then run the reduction for real.
  rmm::device_uvector<char> workspace(workspace_bytes, stream);
  cub_reduce_func(
    workspace.data(), workspace_bytes, in, out, n_segments, offsets, offsets + 1, stream);
}
73 
/**
 * Build the "cluster tree" from a condensed hierarchy by keeping only the
 * edges whose size is > 1 (i.e. dropping all leaf/point edges), and
 * re-indexing parent/child ids to start at 0 by subtracting n_leaves.
 *
 * NOTE(review): the documentation extraction this file was recovered from
 * dropped the function signature and the return statement's opening line;
 * both are restored here to match the declared symbol
 * `Common::CondensedHierarchy<value_idx, value_t> make_cluster_tree(
 *    const raft::handle_t&, Common::CondensedHierarchy<value_idx, value_t>&)`.
 *
 * @param[in] handle         raft handle supplying the stream / thrust policy
 * @param[in] condensed_tree condensed hierarchy to filter
 * @return a new CondensedHierarchy containing only cluster edges
 */
template <typename value_idx, typename value_t>
Common::CondensedHierarchy<value_idx, value_t> make_cluster_tree(
  const raft::handle_t& handle, Common::CondensedHierarchy<value_idx, value_t>& condensed_tree)
{
  auto stream        = handle.get_stream();
  auto thrust_policy = handle.get_thrust_policy();
  auto parents       = condensed_tree.get_parents();
  auto children      = condensed_tree.get_children();
  auto lambdas       = condensed_tree.get_lambdas();
  auto sizes         = condensed_tree.get_sizes();

  // Count edges whose size > 1: these are the cluster (non-leaf) edges.
  value_idx cluster_tree_edges = thrust::transform_reduce(
    thrust_policy,
    sizes,
    sizes + condensed_tree.get_n_edges(),
    cuda::proclaim_return_type<value_idx>(
      [=] __device__(value_idx a) -> value_idx { return static_cast<value_idx>(a > 1); }),
    static_cast<value_idx>(0),
    cuda::std::plus<value_idx>());

  // remove leaves from condensed tree
  rmm::device_uvector<value_idx> cluster_parents(cluster_tree_edges, stream);
  rmm::device_uvector<value_idx> cluster_children(cluster_tree_edges, stream);
  rmm::device_uvector<value_t> cluster_lambdas(cluster_tree_edges, stream);
  rmm::device_uvector<value_idx> cluster_sizes(cluster_tree_edges, stream);

  auto in = thrust::make_zip_iterator(cuda::std::make_tuple(parents, children, lambdas, sizes));

  auto out = thrust::make_zip_iterator(cuda::std::make_tuple(
    cluster_parents.data(), cluster_children.data(), cluster_lambdas.data(), cluster_sizes.data()));

  // Stencil on `sizes`: copy only the edges whose size > 1.
  thrust::copy_if(thrust_policy,
                  in,
                  in + (condensed_tree.get_n_edges()),
                  sizes,
                  out,
                  [=] __device__(value_idx a) { return a > 1; });

  // Cluster ids in the condensed tree start at n_leaves; shift both parents
  // and children down so the cluster tree is 0-indexed.
  auto n_leaves = condensed_tree.get_n_leaves();
  thrust::transform(thrust_policy,
                    cluster_parents.begin(),
                    cluster_parents.end(),
                    cluster_parents.begin(),
                    [n_leaves] __device__(value_idx a) { return a - n_leaves; });
  thrust::transform(thrust_policy,
                    cluster_children.begin(),
                    cluster_children.end(),
                    cluster_children.begin(),
                    [n_leaves] __device__(value_idx a) { return a - n_leaves; });

  return Common::CondensedHierarchy<value_idx, value_t>(handle,
                                                        condensed_tree.get_n_leaves(),
                                                        cluster_tree_edges,
                                                        condensed_tree.get_n_clusters(),
                                                        std::move(cluster_parents),
                                                        std::move(cluster_children),
                                                        std::move(cluster_lambdas),
                                                        std::move(cluster_sizes));
}
142 
/**
 * Convert a parent-sorted condensed-tree edge list into CSR row offsets over
 * clusters. `sorted_parents` is re-indexed IN PLACE (n_leaves is subtracted
 * from every entry) so cluster ids start at 0, then a sorted-COO-to-CSR
 * conversion fills `indptr`.
 *
 * NOTE(review): the documentation extraction this file was recovered from
 * dropped the `condensed_tree` parameter line and the `thrust::transform(`
 * call line; both are restored here to match the declared symbol
 * `void parent_csr(const raft::handle_t&,
 *                  Common::CondensedHierarchy<value_idx, value_t>&,
 *                  value_idx*, value_idx*)`.
 * Unused locals (`children`, `sizes`) from the original were removed.
 *
 * @param[in]    handle         raft handle supplying the stream / thrust policy
 * @param[in]    condensed_tree hierarchy providing n_edges / n_leaves / n_clusters
 * @param[inout] sorted_parents parent ids sorted ascending; shifted in place
 * @param[out]   indptr         CSR offsets, length n_clusters + 1
 */
template <typename value_idx, typename value_t>
void parent_csr(const raft::handle_t& handle,
                Common::CondensedHierarchy<value_idx, value_t>& condensed_tree,
                value_idx* sorted_parents,
                value_idx* indptr)
{
  auto stream        = handle.get_stream();
  auto thrust_policy = handle.get_thrust_policy();

  auto n_edges    = condensed_tree.get_n_edges();
  auto n_leaves   = condensed_tree.get_n_leaves();
  auto n_clusters = condensed_tree.get_n_clusters();

  // 0-index sorted parents by subtracting n_leaves for offsets and birth/stability indexing
  auto index_op = [n_leaves] __device__(const auto& x) { return x - n_leaves; };
  thrust::transform(
    thrust_policy, sorted_parents, sorted_parents + n_edges, sorted_parents, index_op);

  raft::sparse::convert::sorted_coo_to_csr(sorted_parents, n_edges, indptr, n_clusters + 1, stream);
}
174 
/**
 * Row-normalize a dense row-major m x n matrix in place so each row sums to 1
 * (L1 normalization): compute per-row L1 norms, then divide every element by
 * its row's norm.
 *
 * @param[inout] data   device pointer to the m x n row-major matrix
 * @param[in]    n      number of columns
 * @param[in]    m      number of rows
 * @param[in]    stream CUDA stream all work is ordered on
 */
template <typename value_idx, typename value_t>
void normalize(value_t* data, value_idx n, size_t m, cudaStream_t stream)
{
  rmm::device_uvector<value_t> row_sums(m, stream);

  // Per-row L1 norms (sums of absolute values).
  raft::linalg::rowNorm<raft::linalg::NormType::L1Norm, true, value_t, size_t>(
    row_sums.data(), data, (size_t)n, m, stream);

  // Divide each row by its sum, writing back into `data` in place.
  raft::linalg::matrixVectorOp<true, false>(
    data,
    data,
    row_sums.data(),
    n,
    (value_idx)m,
    [] __device__(value_t element, value_t row_sum) { return element / row_sum; },
    stream);
}
194 
/**
 * Apply a numerically-stabilized element-wise exponential to each row of a
 * dense row-major m x n matrix, in place: every element is replaced by
 * exp(x - row_max), where row_max is the row's L-infinity norm. Note this is
 * the exponentiation step of a softmax; no division by the row sum happens
 * here.
 *
 * @param[in]    handle raft handle supplying stream / resources
 * @param[inout] data   device pointer to the m x n row-major matrix
 * @param[in]    n      number of columns
 * @param[in]    m      number of rows
 */
template <typename value_idx, typename value_t>
void softmax(const raft::handle_t& handle, value_t* data, value_idx n, size_t m)
{
  rmm::device_uvector<value_t> row_max(m, handle.get_stream());

  // Read-only and writable views over the same buffers.
  auto matrix_in = raft::make_device_matrix_view<const value_t, value_idx, raft::row_major>(
    data, (int)m, n);
  auto matrix_out =
    raft::make_device_matrix_view<value_t, value_idx, raft::row_major>(data, (int)m, n);
  auto row_max_out = raft::make_device_vector_view<value_t, value_idx>(row_max.data(), (int)m);
  auto row_max_in =
    raft::make_device_vector_view<const value_t, value_idx>(row_max.data(), (int)m);

  // Per-row maximum magnitude (L-inf norm), used to stabilize exp().
  raft::linalg::norm<raft::linalg::NormType::LinfNorm, raft::Apply::ALONG_ROWS>(
    handle, matrix_in, row_max_out);

  // element <- exp(element - row_max), broadcast along columns, in place.
  raft::linalg::matrix_vector_op<raft::Apply::ALONG_COLUMNS>(
    handle,
    matrix_in,
    row_max_in,
    matrix_out,
    [] __device__(value_t element, value_t max_val) { return exp(element - max_val); });
}
228 
229 }; // namespace Utils
230 }; // namespace detail
231 }; // namespace HDBSCAN
232 }; // namespace ML
Definition: hdbscan.hpp:29
value_idx * get_sizes()
Definition: hdbscan.hpp:107
value_t * get_lambdas()
Definition: hdbscan.hpp:106
value_idx get_n_leaves() const
Definition: hdbscan.hpp:110
value_idx get_n_edges()
Definition: hdbscan.hpp:108
value_idx * get_children()
Definition: hdbscan.hpp:105
int get_n_clusters()
Definition: hdbscan.hpp:109
value_idx * get_parents()
Definition: hdbscan.hpp:104
Common::CondensedHierarchy< value_idx, value_t > make_cluster_tree(const raft::handle_t &handle, Common::CondensedHierarchy< value_idx, value_t > &condensed_tree)
Definition: utils.h:84
void softmax(const raft::handle_t &handle, value_t *data, value_idx n, size_t m)
Definition: utils.h:206
void normalize(value_t *data, value_idx n, size_t m, cudaStream_t stream)
Definition: utils.h:176
void cub_segmented_reduce(const value_t *in, value_t *out, int n_segments, const value_idx *offsets, cudaStream_t stream, CUBReduceFunc cub_reduce_func)
Definition: utils.h:58
void parent_csr(const raft::handle_t &handle, Common::CondensedHierarchy< value_idx, value_t > &condensed_tree, value_idx *sorted_parents, value_idx *indptr)
Definition: utils.h:153
void transform(const raft::handle_t &handle, const KMeansParams ¶ms, const float *centroids, const float *X, int n_samples, int n_features, float *X_new)
Transform X to a cluster-distance space.
Definition: dbscan.hpp:18