forest_model.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
#include <cuml/fil/infer_kind.hpp>

#include <cuda_runtime.h>

#include <algorithm>
#include <cstddef>
#include <optional>
#include <type_traits>
#include <variant>

// NOTE(review): several include lines (orig lines 6-11) were lost in
// extraction — the code below references decision_forest_variant, type_error,
// row_op, index_type and raft_proto (buffer/handle/cuda_stream/cuda_check/
// ceildiv/copy), so the corresponding project headers must be restored from
// the original file.
20 namespace ML {
21 namespace fil {
22 
29 struct forest_model {
32  : decision_forest_{forest}
33  {
34  }
35 
37  auto num_features()
38  {
39  return std::visit([](auto&& concrete_forest) { return concrete_forest.num_features(); },
40  decision_forest_);
41  }
42 
44  auto num_outputs()
45  {
46  return std::visit([](auto&& concrete_forest) { return concrete_forest.num_outputs(); },
47  decision_forest_);
48  }
49 
51  auto num_trees()
52  {
53  return std::visit([](auto&& concrete_forest) { return concrete_forest.num_trees(); },
54  decision_forest_);
55  }
56 
59  {
60  return std::visit([](auto&& concrete_forest) { return concrete_forest.has_vector_leaves(); },
61  decision_forest_);
62  }
63 
66  {
67  return std::visit([](auto&& concrete_forest) { return concrete_forest.row_postprocessing(); },
68  decision_forest_);
69  }
70 
73  {
74  return std::visit(
75  [&val](auto&& concrete_forest) { concrete_forest.set_row_postprocessing(val); },
76  decision_forest_);
77  }
78 
82  {
83  return std::visit([](auto&& concrete_forest) { return concrete_forest.elem_postprocessing(); },
84  decision_forest_);
85  }
86 
88  auto memory_type()
89  {
90  return std::visit([](auto&& concrete_forest) { return concrete_forest.memory_type(); },
91  decision_forest_);
92  }
93 
95  auto device_index()
96  {
97  return std::visit([](auto&& concrete_forest) { return concrete_forest.device_index(); },
98  decision_forest_);
99  }
100 
103  {
104  return std::visit(
105  [](auto&& concrete_forest) {
106  return std::is_same_v<typename std::remove_reference_t<decltype(concrete_forest)>::io_type,
107  double>;
108  },
109  decision_forest_);
110  }
111 
135  template <typename io_t>
137  raft_proto::buffer<io_t> const& input,
139  infer_kind predict_type = infer_kind::default_kind,
140  std::optional<index_type> specified_chunk_size = std::nullopt)
141  {
142  std::visit(
143  [this, predict_type, &output, &input, &stream, &specified_chunk_size](
144  auto&& concrete_forest) {
145  if constexpr (std::is_same_v<
146  typename std::remove_reference_t<decltype(concrete_forest)>::io_type,
147  io_t>) {
148  concrete_forest.predict(output, input, stream, predict_type, specified_chunk_size);
149  } else {
150  throw type_error("Input type does not match model_type");
151  }
152  },
153  decision_forest_);
154  }
155 
183  template <typename io_t>
184  void predict(raft_proto::handle_t const& handle,
185  raft_proto::buffer<io_t>& output,
186  raft_proto::buffer<io_t> const& input,
187  infer_kind predict_type = infer_kind::default_kind,
188  std::optional<index_type> specified_chunk_size = std::nullopt)
189  {
190  std::visit(
191  [this, predict_type, &handle, &output, &input, &specified_chunk_size](
192  auto&& concrete_forest) {
193  using model_io_t = typename std::remove_reference_t<decltype(concrete_forest)>::io_type;
194  if constexpr (std::is_same_v<model_io_t, io_t>) {
195  if (output.memory_type() == memory_type() && input.memory_type() == memory_type()) {
196  concrete_forest.predict(
197  output, input, handle.get_next_usable_stream(), predict_type, specified_chunk_size);
198  } else {
199  auto constexpr static const MIN_CHUNKS_PER_PARTITION = std::size_t{64};
200  auto constexpr static const MAX_CHUNK_SIZE = std::size_t{64};
201 
202  auto row_count = input.size() / num_features();
203  auto partition_size =
205  specified_chunk_size.value_or(MAX_CHUNK_SIZE) * MIN_CHUNKS_PER_PARTITION);
206  auto partition_count = raft_proto::ceildiv(row_count, partition_size);
207  for (auto i = std::size_t{}; i < partition_count; ++i) {
208  auto stream = handle.get_next_usable_stream();
209  auto rows_in_this_partition =
210  std::min(partition_size, row_count - i * partition_size);
211  auto partition_in = raft_proto::buffer<io_t>{};
212  if (input.memory_type() != memory_type()) {
213  partition_in =
214  raft_proto::buffer<io_t>{rows_in_this_partition * num_features(), memory_type()};
215  raft_proto::copy<raft_proto::DEBUG_ENABLED>(partition_in,
216  input,
217  0,
218  i * partition_size * num_features(),
219  partition_in.size(),
220  stream);
221  } else {
222  partition_in =
223  raft_proto::buffer<io_t>{input.data() + i * partition_size * num_features(),
224  rows_in_this_partition * num_features(),
225  memory_type()};
226  }
227  auto partition_out = raft_proto::buffer<io_t>{};
228  if (output.memory_type() != memory_type()) {
229  partition_out =
230  raft_proto::buffer<io_t>{rows_in_this_partition * num_outputs(), memory_type()};
231  } else {
232  partition_out =
233  raft_proto::buffer<io_t>{output.data() + i * partition_size * num_outputs(),
234  rows_in_this_partition * num_outputs(),
235  memory_type()};
236  }
237  concrete_forest.predict(
238  partition_out, partition_in, stream, predict_type, specified_chunk_size);
239  if (output.memory_type() != memory_type()) {
240  raft_proto::copy<raft_proto::DEBUG_ENABLED>(output,
241  partition_out,
242  i * partition_size * num_outputs(),
243  0,
244  partition_out.size(),
245  stream);
246  }
247  }
248  }
249  } else {
250  throw type_error("Input type does not match model_type");
251  }
252  },
253  decision_forest_);
254  }
255 
282  template <typename io_t>
283  void predict(raft_proto::handle_t const& handle,
284  io_t* output,
285  io_t* input,
286  std::size_t num_rows,
287  raft_proto::device_type out_mem_type,
288  raft_proto::device_type in_mem_type,
289  infer_kind predict_type = infer_kind::default_kind,
290  std::optional<index_type> specified_chunk_size = std::nullopt)
291  {
292  int current_device_id;
293  raft_proto::cuda_check(cudaGetDevice(¤t_device_id));
294  auto out_buffer =
295  raft_proto::buffer{output, num_rows * num_outputs(), out_mem_type, current_device_id};
296  auto in_buffer =
297  raft_proto::buffer{input, num_rows * num_features(), in_mem_type, current_device_id};
298  predict(handle, out_buffer, in_buffer, predict_type, specified_chunk_size);
299  }
300 
301  private:
302  decision_forest_variant decision_forest_;
303 };
304 
305 } // namespace fil
306 } // namespace ML
math_t max(math_t a, math_t b)
Definition: learning_rate.h:16
infer_kind
Definition: infer_kind.hpp:8
row_op
Definition: postproc_ops.hpp:10
std::variant< detail::preset_decision_forest< std::variant_alternative_t< 0, detail::specialization_variant >::layout, std::variant_alternative_t< 0, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 0, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 1, detail::specialization_variant >::layout, std::variant_alternative_t< 1, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 1, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 2, detail::specialization_variant >::layout, std::variant_alternative_t< 2, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 2, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 3, detail::specialization_variant >::layout, std::variant_alternative_t< 3, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 3, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 4, detail::specialization_variant >::layout, std::variant_alternative_t< 4, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 4, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 5, detail::specialization_variant >::layout, std::variant_alternative_t< 5, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 5, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 6, detail::specialization_variant >::layout, std::variant_alternative_t< 6, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 6, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 
7, detail::specialization_variant >::layout, std::variant_alternative_t< 7, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 7, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 8, detail::specialization_variant >::layout, std::variant_alternative_t< 8, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 8, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 9, detail::specialization_variant >::layout, std::variant_alternative_t< 9, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 9, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 10, detail::specialization_variant >::layout, std::variant_alternative_t< 10, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 10, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 11, detail::specialization_variant >::layout, std::variant_alternative_t< 11, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 11, detail::specialization_variant >::has_large_trees > > decision_forest_variant
Definition: decision_forest.hpp:425
Definition: dbscan.hpp:18
HOST DEVICE constexpr auto ceildiv(T dividend, U divisor)
Definition: ceildiv.hpp:10
int cuda_stream
Definition: cuda_stream.hpp:14
void cuda_check(error_t const &err) noexcept(!GPU_ENABLED)
Definition: cuda_check.hpp:15
device_type
Definition: device_type.hpp:7
Definition: forest_model.hpp:29
auto row_postprocessing()
Definition: forest_model.hpp:65
auto num_features()
Definition: forest_model.hpp:37
void predict(raft_proto::handle_t const &handle, io_t *output, io_t *input, std::size_t num_rows, raft_proto::device_type out_mem_type, raft_proto::device_type in_mem_type, infer_kind predict_type=infer_kind::default_kind, std::optional< index_type > specified_chunk_size=std::nullopt)
Definition: forest_model.hpp:283
void predict(raft_proto::buffer< io_t > &output, raft_proto::buffer< io_t > const &input, raft_proto::cuda_stream stream=raft_proto::cuda_stream{}, infer_kind predict_type=infer_kind::default_kind, std::optional< index_type > specified_chunk_size=std::nullopt)
Definition: forest_model.hpp:136
auto num_trees()
Definition: forest_model.hpp:51
auto num_outputs()
Definition: forest_model.hpp:44
forest_model(decision_forest_variant &&forest=decision_forest_variant{})
Definition: forest_model.hpp:31
auto elem_postprocessing()
Definition: forest_model.hpp:81
void predict(raft_proto::handle_t const &handle, raft_proto::buffer< io_t > &output, raft_proto::buffer< io_t > const &input, infer_kind predict_type=infer_kind::default_kind, std::optional< index_type > specified_chunk_size=std::nullopt)
Definition: forest_model.hpp:184
auto memory_type()
Definition: forest_model.hpp:88
auto has_vector_leaves()
Definition: forest_model.hpp:58
void set_row_postprocessing(row_op val)
Definition: forest_model.hpp:72
auto device_index()
Definition: forest_model.hpp:95
auto is_double_precision()
Definition: forest_model.hpp:102
Definition: forest.hpp:24
Definition: exceptions.hpp:40
A container which may or may not own its own data on host or device.
Definition: buffer.hpp:30
auto size() const noexcept
Definition: buffer.hpp:282
HOST DEVICE auto * data() const noexcept
Definition: buffer.hpp:283
auto memory_type() const noexcept
Definition: buffer.hpp:284
Definition: handle.hpp:36
auto get_usable_stream_count() const
Definition: handle.hpp:39
auto get_next_usable_stream() const
Definition: handle.hpp:37