decision_forest.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
31 
32 #include <stddef.h>
33 #include <stdint.h>
34 
35 #include <algorithm>
36 #include <cstddef>
37 #include <limits>
38 #include <optional>
39 #include <variant>
40 
41 namespace ML {
42 namespace experimental {
43 namespace fil {
44 
68 template <tree_layout layout_v,
69  typename threshold_t,
70  typename index_t,
71  typename metadata_storage_t,
72  typename offset_t>
77  auto constexpr static const layout = layout_v;
90  using io_type = typename forest_type::io_type;
94  using threshold_type = threshold_t;
103 
108  : nodes_{},
109  root_node_indexes_{},
110  node_id_mapping_{},
111  vector_output_{},
112  categorical_storage_{},
113  num_features_{},
114  num_outputs_{},
115  leaf_size_{},
116  has_categorical_nodes_{false},
117  row_postproc_{},
118  elem_postproc_{},
119  average_factor_{},
120  bias_{},
121  postproc_constant_{}
122  {
123  }
124 
162  raft_proto::buffer<index_type>&& root_node_indexes,
163  raft_proto::buffer<index_type>&& node_id_mapping,
166  bool has_categorical_nodes = false,
167  std::optional<raft_proto::buffer<io_type>>&& vector_output = std::nullopt,
168  std::optional<raft_proto::buffer<typename node_type::index_type>>&&
169  categorical_storage = std::nullopt,
170  index_type leaf_size = index_type{1},
171  row_op row_postproc = row_op::disable,
172  element_op elem_postproc = element_op::disable,
173  io_type average_factor = io_type{1},
174  io_type bias = io_type{0},
175  io_type postproc_constant = io_type{1})
176  : nodes_{nodes},
177  root_node_indexes_{root_node_indexes},
178  node_id_mapping_{node_id_mapping},
179  vector_output_{vector_output},
180  categorical_storage_{categorical_storage},
181  num_features_{num_features},
182  num_outputs_{num_outputs},
183  leaf_size_{leaf_size},
184  has_categorical_nodes_{has_categorical_nodes},
185  row_postproc_{row_postproc},
186  elem_postproc_{elem_postproc},
187  average_factor_{average_factor},
188  bias_{bias},
189  postproc_constant_{postproc_constant}
190  {
191  if (nodes.memory_type() != root_node_indexes.memory_type()) {
193  "Nodes and indexes of forest must both be stored on either host or device");
194  }
195  if (nodes.device_index() != root_node_indexes.device_index()) {
197  "Nodes and indexes of forest must both be stored on same device");
198  }
199  detail::initialize_device<forest_type>(nodes.device());
200  }
201 
203  auto num_features() const { return num_features_; }
205  auto num_trees() const { return root_node_indexes_.size(); }
207  auto has_vector_leaves() const { return vector_output_.has_value(); }
208 
211  auto num_outputs(infer_kind inference_kind = infer_kind::default_kind) const
212  {
213  auto result = num_outputs_;
214  if (inference_kind == infer_kind::per_tree) {
215  result = num_trees();
216  if (has_vector_leaves()) { result *= num_outputs_; }
217  } else if (inference_kind == infer_kind::leaf_id) {
218  result = num_trees();
219  }
220  return result;
221  }
222 
224  auto row_postprocessing() const { return row_postproc_; }
225  // Setter for row_postprocessing
226  void set_row_postprocessing(row_op val) { row_postproc_ = val; }
229  auto elem_postprocessing() const { return elem_postproc_; }
230 
232  auto memory_type() { return nodes_.memory_type(); }
234  auto device_index() { return nodes_.device_index(); }
235 
261  infer_kind predict_type = infer_kind::default_kind,
262  std::optional<index_type> specified_rows_per_block_iter = std::nullopt)
263  {
264  if (output.memory_type() != memory_type() || input.memory_type() != memory_type()) {
266  "Tried to use host I/O data with model on device or vice versa"};
267  }
268  if (output.device_index() != device_index() || input.device_index() != device_index()) {
269  throw raft_proto::wrong_device{"I/O data on different device than model"};
270  }
271  auto* vector_output_data =
272  (vector_output_.has_value() ? vector_output_->data() : static_cast<io_type*>(nullptr));
273  auto* categorical_storage_data =
274  (categorical_storage_.has_value() ? categorical_storage_->data()
275  : static_cast<categorical_storage_type*>(nullptr));
276  switch (nodes_.device().index()) {
277  case 0:
278  fil::detail::infer(obj(),
279  get_postprocessor(predict_type),
280  output.data(),
281  input.data(),
282  index_type(input.size() / num_features_),
283  num_features_,
284  num_outputs(predict_type),
285  has_categorical_nodes_,
286  vector_output_data,
287  categorical_storage_data,
288  predict_type,
289  specified_rows_per_block_iter,
290  std::get<0>(nodes_.device()),
291  stream);
292  break;
293  case 1:
294  fil::detail::infer(obj(),
295  get_postprocessor(predict_type),
296  output.data(),
297  input.data(),
298  index_type(input.size() / num_features_),
299  num_features_,
300  num_outputs(predict_type),
301  has_categorical_nodes_,
302  vector_output_data,
303  categorical_storage_data,
304  predict_type,
305  specified_rows_per_block_iter,
306  std::get<1>(nodes_.device()),
307  stream);
308  break;
309  }
310  }
311 
312  private:
316  raft_proto::buffer<index_type> root_node_indexes_;
318  raft_proto::buffer<index_type> node_id_mapping_;
320  std::optional<raft_proto::buffer<io_type>> vector_output_;
323  std::optional<raft_proto::buffer<categorical_storage_type>> categorical_storage_;
324 
325  // Metadata
326  index_type num_features_;
327  index_type num_outputs_;
328  index_type leaf_size_;
329  bool has_categorical_nodes_ = false;
330  // Postprocessing constants
331  row_op row_postproc_;
332  element_op elem_postproc_;
333  io_type average_factor_;
334  io_type bias_;
335  io_type postproc_constant_;
336 
337  auto obj() const
338  {
339  return forest_type{nodes_.data(),
340  root_node_indexes_.data(),
341  node_id_mapping_.data(),
342  static_cast<index_type>(root_node_indexes_.size()),
343  num_outputs_};
344  }
345 
346  auto get_postprocessor(infer_kind inference_kind = infer_kind::default_kind) const
347  {
348  auto result = postprocessor_type{};
349  if (inference_kind == infer_kind::default_kind) {
350  result = postprocessor_type{
351  row_postproc_, elem_postproc_, average_factor_, bias_, postproc_constant_};
352  }
353  return result;
354  }
355 
356  auto leaf_size() const { return leaf_size_; }
357 };
358 
359 namespace detail {
373 template <tree_layout layout, bool double_precision, bool large_trees>
375  layout,
380 
381 } // namespace detail
382 
385  std::variant<detail::preset_decision_forest<
386  std::variant_alternative_t<0, detail::specialization_variant>::layout,
387  std::variant_alternative_t<0, detail::specialization_variant>::is_double_precision,
388  std::variant_alternative_t<0, detail::specialization_variant>::has_large_trees>,
390  std::variant_alternative_t<1, detail::specialization_variant>::layout,
391  std::variant_alternative_t<1, detail::specialization_variant>::is_double_precision,
392  std::variant_alternative_t<1, detail::specialization_variant>::has_large_trees>,
394  std::variant_alternative_t<2, detail::specialization_variant>::layout,
395  std::variant_alternative_t<2, detail::specialization_variant>::is_double_precision,
396  std::variant_alternative_t<2, detail::specialization_variant>::has_large_trees>,
398  std::variant_alternative_t<3, detail::specialization_variant>::layout,
399  std::variant_alternative_t<3, detail::specialization_variant>::is_double_precision,
400  std::variant_alternative_t<3, detail::specialization_variant>::has_large_trees>,
402  std::variant_alternative_t<4, detail::specialization_variant>::layout,
403  std::variant_alternative_t<4, detail::specialization_variant>::is_double_precision,
404  std::variant_alternative_t<4, detail::specialization_variant>::has_large_trees>,
406  std::variant_alternative_t<5, detail::specialization_variant>::layout,
407  std::variant_alternative_t<5, detail::specialization_variant>::is_double_precision,
408  std::variant_alternative_t<5, detail::specialization_variant>::has_large_trees>,
410  std::variant_alternative_t<6, detail::specialization_variant>::layout,
411  std::variant_alternative_t<6, detail::specialization_variant>::is_double_precision,
412  std::variant_alternative_t<6, detail::specialization_variant>::has_large_trees>,
414  std::variant_alternative_t<7, detail::specialization_variant>::layout,
415  std::variant_alternative_t<7, detail::specialization_variant>::is_double_precision,
416  std::variant_alternative_t<7, detail::specialization_variant>::has_large_trees>>;
417 
436 inline auto get_forest_variant_index(bool use_double_thresholds,
437  index_type max_node_offset,
438  index_type num_features,
439  index_type num_categorical_nodes = index_type{},
440  index_type max_num_categories = index_type{},
441  index_type num_vector_leaves = index_type{},
442  tree_layout layout = preferred_tree_layout)
443 {
444  using small_index_t =
446  auto max_local_categories = index_type(sizeof(small_index_t) * 8);
447  // If the index required for pointing to categorical storage bins or vector
448  // leaf output exceeds what we can store in a uint32_t, uint64_t will be used
449  //
450  // TODO(wphicks): We are overestimating categorical storage required here
451  auto double_indexes_required =
452  (max_num_categories > max_local_categories &&
453  ((raft_proto::ceildiv(max_num_categories, max_local_categories) + 1 * num_categorical_nodes) >
455  num_vector_leaves > std::numeric_limits<small_index_t>::max();
456 
457  auto double_precision = use_double_thresholds || double_indexes_required;
458 
459  using small_metadata_t =
461  using small_offset_t =
463 
464  auto large_trees =
465  (num_features > (std::numeric_limits<small_metadata_t>::max() >> reserved_node_metadata_bits) ||
466  max_node_offset > std::numeric_limits<small_offset_t>::max());
467 
468  auto layout_value = static_cast<std::underlying_type_t<tree_layout>>(layout);
469 
470  return ((index_type{layout_value} << index_type{2}) +
471  (index_type{double_precision} << index_type{1}) + index_type{large_trees});
472 }
473 } // namespace fil
474 } // namespace experimental
475 } // namespace ML
math_t max(math_t a, math_t b)
Definition: learning_rate.h:27
void infer(forest_t const &forest, postprocessor< typename forest_t::io_type > const &postproc, typename forest_t::io_type *output, typename forest_t::io_type *input, index_type row_count, index_type col_count, index_type output_count, bool has_categorical_nodes, typename forest_t::io_type *vector_output=nullptr, typename forest_t::node_type::index_type *categorical_data=nullptr, infer_kind infer_type=infer_kind::default_kind, std::optional< index_type > specified_chunk_size=std::nullopt, raft_proto::device_id< D > device=raft_proto::device_id< D >{}, raft_proto::cuda_stream stream=raft_proto::cuda_stream{})
Definition: infer.hpp:69
tree_layout
Definition: tree_layout.hpp:20
element_op
Definition: postproc_ops.hpp:29
uint32_t index_type
Definition: index_type.hpp:21
infer_kind
Definition: infer_kind.hpp:20
auto get_forest_variant_index(bool use_double_thresholds, index_type max_node_offset, index_type num_features, index_type num_categorical_nodes=index_type{}, index_type max_num_categories=index_type{}, index_type num_vector_leaves=index_type{}, tree_layout layout=preferred_tree_layout)
Definition: decision_forest.hpp:436
std::variant< detail::preset_decision_forest< std::variant_alternative_t< 0, detail::specialization_variant >::layout, std::variant_alternative_t< 0, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 0, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 1, detail::specialization_variant >::layout, std::variant_alternative_t< 1, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 1, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 2, detail::specialization_variant >::layout, std::variant_alternative_t< 2, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 2, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 3, detail::specialization_variant >::layout, std::variant_alternative_t< 3, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 3, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 4, detail::specialization_variant >::layout, std::variant_alternative_t< 4, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 4, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 5, detail::specialization_variant >::layout, std::variant_alternative_t< 5, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 5, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 6, detail::specialization_variant >::layout, std::variant_alternative_t< 6, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 6, detail::specialization_variant >::has_large_trees >, detail::preset_decision_forest< std::variant_alternative_t< 7, detail::specialization_variant >::layout, std::variant_alternative_t< 7, detail::specialization_variant >::is_double_precision, std::variant_alternative_t< 7, detail::specialization_variant >::has_large_trees > > decision_forest_variant
Definition: decision_forest.hpp:416
row_op
Definition: postproc_ops.hpp:22
Definition: dbscan.hpp:30
HOST DEVICE constexpr auto ceildiv(T dividend, U divisor)
Definition: ceildiv.hpp:21
int cuda_stream
Definition: cuda_stream.hpp:25
Definition: decision_forest.hpp:73
typename forest_type::node_type node_type
Definition: decision_forest.hpp:86
auto row_postprocessing() const
Definition: decision_forest.hpp:224
auto device_index()
Definition: decision_forest.hpp:234
constexpr static auto const layout
Definition: decision_forest.hpp:77
auto has_vector_leaves() const
Definition: decision_forest.hpp:207
auto num_outputs(infer_kind inference_kind=infer_kind::default_kind) const
Definition: decision_forest.hpp:211
auto elem_postprocessing() const
Definition: decision_forest.hpp:229
postprocessor< io_type > postprocessor_type
Definition: decision_forest.hpp:98
forest< layout, threshold_t, index_t, metadata_storage_t, offset_t > forest_type
Definition: decision_forest.hpp:82
void predict(raft_proto::buffer< typename forest_type::io_type > &output, raft_proto::buffer< typename forest_type::io_type > const &input, raft_proto::cuda_stream stream=raft_proto::cuda_stream{}, infer_kind predict_type=infer_kind::default_kind, std::optional< index_type > specified_rows_per_block_iter=std::nullopt)
Definition: decision_forest.hpp:258
auto num_features() const
Definition: decision_forest.hpp:203
decision_forest(raft_proto::buffer< node_type > &&nodes, raft_proto::buffer< index_type > &&root_node_indexes, raft_proto::buffer< index_type > &&node_id_mapping, index_type num_features, index_type num_outputs=index_type{2}, bool has_categorical_nodes=false, std::optional< raft_proto::buffer< io_type >> &&vector_output=std::nullopt, std::optional< raft_proto::buffer< typename node_type::index_type >> &&categorical_storage=std::nullopt, index_type leaf_size=index_type{1}, row_op row_postproc=row_op::disable, element_op elem_postproc=element_op::disable, io_type average_factor=io_type{1}, io_type bias=io_type{0}, io_type postproc_constant=io_type{1})
Definition: decision_forest.hpp:161
typename forest_type::io_type io_type
Definition: decision_forest.hpp:90
void set_row_postprocessing(row_op val)
Definition: decision_forest.hpp:226
decision_forest()
Definition: decision_forest.hpp:107
auto num_trees() const
Definition: decision_forest.hpp:205
threshold_t threshold_type
Definition: decision_forest.hpp:94
typename node_type::index_type categorical_storage_type
Definition: decision_forest.hpp:102
auto memory_type()
Definition: decision_forest.hpp:232
std::conditional_t< large_trees, std::uint32_t, std::uint16_t > metadata_type
Definition: specialization_types.hpp:54
std::conditional_t< double_precision, std::uint64_t, std::uint32_t > index_type
Definition: specialization_types.hpp:52
std::conditional_t< double_precision, double, float > threshold_type
Definition: specialization_types.hpp:48
std::conditional_t< large_trees, std::uint32_t, std::uint16_t > offset_type
Definition: specialization_types.hpp:56
Definition: forest.hpp:36
threshold_t io_type
Definition: forest.hpp:38
node< layout_v, threshold_t, index_t, metadata_storage_t, offset_t > node_type
Definition: forest.hpp:37
Definition: postprocessor.hpp:141
auto size() const noexcept
Definition: buffer.hpp:293
HOST DEVICE auto * data() const noexcept
Definition: buffer.hpp:294
auto memory_type() const noexcept
Definition: buffer.hpp:295
auto device_index() const noexcept
Definition: buffer.hpp:308
auto device() const noexcept
Definition: buffer.hpp:306
Definition: exceptions.hpp:49
Definition: exceptions.hpp:38
Definition: exceptions.hpp:58