27 #ifdef omp_get_max_threads 
   28 #if omp_get_max_threads() != 1 
   29 #error "Inconsistent placeholders for omp_get_max_threads" 
   32 #define omp_get_max_threads() 1 
   81 template <
bool has_categorical_nodes,
 
   84           typename vector_output_t    = std::nullptr_t,
 
   85           typename categorical_data_t = std::nullptr_t>
 
   88                       typename forest_t::io_type* output,
 
   89                       typename forest_t::io_type 
const* input,
 
   93                       index_type chunk_size               = hardware_constructive_interference_size,
 
   94                       index_type grove_size               = hardware_constructive_interference_size,
 
   95                       vector_output_t vector_output_p     = 
nullptr,
 
   96                       categorical_data_t categorical_data = 
nullptr,
 
   99   auto constexpr has_vector_leaves       = !std::is_same_v<vector_output_t, std::nullptr_t>;
 
  100   auto constexpr has_nonlocal_categories = !std::is_same_v<categorical_data_t, std::nullptr_t>;
 
  102   using node_t = 
typename forest_t::node_type;
 
  104   using output_t = 
typename forest_t::template raw_output_type<vector_output_t>;
 
  110   auto output_workspace = std::vector<output_t>(row_count * num_outputs * num_grove, output_t{});
 
  111   auto const task_count = num_grove * num_chunk;
 
  113 #pragma omp parallel num_threads(std::min(index_type(omp_get_max_threads()), task_count)) 
  117     for (
auto task_index = 
index_type{}; task_index < task_count; ++task_index) {
 
  118       auto const grove_index = task_index / num_chunk;
 
  119       auto const chunk_index = task_index % num_chunk;
 
  120       auto const start_row   = chunk_index * chunk_size;
 
  121       auto const end_row     = std::min(start_row + chunk_size, row_count);
 
  122       auto const start_tree  = grove_index * grove_size;
 
  123       auto const end_tree    = std::min(start_tree + grove_size, num_tree);
 
  125       for (
auto row_index = start_row; row_index < end_row; ++row_index) {
 
  126         for (
auto tree_index = start_tree; tree_index < end_tree; ++tree_index) {
 
  128             std::conditional_t<predict_leaf,
 
  130                                std::conditional_t<has_vector_leaves,
 
  132                                                   typename node_t::threshold_type>>{};
 
  134                                       has_categorical_nodes,
 
  135                                       has_nonlocal_categories,
 
  137             forest, tree_index, input + row_index * col_count, categorical_data);
 
  138           if constexpr (predict_leaf) {
 
  139             output_workspace[row_index * num_outputs * num_grove + tree_index * num_grove +
 
  140                              grove_index] = 
static_cast<typename forest_t::io_type
>(tree_output);
 
  143             if constexpr (has_vector_leaves) {
 
  144               auto output_offset = (row_index * num_outputs * num_grove +
 
  145                                     tree_index * default_num_outputs * num_grove *
 
  148               for (
auto output_index = 
index_type{}; output_index < default_num_outputs;
 
  150                 output_workspace[output_offset + output_index * num_grove] +=
 
  151                   vector_output_p[tree_output * default_num_outputs + output_index];
 
  155                 (row_index * num_outputs * num_grove +
 
  156                  (tree_index % default_num_outputs) * num_grove *
 
  159               output_workspace[output_offset] += tree_output;
 
  168     for (
auto row_index = 
index_type{}; row_index < row_count; ++row_index) {
 
  169       for (
auto output_index = 
index_type{}; output_index < num_outputs; ++output_index) {
 
  170         auto grove_offset = (row_index * num_outputs * num_grove + output_index * num_grove);
 
  172         output_workspace[grove_offset] =
 
  173           std::accumulate(std::begin(output_workspace) + grove_offset,
 
  174                           std::begin(output_workspace) + grove_offset + num_grove,
 
  177       postproc(output_workspace.data() + row_index * num_outputs * num_grove,
 
  179                output + row_index * num_outputs,
 
void infer_kernel_cpu(forest_t const &forest, postprocessor< typename forest_t::io_type > const &postproc, typename forest_t::io_type *output, typename forest_t::io_type const *input, index_type row_count, index_type col_count, index_type num_outputs, index_type chunk_size=hardware_constructive_interference_size, index_type grove_size=hardware_constructive_interference_size, vector_output_t vector_output_p=nullptr, categorical_data_t categorical_data=nullptr, infer_kind infer_type=infer_kind::default_kind)
Definition: cpu.hpp:86
 
HOST DEVICE auto evaluate_tree(forest_t const &forest, index_type tree_index, io_t const *__restrict__ row, categorical_data_t categorical_data)
Definition: evaluate_tree.hpp:173
 
infer_kind
Definition: infer_kind.hpp:19
 
uint32_t index_type
Definition: index_type.hpp:20
 
Definition: dbscan.hpp:29
 
HOST DEVICE constexpr auto ceildiv(T dividend, U divisor)
Definition: ceildiv.hpp:21
 
Definition: forest.hpp:35
 
HOST DEVICE auto num_outputs() const
Definition: forest.hpp:71
 
HOST DEVICE auto tree_count() const
Definition: forest.hpp:67
 
Definition: postprocessor.hpp:140