Namespaces | |
device_initialization | |
inference | |
Classes | |
struct | bitset |
struct | model_builder_error |
struct | decision_forest_builder |
struct | specialization_types |
struct | traversal_container |
struct | postproc_params_t |
Typedefs | |
template<tree_layout layout, bool double_precision, bool large_trees> | |
using | preset_decision_forest = decision_forest< layout, typename specialization_types< layout, double_precision, large_trees >::threshold_type, typename specialization_types< layout, double_precision, large_trees >::index_type, typename specialization_types< layout, double_precision, large_trees >::metadata_type, typename specialization_types< layout, double_precision, large_trees >::offset_type > |
using | specialization_variant = std::variant< specialization_types< tree_layout::depth_first, false, false >, specialization_types< tree_layout::depth_first, false, true >, specialization_types< tree_layout::depth_first, true, false >, specialization_types< tree_layout::depth_first, true, true >, specialization_types< tree_layout::breadth_first, false, false >, specialization_types< tree_layout::breadth_first, false, true >, specialization_types< tree_layout::breadth_first, true, false >, specialization_types< tree_layout::breadth_first, true, true > > |
Functions | |
template<typename forest_t , raft_proto::device_type D> | |
void | initialize_device (raft_proto::device_id< D > device) |
template<typename forest_t > | |
void | initialize_device (raft_proto::device_id_variant device) |
template<bool has_vector_leaves, bool has_categorical_nodes, typename node_t , typename io_t , typename node_id_mapping_t = std::nullptr_t> | |
HOST DEVICE auto | evaluate_tree_impl (node_t const *__restrict__ node, io_t const *__restrict__ row, node_t const *__restrict__ first_root_node=nullptr, node_id_mapping_t node_id_mapping=nullptr) |
template<bool has_vector_leaves, typename node_t , typename io_t , typename categorical_storage_t , typename node_id_mapping_t = std::nullptr_t> | |
HOST DEVICE auto | evaluate_tree_impl (node_t const *__restrict__ node, io_t const *__restrict__ row, categorical_storage_t const *__restrict__ categorical_storage, node_t const *__restrict__ first_root_node=nullptr, node_id_mapping_t node_id_mapping=nullptr) |
template<bool has_vector_leaves, bool has_categorical_nodes, bool has_nonlocal_categories, bool predict_leaf, typename forest_t , typename io_t , typename categorical_data_t > | |
HOST DEVICE auto | evaluate_tree (forest_t const &forest, index_type tree_index, io_t const *__restrict__ row, categorical_data_t categorical_data) |
auto | get_max_shared_mem_per_block (raft_proto::device_id< raft_proto::device_type::gpu > device_id) |
auto | get_sm_count (raft_proto::device_id< raft_proto::device_type::gpu > device_id) |
auto | get_max_threads_per_sm (raft_proto::device_id< raft_proto::device_type::gpu > device_id) |
auto | get_max_shared_mem_per_sm (raft_proto::device_id< raft_proto::device_type::gpu > device_id) |
auto | get_mem_clock_rate (raft_proto::device_id< raft_proto::device_type::gpu > device_id) |
auto | get_core_clock_rate (raft_proto::device_id< raft_proto::device_type::gpu > device_id) |
template<raft_proto::device_type D, typename forest_t > | |
void | infer (forest_t const &forest, postprocessor< typename forest_t::io_type > const &postproc, typename forest_t::io_type *output, typename forest_t::io_type *input, index_type row_count, index_type col_count, index_type output_count, bool has_categorical_nodes, typename forest_t::io_type *vector_output=nullptr, typename forest_t::node_type::index_type *categorical_data=nullptr, infer_kind infer_type=infer_kind::default_kind, std::optional< index_type > specified_chunk_size=std::nullopt, raft_proto::device_id< D > device=raft_proto::device_id< D >{}, raft_proto::cuda_stream stream=raft_proto::cuda_stream{}) |
template<bool has_categorical_nodes, bool predict_leaf, typename forest_t , typename vector_output_t = std::nullptr_t, typename categorical_data_t = std::nullptr_t> | |
void | infer_kernel_cpu (forest_t const &forest, postprocessor< typename forest_t::io_type > const &postproc, typename forest_t::io_type *output, typename forest_t::io_type const *input, index_type row_count, index_type col_count, index_type num_outputs, index_type chunk_size=hardware_constructive_interference_size, index_type grove_size=hardware_constructive_interference_size, vector_output_t vector_output_p=nullptr, categorical_data_t categorical_data=nullptr, infer_kind infer_type=infer_kind::default_kind) |
template<typename threshold_t , typename index_t , typename metadata_storage_t , typename offset_t > | |
constexpr auto | get_node_alignment () |
using ML::experimental::fil::detail::preset_decision_forest = typedef decision_forest< layout, typename specialization_types<layout, double_precision, large_trees>::threshold_type, typename specialization_types<layout, double_precision, large_trees>::index_type, typename specialization_types<layout, double_precision, large_trees>::metadata_type, typename specialization_types<layout, double_precision, large_trees>::offset_type> |
A convenience wrapper to simplify template instantiation of decision_forest.
This template takes the large range of available template parameters and reduces them to just three standard choices.
layout | The in-memory layout of nodes in this forest |
double_precision | Whether this model should use double-precision for floating-point evaluation and 64-bit integers for indexes |
large_trees | Whether this forest expects more than 2**(16 - 3) - 1 = 8191 features or contains nodes whose child is offset more than 2**16 - 1 = 65535 nodes away. |
using ML::experimental::fil::detail::specialization_variant = typedef std::variant<specialization_types<tree_layout::depth_first, false, false>, specialization_types<tree_layout::depth_first, false, true>, specialization_types<tree_layout::depth_first, true, false>, specialization_types<tree_layout::depth_first, true, true>, specialization_types<tree_layout::breadth_first, false, false>, specialization_types<tree_layout::breadth_first, false, true>, specialization_types<tree_layout::breadth_first, true, false>, specialization_types<tree_layout::breadth_first, true, true> > |
HOST DEVICE auto ML::experimental::fil::detail::evaluate_tree | ( | forest_t const & | forest, |
index_type | tree_index, | ||
io_t const *__restrict__ | row, | ||
categorical_data_t | categorical_data | ||
) |
Dispatch to the appropriate version of the evaluate_tree kernel.
has_vector_leaves | Whether or not this tree has vector leaves |
has_categorical_nodes | Whether or not this tree has any nodes with categorical splits |
has_nonlocal_categories | Whether or not this tree has any nodes that store categorical split data externally |
predict_leaf | Whether to predict leaf IDs |
forest_t | The type of forest |
io_t | The type used for input to and output from this tree (typically either floats or doubles) |
categorical_data_t | The type for non-local categorical data storage. |
forest | The forest used to perform inference |
tree_index | The index of the tree we are evaluating |
row | The data row we are evaluating |
categorical_data | The pointer to where non-local data on categorical splits are stored. |
HOST DEVICE auto ML::experimental::fil::detail::evaluate_tree_impl | ( | node_t const *__restrict__ | node, |
io_t const *__restrict__ | row, | ||
categorical_storage_t const *__restrict__ | categorical_storage, | ||
node_t const *__restrict__ | first_root_node = nullptr , |
||
node_id_mapping_t | node_id_mapping = nullptr |
||
) |
HOST DEVICE auto ML::experimental::fil::detail::evaluate_tree_impl | ( | node_t const *__restrict__ | node, |
io_t const *__restrict__ | row, | ||
node_t const *__restrict__ | first_root_node = nullptr , |
||
node_id_mapping_t | node_id_mapping = nullptr |
||
) |
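auto ML::experimental::fil::detail::get_core_clock_rate | ( | raft_proto::device_id< raft_proto::device_type::gpu > | device_id | ) |
inline |
auto ML::experimental::fil::detail::get_max_shared_mem_per_block | ( | raft_proto::device_id< raft_proto::device_type::gpu > | device_id | ) |
inline |
auto ML::experimental::fil::detail::get_max_shared_mem_per_sm | ( | raft_proto::device_id< raft_proto::device_type::gpu > | device_id | ) |
inline |
auto ML::experimental::fil::detail::get_max_threads_per_sm | ( | raft_proto::device_id< raft_proto::device_type::gpu > | device_id | ) |
inline |
auto ML::experimental::fil::detail::get_mem_clock_rate | ( | raft_proto::device_id< raft_proto::device_type::gpu > | device_id | ) |
inline |
constexpr auto ML::experimental::fil::detail::get_node_alignment | ( | ) |
constexpr |
auto ML::experimental::fil::detail::get_sm_count | ( | raft_proto::device_id< raft_proto::device_type::gpu > | device_id | ) |
inline |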
void ML::experimental::fil::detail::infer | ( | forest_t const & | forest, |
postprocessor< typename forest_t::io_type > const & | postproc, | ||
typename forest_t::io_type * | output, | ||
typename forest_t::io_type * | input, | ||
index_type | row_count, | ||
index_type | col_count, | ||
index_type | output_count, | ||
bool | has_categorical_nodes, | ||
typename forest_t::io_type * | vector_output = nullptr , |
||
typename forest_t::node_type::index_type * | categorical_data = nullptr , |
||
infer_kind | infer_type = infer_kind::default_kind , |
||
std::optional< index_type > | specified_chunk_size = std::nullopt , |
||
raft_proto::device_id< D > | device = raft_proto::device_id<D>{} , |
||
raft_proto::cuda_stream | stream = raft_proto::cuda_stream{} |
||
) |
void ML::experimental::fil::detail::infer_kernel_cpu | ( | forest_t const & | forest, |
postprocessor< typename forest_t::io_type > const & | postproc, | ||
typename forest_t::io_type * | output, | ||
typename forest_t::io_type const * | input, | ||
index_type | row_count, | ||
index_type | col_count, | ||
index_type | num_outputs, | ||
index_type | chunk_size = hardware_constructive_interference_size , |
||
index_type | grove_size = hardware_constructive_interference_size , |
||
vector_output_t | vector_output_p = nullptr , |
||
categorical_data_t | categorical_data = nullptr , |
||
infer_kind | infer_type = infer_kind::default_kind |
||
) |
The CPU "kernel" used to actually perform forest inference.
has_categorical_nodes | Whether or not this kernel should be compiled to operate on trees with categorical nodes. |
forest_t | The type of the forest object which will be used for inference. |
vector_output_t | If non-nullptr_t, this indicates the type we expect for outputs from vector leaves. |
categorical_data_t | If non-nullptr_t, this indicates the type we expect for non-local categorical data storage. |
forest | The forest used to perform inference |
postproc | The postprocessor object used to store all necessary data for postprocessing |
output | Pointer to the host-accessible buffer where output should be written |
input | Pointer to the host-accessible buffer where input should be read from |
row_count | The number of rows in the input |
col_count | The number of columns per row in the input |
num_outputs | The expected number of output elements per row |
chunk_size | The number of rows for each thread to process with its assigned trees before fetching a new set of trees/rows. |
grove_size | The number of trees to assign to a thread for each chunk of rows it processes. |
vector_output_p | If non-nullptr, a pointer to the stored leaf vector outputs for all leaf nodes |
categorical_data | If non-nullptr, a pointer to where non-local data on categorical splits are stored. |
infer_type | Type of inference to perform. Defaults to summing the outputs of all trees and producing one output per row. If set to "per_tree", we will instead output the individual output of each tree. If set to "leaf_id", we will output the integer ID of the leaf node for each tree. |
void ML::experimental::fil::detail::initialize_device | ( | raft_proto::device_id< D > | device | ) |
void ML::experimental::fil::detail::initialize_device | ( | raft_proto::device_id_variant | device | ) |