Namespaces | Classes | Typedefs | Functions
ML::experimental::fil::detail Namespace Reference

Namespaces

 device_initialization
 
 inference
 

Classes

struct  bitset
 
struct  model_builder_error
 
struct  decision_forest_builder
 
struct  specialization_types
 
struct  traversal_container
 
struct  postproc_params_t
 

Typedefs

template<tree_layout layout, bool double_precision, bool large_trees>
using preset_decision_forest = decision_forest< layout, typename specialization_types< layout, double_precision, large_trees >::threshold_type, typename specialization_types< layout, double_precision, large_trees >::index_type, typename specialization_types< layout, double_precision, large_trees >::metadata_type, typename specialization_types< layout, double_precision, large_trees >::offset_type >
 
using specialization_variant = std::variant< specialization_types< tree_layout::depth_first, false, false >, specialization_types< tree_layout::depth_first, false, true >, specialization_types< tree_layout::depth_first, true, false >, specialization_types< tree_layout::depth_first, true, true >, specialization_types< tree_layout::breadth_first, false, false >, specialization_types< tree_layout::breadth_first, false, true >, specialization_types< tree_layout::breadth_first, true, false >, specialization_types< tree_layout::breadth_first, true, true > >
 

Functions

template<typename forest_t , raft_proto::device_type D>
void initialize_device (raft_proto::device_id< D > device)
 
template<typename forest_t >
void initialize_device (raft_proto::device_id_variant device)
 
template<bool has_vector_leaves, bool has_categorical_nodes, typename node_t , typename io_t , typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto evaluate_tree_impl (node_t const *__restrict__ node, io_t const *__restrict__ row, node_t const *__restrict__ first_root_node=nullptr, node_id_mapping_t node_id_mapping=nullptr)
 
template<bool has_vector_leaves, typename node_t , typename io_t , typename categorical_storage_t , typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto evaluate_tree_impl (node_t const *__restrict__ node, io_t const *__restrict__ row, categorical_storage_t const *__restrict__ categorical_storage, node_t const *__restrict__ first_root_node=nullptr, node_id_mapping_t node_id_mapping=nullptr)
 
template<bool has_vector_leaves, bool has_categorical_nodes, bool has_nonlocal_categories, bool predict_leaf, typename forest_t , typename io_t , typename categorical_data_t >
HOST DEVICE auto evaluate_tree (forest_t const &forest, index_type tree_index, io_t const *__restrict__ row, categorical_data_t categorical_data)
 
auto get_max_shared_mem_per_block (raft_proto::device_id< raft_proto::device_type::gpu > device_id)
 
auto get_sm_count (raft_proto::device_id< raft_proto::device_type::gpu > device_id)
 
auto get_max_threads_per_sm (raft_proto::device_id< raft_proto::device_type::gpu > device_id)
 
auto get_max_shared_mem_per_sm (raft_proto::device_id< raft_proto::device_type::gpu > device_id)
 
auto get_mem_clock_rate (raft_proto::device_id< raft_proto::device_type::gpu > device_id)
 
auto get_core_clock_rate (raft_proto::device_id< raft_proto::device_type::gpu > device_id)
 
template<raft_proto::device_type D, typename forest_t >
void infer (forest_t const &forest, postprocessor< typename forest_t::io_type > const &postproc, typename forest_t::io_type *output, typename forest_t::io_type *input, index_type row_count, index_type col_count, index_type output_count, bool has_categorical_nodes, typename forest_t::io_type *vector_output=nullptr, typename forest_t::node_type::index_type *categorical_data=nullptr, infer_kind infer_type=infer_kind::default_kind, std::optional< index_type > specified_chunk_size=std::nullopt, raft_proto::device_id< D > device=raft_proto::device_id< D >{}, raft_proto::cuda_stream stream=raft_proto::cuda_stream{})
 
template<bool has_categorical_nodes, bool predict_leaf, typename forest_t , typename vector_output_t = std::nullptr_t, typename categorical_data_t = std::nullptr_t>
void infer_kernel_cpu (forest_t const &forest, postprocessor< typename forest_t::io_type > const &postproc, typename forest_t::io_type *output, typename forest_t::io_type const *input, index_type row_count, index_type col_count, index_type num_outputs, index_type chunk_size=hardware_constructive_interference_size, index_type grove_size=hardware_constructive_interference_size, vector_output_t vector_output_p=nullptr, categorical_data_t categorical_data=nullptr, infer_kind infer_type=infer_kind::default_kind)
 
template<typename threshold_t , typename index_t , typename metadata_storage_t , typename offset_t >
constexpr auto get_node_alignment ()
 

Typedef Documentation

◆ preset_decision_forest

template<tree_layout layout, bool double_precision, bool large_trees>
using ML::experimental::fil::detail::preset_decision_forest = decision_forest< layout, typename specialization_types<layout, double_precision, large_trees>::threshold_type, typename specialization_types<layout, double_precision, large_trees>::index_type, typename specialization_types<layout, double_precision, large_trees>::metadata_type, typename specialization_types<layout, double_precision, large_trees>::offset_type>

A convenience wrapper to simplify template instantiation of decision_forest.

This template takes the large range of available template parameters and reduces them to just three standard choices.

Template Parameters
layout: The in-memory layout of nodes in this forest
double_precision: Whether this model should use double-precision for floating-point evaluation and 64-bit integers for indexes
large_trees: Whether this forest expects more than 2**(16 - 3) - 1 = 8191 features or contains nodes whose child is offset more than 2**16 - 1 = 65535 nodes away.

◆ specialization_variant

Function Documentation

◆ evaluate_tree()

template<bool has_vector_leaves, bool has_categorical_nodes, bool has_nonlocal_categories, bool predict_leaf, typename forest_t , typename io_t , typename categorical_data_t >
HOST DEVICE auto ML::experimental::fil::detail::evaluate_tree ( forest_t const &  forest,
index_type  tree_index,
io_t const *__restrict__  row,
categorical_data_t  categorical_data 
)

Dispatch to the appropriate version of the evaluate_tree kernel.

Template Parameters
has_vector_leaves: Whether or not this tree has vector leaves
has_categorical_nodes: Whether or not this tree has any nodes with categorical splits
has_nonlocal_categories: Whether or not this tree has any nodes that store categorical split data externally
predict_leaf: Whether to predict leaf IDs
forest_t: The type of forest
io_t: The type used for input to and output from this tree (typically either floats or doubles)
categorical_data_t: The type for non-local categorical data storage.
Parameters
forest: The forest used to perform inference
tree_index: The index of the tree we are evaluating
row: The data row we are evaluating
categorical_data: The pointer to where non-local data on categorical splits are stored.

◆ evaluate_tree_impl() [1/2]

template<bool has_vector_leaves, typename node_t , typename io_t , typename categorical_storage_t , typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto ML::experimental::fil::detail::evaluate_tree_impl ( node_t const *__restrict__  node,
io_t const *__restrict__  row,
categorical_storage_t const *__restrict__  categorical_storage,
node_t const *__restrict__  first_root_node = nullptr,
node_id_mapping_t  node_id_mapping = nullptr 
)

◆ evaluate_tree_impl() [2/2]

template<bool has_vector_leaves, bool has_categorical_nodes, typename node_t , typename io_t , typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto ML::experimental::fil::detail::evaluate_tree_impl ( node_t const *__restrict__  node,
io_t const *__restrict__  row,
node_t const *__restrict__  first_root_node = nullptr,
node_id_mapping_t  node_id_mapping = nullptr 
)

◆ get_core_clock_rate()

auto ML::experimental::fil::detail::get_core_clock_rate ( raft_proto::device_id< raft_proto::device_type::gpu > device_id)
inline

◆ get_max_shared_mem_per_block()

auto ML::experimental::fil::detail::get_max_shared_mem_per_block ( raft_proto::device_id< raft_proto::device_type::gpu > device_id)
inline

◆ get_max_shared_mem_per_sm()

auto ML::experimental::fil::detail::get_max_shared_mem_per_sm ( raft_proto::device_id< raft_proto::device_type::gpu > device_id)
inline

◆ get_max_threads_per_sm()

auto ML::experimental::fil::detail::get_max_threads_per_sm ( raft_proto::device_id< raft_proto::device_type::gpu > device_id)
inline

◆ get_mem_clock_rate()

auto ML::experimental::fil::detail::get_mem_clock_rate ( raft_proto::device_id< raft_proto::device_type::gpu > device_id)
inline

◆ get_node_alignment()

template<typename threshold_t , typename index_t , typename metadata_storage_t , typename offset_t >
constexpr auto ML::experimental::fil::detail::get_node_alignment ( )
constexpr

◆ get_sm_count()

auto ML::experimental::fil::detail::get_sm_count ( raft_proto::device_id< raft_proto::device_type::gpu > device_id)
inline

◆ infer()

template<raft_proto::device_type D, typename forest_t >
void ML::experimental::fil::detail::infer ( forest_t const &  forest,
postprocessor< typename forest_t::io_type > const &  postproc,
typename forest_t::io_type *  output,
typename forest_t::io_type *  input,
index_type  row_count,
index_type  col_count,
index_type  output_count,
bool  has_categorical_nodes,
typename forest_t::io_type *  vector_output = nullptr,
typename forest_t::node_type::index_type *  categorical_data = nullptr,
infer_kind  infer_type = infer_kind::default_kind,
std::optional< index_type >  specified_chunk_size = std::nullopt,
raft_proto::device_id< D >  device = raft_proto::device_id<D>{},
raft_proto::cuda_stream  stream = raft_proto::cuda_stream{} 
)

◆ infer_kernel_cpu()

template<bool has_categorical_nodes, bool predict_leaf, typename forest_t , typename vector_output_t = std::nullptr_t, typename categorical_data_t = std::nullptr_t>
void ML::experimental::fil::detail::infer_kernel_cpu ( forest_t const &  forest,
postprocessor< typename forest_t::io_type > const &  postproc,
typename forest_t::io_type *  output,
typename forest_t::io_type const *  input,
index_type  row_count,
index_type  col_count,
index_type  num_outputs,
index_type  chunk_size = hardware_constructive_interference_size,
index_type  grove_size = hardware_constructive_interference_size,
vector_output_t  vector_output_p = nullptr,
categorical_data_t  categorical_data = nullptr,
infer_kind  infer_type = infer_kind::default_kind 
)

The CPU "kernel" used to actually perform forest inference.

Template Parameters
has_categorical_nodes: Whether or not this kernel should be compiled to operate on trees with categorical nodes.
forest_t: The type of the forest object which will be used for inference.
vector_output_t: If non-nullptr_t, this indicates the type we expect for outputs from vector leaves.
categorical_data_t: If non-nullptr_t, this indicates the type we expect for non-local categorical data storage.
Parameters
forest: The forest used to perform inference
postproc: The postprocessor object used to store all necessary data for postprocessing
output: Pointer to the host-accessible buffer where output should be written
input: Pointer to the host-accessible buffer where input should be read from
row_count: The number of rows in the input
col_count: The number of columns per row in the input
num_outputs: The expected number of output elements per row
chunk_size: The number of rows for each thread to process with its assigned trees before fetching a new set of trees/rows.
grove_size: The number of trees to assign to a thread for each chunk of rows it processes.
vector_output_p: If non-nullptr, a pointer to the stored leaf vector outputs for all leaf nodes
categorical_data: If non-nullptr, a pointer to where non-local data on categorical splits are stored.
infer_type: Type of inference to perform. Defaults to summing the outputs of all trees and producing an output per row. If set to "per_tree", we will instead output the individual outputs of each tree. If set to "leaf_id", we will output the integer ID of the leaf node for each tree.

◆ initialize_device() [1/2]

template<typename forest_t , raft_proto::device_type D>
void ML::experimental::fil::detail::initialize_device ( raft_proto::device_id< D >  device)

◆ initialize_device() [2/2]

template<typename forest_t >
void ML::experimental::fil::detail::initialize_device ( raft_proto::device_id_variant  device)