Namespaces
	device_initialization

	inference

Classes
struct	bitset

struct	model_builder_error

struct	decision_forest_builder

struct	specialization_types

struct	postproc_params_t

Typedefs
template<tree_layout layout, bool double_precision, bool large_trees>
using	preset_decision_forest = decision_forest< layout, typename specialization_types< layout, double_precision, large_trees >::threshold_type, typename specialization_types< layout, double_precision, large_trees >::index_type, typename specialization_types< layout, double_precision, large_trees >::metadata_type, typename specialization_types< layout, double_precision, large_trees >::offset_type >

using	specialization_variant = std::variant< specialization_types< tree_layout::depth_first, false, false >, specialization_types< tree_layout::depth_first, false, true >, specialization_types< tree_layout::depth_first, true, false >, specialization_types< tree_layout::depth_first, true, true >, specialization_types< tree_layout::breadth_first, false, false >, specialization_types< tree_layout::breadth_first, false, true >, specialization_types< tree_layout::breadth_first, true, false >, specialization_types< tree_layout::breadth_first, true, true >, specialization_types< tree_layout::layered_children_together, false, false >, specialization_types< tree_layout::layered_children_together, false, true >, specialization_types< tree_layout::layered_children_together, true, false >, specialization_types< tree_layout::layered_children_together, true, true > >

Functions
template<typename forest_t , raft_proto::device_type D>
void	initialize_device (raft_proto::device_id< D > device)

template<typename forest_t >
void	initialize_device (raft_proto::device_id_variant device)

template<bool has_vector_leaves, bool has_categorical_nodes, typename node_t , typename io_t , typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto	evaluate_tree_impl (node_t const *__restrict__ node, io_t const *__restrict__ row, node_t const *__restrict__ first_root_node=nullptr, node_id_mapping_t node_id_mapping=nullptr)

template<bool has_vector_leaves, typename node_t , typename io_t , typename categorical_storage_t , typename node_id_mapping_t = std::nullptr_t>
HOST DEVICE auto	evaluate_tree_impl (node_t const *__restrict__ node, io_t const *__restrict__ row, categorical_storage_t const *__restrict__ categorical_storage, node_t const *__restrict__ first_root_node=nullptr, node_id_mapping_t node_id_mapping=nullptr)

template<bool has_vector_leaves, bool has_categorical_nodes, bool has_nonlocal_categories, bool predict_leaf, typename forest_t , typename io_t , typename categorical_data_t >
HOST DEVICE auto	evaluate_tree (forest_t const &forest, index_type tree_index, io_t const *__restrict__ row, categorical_data_t categorical_data)

auto	get_max_shared_mem_per_block (raft_proto::device_id< raft_proto::device_type::gpu > device_id)

auto	get_sm_count (raft_proto::device_id< raft_proto::device_type::gpu > device_id)

auto	get_max_threads_per_sm (raft_proto::device_id< raft_proto::device_type::gpu > device_id)

auto	get_max_shared_mem_per_sm (raft_proto::device_id< raft_proto::device_type::gpu > device_id)

auto	get_mem_clock_rate (raft_proto::device_id< raft_proto::device_type::gpu > device_id)

auto	get_core_clock_rate (raft_proto::device_id< raft_proto::device_type::gpu > device_id)

template<raft_proto::device_type D, typename forest_t >
void	infer (forest_t const &forest, postprocessor< typename forest_t::io_type > const &postproc, typename forest_t::io_type *output, typename forest_t::io_type *input, index_type row_count, index_type col_count, index_type output_count, bool has_categorical_nodes, typename forest_t::io_type *vector_output=nullptr, typename forest_t::node_type::index_type *categorical_data=nullptr, infer_kind infer_type=infer_kind::default_kind, std::optional< index_type > specified_chunk_size=std::nullopt, raft_proto::device_id< D > device=raft_proto::device_id< D >{}, raft_proto::cuda_stream stream=raft_proto::cuda_stream{})

template<bool has_categorical_nodes, bool predict_leaf, typename forest_t , typename vector_output_t = std::nullptr_t, typename categorical_data_t = std::nullptr_t>
void	infer_kernel_cpu (forest_t const &forest, postprocessor< typename forest_t::io_type > const &postproc, typename forest_t::io_type *output, typename forest_t::io_type const *input, index_type row_count, index_type col_count, index_type num_outputs, index_type chunk_size=hardware_constructive_interference_size, index_type grove_size=hardware_constructive_interference_size, vector_output_t vector_output_p=nullptr, categorical_data_t categorical_data=nullptr, infer_kind infer_type=infer_kind::default_kind)

template<typename threshold_t , typename index_t , typename metadata_storage_t , typename offset_t >
constexpr auto	get_node_alignment ()

Typedef Documentation

◆ preset_decision_forest

template<tree_layout layout, bool double_precision, bool large_trees>

using ML::experimental::fil::detail::preset_decision_forest = decision_forest< layout, typename specialization_types<layout, double_precision, large_trees>::threshold_type, typename specialization_types<layout, double_precision, large_trees>::index_type, typename specialization_types<layout, double_precision, large_trees>::metadata_type, typename specialization_types<layout, double_precision, large_trees>::offset_type>

A convenience wrapper to simplify template instantiation of decision_forest

This template takes the large range of available template parameters and reduces them to just three standard choices.

Template Parameters

layout	The in-memory layout of nodes in this forest
double_precision	Whether this model should use double-precision for floating-point evaluation and 64-bit integers for indexes
large_trees	Whether this forest expects more than $2^{16-3} - 1 = 8191$ features or contains nodes whose child is offset more than $2^{16} - 1 = 65535$ nodes away.

◆ specialization_variant

Function Documentation

◆ evaluate_tree()

template<bool has_vector_leaves, bool has_categorical_nodes, bool has_nonlocal_categories, bool predict_leaf, typename forest_t , typename io_t , typename categorical_data_t >

HOST DEVICE auto ML::experimental::fil::detail::evaluate_tree	(	forest_t const &	forest,
		index_type	tree_index,
		io_t const *__restrict__	row,
		categorical_data_t	categorical_data
	)

Dispatch to an appropriate version of the evaluate_tree kernel.

Template Parameters

has_vector_leaves	Whether or not this tree has vector leaves
has_categorical_nodes	Whether or not this tree has any nodes with categorical splits
has_nonlocal_categories	Whether or not this tree has any nodes that store categorical split data externally
predict_leaf	Whether to predict leaf IDs
forest_t	The type of forest
io_t	The type used for input to and output from this tree (typically either floats or doubles)
categorical_data_t	The type for non-local categorical data storage.

Parameters

forest	The forest used to perform inference
tree_index	The index of the tree we are evaluating
row	The data row we are evaluating
categorical_data	The pointer to where non-local data on categorical splits are stored.

◆ evaluate_tree_impl() [1/2]

template<bool has_vector_leaves, typename node_t , typename io_t , typename categorical_storage_t , typename node_id_mapping_t = std::nullptr_t>

HOST DEVICE auto ML::experimental::fil::detail::evaluate_tree_impl	(	node_t const *__restrict__	node,
		io_t const *__restrict__	row,
		categorical_storage_t const *__restrict__	categorical_storage,
		node_t const *__restrict__	first_root_node = `nullptr`,
		node_id_mapping_t	node_id_mapping = `nullptr`
	)

◆ evaluate_tree_impl() [2/2]

template<bool has_vector_leaves, bool has_categorical_nodes, typename node_t , typename io_t , typename node_id_mapping_t = std::nullptr_t>

HOST DEVICE auto ML::experimental::fil::detail::evaluate_tree_impl	(	node_t const *__restrict__	node,
		io_t const *__restrict__	row,
		node_t const *__restrict__	first_root_node = `nullptr`,
		node_id_mapping_t	node_id_mapping = `nullptr`
	)

◆ get_core_clock_rate()

auto ML::experimental::fil::detail::get_core_clock_rate ( raft_proto::device_id< raft_proto::device_type::gpu > device_id )

inline

◆ get_max_shared_mem_per_block()

auto ML::experimental::fil::detail::get_max_shared_mem_per_block ( raft_proto::device_id< raft_proto::device_type::gpu > device_id )

inline

◆ get_max_shared_mem_per_sm()

auto ML::experimental::fil::detail::get_max_shared_mem_per_sm ( raft_proto::device_id< raft_proto::device_type::gpu > device_id )

inline

◆ get_max_threads_per_sm()

auto ML::experimental::fil::detail::get_max_threads_per_sm ( raft_proto::device_id< raft_proto::device_type::gpu > device_id )

inline

◆ get_mem_clock_rate()

auto ML::experimental::fil::detail::get_mem_clock_rate ( raft_proto::device_id< raft_proto::device_type::gpu > device_id )

inline

◆ get_node_alignment()

template<typename threshold_t , typename index_t , typename metadata_storage_t , typename offset_t >

constexpr auto ML::experimental::fil::detail::get_node_alignment ( )

constexpr

◆ get_sm_count()

auto ML::experimental::fil::detail::get_sm_count ( raft_proto::device_id< raft_proto::device_type::gpu > device_id )

inline

◆ infer()

template<raft_proto::device_type D, typename forest_t >

void ML::experimental::fil::detail::infer	(	forest_t const &	forest,
		postprocessor< typename forest_t::io_type > const &	postproc,
		typename forest_t::io_type *	output,
		typename forest_t::io_type *	input,
		index_type	row_count,
		index_type	col_count,
		index_type	output_count,
		bool	has_categorical_nodes,
		typename forest_t::io_type *	vector_output = `nullptr`,
		typename forest_t::node_type::index_type *	categorical_data = `nullptr`,
		infer_kind	infer_type = `infer_kind::default_kind`,
		std::optional< index_type >	specified_chunk_size = `std::nullopt`,
		raft_proto::device_id< D >	device = `raft_proto::device_id<D>{}`,
		raft_proto::cuda_stream	stream = `raft_proto::cuda_stream{}`
	)

◆ infer_kernel_cpu()

template<bool has_categorical_nodes, bool predict_leaf, typename forest_t , typename vector_output_t = std::nullptr_t, typename categorical_data_t = std::nullptr_t>

void ML::experimental::fil::detail::infer_kernel_cpu	(	forest_t const &	forest,
		postprocessor< typename forest_t::io_type > const &	postproc,
		typename forest_t::io_type *	output,
		typename forest_t::io_type const *	input,
		index_type	row_count,
		index_type	col_count,
		index_type	num_outputs,
		index_type	chunk_size = `hardware_constructive_interference_size`,
		index_type	grove_size = `hardware_constructive_interference_size`,
		vector_output_t	vector_output_p = `nullptr`,
		categorical_data_t	categorical_data = `nullptr`,
		infer_kind	infer_type = `infer_kind::default_kind`
	)

The CPU "kernel" used to actually perform forest inference

Template Parameters

has_categorical_nodes	Whether or not this kernel should be compiled to operate on trees with categorical nodes.
predict_leaf	Whether to predict leaf IDs
forest_t	The type of the forest object which will be used for inference.
vector_output_t	If non-nullptr_t, this indicates the type we expect for outputs from vector leaves.
categorical_data_t	If non-nullptr_t, this indicates the type we expect for non-local categorical data storage.

Parameters

forest	The forest used to perform inference
postproc	The postprocessor object used to store all necessary data for postprocessing
output	Pointer to the host-accessible buffer where output should be written
input	Pointer to the host-accessible buffer where input should be read from
row_count	The number of rows in the input
col_count	The number of columns per row in the input
num_outputs	The expected number of output elements per row
chunk_size	The number of rows for each thread to process with its assigned trees before fetching a new set of trees/rows.
grove_size	The number of trees to assign to a thread for each chunk of rows it processes.
vector_output_p	If non-nullptr, a pointer to the stored leaf vector outputs for all leaf nodes
categorical_data	If non-nullptr, a pointer to where non-local data on categorical splits are stored.
infer_type	Type of inference to perform. Defaults to summing the outputs of all trees and producing one output per row. If set to "per_tree", we will instead output the individual outputs of each tree. If set to "leaf_id", we will output the integer ID of the leaf node for each tree.

◆ initialize_device() [1/2]

template<typename forest_t , raft_proto::device_type D>

void ML::experimental::fil::detail::initialize_device ( raft_proto::device_id< D > device )

◆ initialize_device() [2/2]

template<typename forest_t >

void ML::experimental::fil::detail::initialize_device ( raft_proto::device_id_variant device )

Namespaces

Classes

Typedefs

Functions

Typedef Documentation

◆ preset_decision_forest

◆ specialization_variant

Function Documentation

◆ evaluate_tree()

◆ evaluate_tree_impl() [1/2]

◆ evaluate_tree_impl() [2/2]

◆ get_core_clock_rate()

◆ get_max_shared_mem_per_block()

◆ get_max_shared_mem_per_sm()

◆ get_max_threads_per_sm()

◆ get_mem_clock_rate()

◆ get_node_alignment()

◆ get_sm_count()

◆ infer()

◆ infer_kernel_cpu()

◆ initialize_device() [1/2]

◆ initialize_device() [2/2]