randomforest.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cuml/common/logger.hpp>
22 
23 #include <map>
24 #include <memory>
25 
26 namespace raft {
27 class handle_t; // forward decl
28 }
29 
30 namespace ML {
31 
// Enumeration RF_type. Its enumerators are not shown in this listing (source
// lines 33-34 are absent); the cross-reference index at the end of this page
// records CLASSIFICATION at randomforest.hpp:33 and REGRESSION at
// randomforest.hpp:34.
32 enum RF_type {
35 };
36 
38 
// Aggregate holding classification and regression metrics.
// Source lines 40 and 46-48 are absent from this listing; per the
// cross-reference index at the end of this page they declare
//   RF_type rf_type;            (line 40)
//   double mean_abs_error;      (line 46)
//   double mean_squared_error;  (line 47)
//   double median_abs_error;    (line 48)
39 struct RF_metrics {
41 
42  // Classification metrics
43  float accuracy;
44 
45  // Regression metrics
49 };
50 
// Tail of the parameter list of set_all_rf_metrics; the opening line (source
// line 51) is absent from this listing. Per the cross-reference index at the
// end of this page the full signature is
//   RF_metrics set_all_rf_metrics(RF_type rf_type, float accuracy,
//                                 double mean_abs_error, double mean_squared_error,
//                                 double median_abs_error)
52  float accuracy,
53  double mean_abs_error,
54  double mean_squared_error,
55  double median_abs_error);
// Returns an RF_metrics value given the three regression error figures
// (mean absolute, mean squared and median absolute error), all as double.
// NOTE(review): how the remaining RF_metrics fields (e.g. accuracy) are set is
// decided by the implementation, which is not visible in this header.
57 RF_metrics set_rf_metrics_regression(double mean_abs_error,
58  double mean_squared_error,
59  double median_abs_error);
// Takes an RF_metrics by (const) value and returns nothing.
// Presumably writes the metrics to the log or stdout; the output destination
// is not visible in this header - confirm against the implementation.
60 void print(const RF_metrics rf_metrics);
61 
// Random-forest configuration aggregate; passed by value to the fit()
// overloads and returned by set_rf_params().
// Source line 95 is absent from this listing; per the cross-reference index at
// the end of this page it declares `DT::DecisionTreeParams tree_params;`.
// The member descriptions below are inferred from names only - TODO confirm.
62 struct RF_params {
 // Presumably the number of trees in the forest.
66  int n_trees;
 // Presumably enables bootstrap sampling of the training rows.
77  bool bootstrap;
 // Presumably the amount of data sampled per tree; exact meaning and valid
 // range are not visible here.
81  float max_samples;
 // Presumably the random seed (64-bit unsigned).
88  uint64_t seed;
 // Presumably the number of parallel streams used; exact meaning is not
 // visible here.
94  int n_streams;
96 };
97 
98 /* Update labels so they are unique from 0 to n_unique_vals.
99  Create an old_label to new_label map per random forest.
100 */
// Parameters:
//   n_rows     - row count; presumably the number of entries in `labels`.
//   labels     - label vector taken by non-const reference (rewritten in
//                place, per the note above).
//   labels_map - map taken by non-const reference; receives the
//                old_label -> new_label mapping, per the note above.
//   verbosity  - logging level; defaults to CUML_LEVEL_INFO.
101 void preprocess_labels(int n_rows,
102  std::vector<int>& labels,
103  std::map<int, int>& labels_map,
104  int verbosity = CUML_LEVEL_INFO);
105 
106 /* Revert preprocessing effect, if needed. */
// Mirror of preprocess_labels with the same parameter list:
//   n_rows     - row count; presumably the number of entries in `labels`.
//   labels     - label vector taken by non-const reference, so it can be
//                rewritten in place.
//   labels_map - label map taken by non-const reference; presumably the map
//                produced by preprocess_labels - confirm.
//   verbosity  - logging level; defaults to CUML_LEVEL_INFO.
107 void postprocess_labels(int n_rows,
108  std::vector<int>& labels,
109  std::map<int, int>& labels_map,
110  int verbosity = CUML_LEVEL_INFO);
111 
// Class template over <T, L>. Source lines 113 and 115 are absent from this
// listing: line 113 is the struct header (presumably
// `struct RandomForestMetaData {`, the name used by the declarations below),
// and per the cross-reference index at the end of this page line 115 declares
// `RF_params rf_params;`.
112 template <class T, class L>
 // The forest's trees, held as shared pointers to DT::TreeMetaDataNode<T, L>.
114  std::vector<std::shared_ptr<DT::TreeMetaDataNode<T, L>>> trees;
116 };
117 
// Three function-template declarations whose bodies (source lines 119, 122
// and 125) are absent from this listing. The cross-reference index at the end
// of this page lists three templates over RandomForestMetaData<T, L> that are
// not otherwise shown here:
//   void delete_rf_metadata(RandomForestMetaData<T, L>* forest)
//   std::string get_rf_summary_text(const RandomForestMetaData<T, L>* forest)
//   std::string get_rf_detailed_text(const RandomForestMetaData<T, L>* forest)
// NOTE(review): presumably these are the missing declarations, in this order -
// confirm against the original header.
118 template <class T, class L>
120 
121 template <class T, class L>
123 
124 template <class T, class L>
126 
// Returns a std::string for the given forest, which is taken by
// pointer-to-const and therefore not modified through this pointer.
// Presumably the string is a JSON dump of the forest (inferred from the
// name); the exact format is not visible in this header.
127 template <class T, class L>
128 std::string get_rf_json(const RandomForestMetaData<T, L>* forest);
129 
// Declaration of build_treelite_forest; its opening line (source line 131) is
// absent from this listing. Per the cross-reference index at the end of this
// page the full signature is
//   void build_treelite_forest(TreeliteModelHandle* model,
//                              const RandomForestMetaData<T, L>* forest,
//                              int num_features)
// `model` is a pointer to a handle, so it is presumably an output parameter;
// `forest` is read-only. NOTE(review): ownership of the resulting Treelite
// model is not visible here - confirm.
130 template <class T, class L>
132  const RandomForestMetaData<T, L>* forest,
133  int num_features);
134 
// Takes a vector of Treelite model handles by value and returns a single
// handle. TreeliteModelHandle is `void*` per the cross-reference index at the
// end of this page.
// NOTE(review): whether the input handles stay valid and who frees the
// returned handle is not visible in this header - confirm.
135 TreeliteModelHandle concatenate_trees(std::vector<TreeliteModelHandle> treelite_handles);
136 
137 // ----------------------------- Classification ----------------------------------- //
138 
141 
// Classification fit, single-precision overload. RandomForestClassifierF is
// RandomForestMetaData<float, int> per the index at the end of this page.
//   user_handle     - raft handle, taken by const reference.
//   forest          - reference to a forest pointer; as a non-const reference
//                     the callee can update it. NOTE(review): whether *forest
//                     must be allocated by the caller is not visible here.
//   input           - float feature values. NOTE(review): memory space and
//                     layout are not visible in this header - confirm.
//   n_rows, n_cols  - presumably the row and column counts of `input`.
//   labels          - int labels; presumably one per row.
//   n_unique_labels - presumably the number of distinct labels.
//   rf_params       - forest configuration, passed by value.
//   verbosity       - logging level; defaults to CUML_LEVEL_INFO.
142 void fit(const raft::handle_t& user_handle,
143  RandomForestClassifierF*& forest,
144  float* input,
145  int n_rows,
146  int n_cols,
147  int* labels,
148  int n_unique_labels,
149  RF_params rf_params,
150  int verbosity = CUML_LEVEL_INFO);
// Classification fit, double-precision overload. RandomForestClassifierD is
// RandomForestMetaData<double, int> per the index at the end of this page.
//   user_handle     - raft handle, taken by const reference.
//   forest          - reference to a forest pointer; as a non-const reference
//                     the callee can update it. NOTE(review): whether *forest
//                     must be allocated by the caller is not visible here.
//   input           - double feature values. NOTE(review): memory space and
//                     layout are not visible in this header - confirm.
//   n_rows, n_cols  - presumably the row and column counts of `input`.
//   labels          - int labels; presumably one per row.
//   n_unique_labels - presumably the number of distinct labels.
//   rf_params       - forest configuration, passed by value.
//   verbosity       - logging level; defaults to CUML_LEVEL_INFO.
151 void fit(const raft::handle_t& user_handle,
152  RandomForestClassifierD*& forest,
153  double* input,
154  int n_rows,
155  int n_cols,
156  int* labels,
157  int n_unique_labels,
158  RF_params rf_params,
159  int verbosity = CUML_LEVEL_INFO);
160 
// Classification predict, single-precision overload.
//   user_handle    - raft handle, taken by const reference.
//   forest         - forest to use; pointer-to-const, so not modified here.
//   input          - read-only float feature values. NOTE(review): memory
//                    space and layout are not visible in this header.
//   n_rows, n_cols - presumably the row and column counts of `input`.
//   predictions    - non-const int buffer; presumably receives one predicted
//                    label per row - confirm required size.
//   verbosity      - logging level; defaults to CUML_LEVEL_INFO.
161 void predict(const raft::handle_t& user_handle,
162  const RandomForestClassifierF* forest,
163  const float* input,
164  int n_rows,
165  int n_cols,
166  int* predictions,
167  int verbosity = CUML_LEVEL_INFO);
// Classification predict, double-precision overload.
//   user_handle    - raft handle, taken by const reference.
//   forest         - forest to use; pointer-to-const, so not modified here.
//   input          - read-only double feature values. NOTE(review): memory
//                    space and layout are not visible in this header.
//   n_rows, n_cols - presumably the row and column counts of `input`.
//   predictions    - non-const int buffer; presumably receives one predicted
//                    label per row - confirm required size.
//   verbosity      - logging level; defaults to CUML_LEVEL_INFO.
168 void predict(const raft::handle_t& user_handle,
169  const RandomForestClassifierD* forest,
170  const double* input,
171  int n_rows,
172  int n_cols,
173  int* predictions,
174  int verbosity = CUML_LEVEL_INFO);
175 
// Classification score, overload for the single-precision forest type.
// Returns an RF_metrics value by value.
//   user_handle - raft handle, taken by const reference.
//   forest      - forest, pointer-to-const.
//   ref_labels  - read-only int reference labels.
//   n_rows      - presumably the number of entries in both label arrays.
//   predictions - read-only int predicted labels.
//   verbosity   - logging level; defaults to CUML_LEVEL_INFO.
// NOTE(review): which RF_metrics fields are populated is not visible here.
176 RF_metrics score(const raft::handle_t& user_handle,
177  const RandomForestClassifierF* forest,
178  const int* ref_labels,
179  int n_rows,
180  const int* predictions,
181  int verbosity = CUML_LEVEL_INFO);
// Classification score, overload for the double-precision forest type.
// Returns an RF_metrics value by value.
//   user_handle - raft handle, taken by const reference.
//   forest      - forest, pointer-to-const.
//   ref_labels  - read-only int reference labels.
//   n_rows      - presumably the number of entries in both label arrays.
//   predictions - read-only int predicted labels.
//   verbosity   - logging level; defaults to CUML_LEVEL_INFO.
// NOTE(review): which RF_metrics fields are populated is not visible here.
182 RF_metrics score(const raft::handle_t& user_handle,
183  const RandomForestClassifierD* forest,
184  const int* ref_labels,
185  int n_rows,
186  const int* predictions,
187  int verbosity = CUML_LEVEL_INFO);
188 
// Returns an RF_params value by value from the individual settings below.
// bootstrap, n_trees, max_samples and seed share their names with RF_params
// members, and cfg_n_streams presumably maps to RF_params::n_streams.
// NOTE(review): the remaining arguments (max_depth, max_leaves, max_features,
// max_n_bins, min_samples_leaf, min_samples_split, min_impurity_decrease,
// split_criterion, max_batch_size) presumably populate the
// DT::DecisionTreeParams member `tree_params` - confirm against the
// implementation. Units, valid ranges and sentinel values are not visible in
// this header. CRITERION is defined in algo_helper.h per the index at the end
// of this page.
189 RF_params set_rf_params(int max_depth,
190  int max_leaves,
191  float max_features,
192  int max_n_bins,
193  int min_samples_leaf,
194  int min_samples_split,
195  float min_impurity_decrease,
196  bool bootstrap,
197  int n_trees,
198  float max_samples,
199  uint64_t seed,
200  CRITERION split_criterion,
201  int cfg_n_streams,
202  int max_batch_size);
203 
204 // ----------------------------- Regression ----------------------------------- //
205 
208 
// Regression fit, single-precision overload. RandomForestRegressorF is
// RandomForestMetaData<float, float> per the index at the end of this page.
//   user_handle    - raft handle, taken by const reference.
//   forest         - reference to a forest pointer; as a non-const reference
//                    the callee can update it. NOTE(review): whether *forest
//                    must be allocated by the caller is not visible here.
//   input          - float feature values. NOTE(review): memory space and
//                    layout are not visible in this header - confirm.
//   n_rows, n_cols - presumably the row and column counts of `input`.
//   labels         - float target values; presumably one per row.
//   rf_params      - forest configuration, passed by value.
//   verbosity      - logging level; defaults to CUML_LEVEL_INFO.
209 void fit(const raft::handle_t& user_handle,
210  RandomForestRegressorF*& forest,
211  float* input,
212  int n_rows,
213  int n_cols,
214  float* labels,
215  RF_params rf_params,
216  int verbosity = CUML_LEVEL_INFO);
// Regression fit, double-precision overload. RandomForestRegressorD is
// RandomForestMetaData<double, double> per the index at the end of this page.
//   user_handle    - raft handle, taken by const reference.
//   forest         - reference to a forest pointer; as a non-const reference
//                    the callee can update it. NOTE(review): whether *forest
//                    must be allocated by the caller is not visible here.
//   input          - double feature values. NOTE(review): memory space and
//                    layout are not visible in this header - confirm.
//   n_rows, n_cols - presumably the row and column counts of `input`.
//   labels         - double target values; presumably one per row.
//   rf_params      - forest configuration, passed by value.
//   verbosity      - logging level; defaults to CUML_LEVEL_INFO.
217 void fit(const raft::handle_t& user_handle,
218  RandomForestRegressorD*& forest,
219  double* input,
220  int n_rows,
221  int n_cols,
222  double* labels,
223  RF_params rf_params,
224  int verbosity = CUML_LEVEL_INFO);
225 
// Regression predict, single-precision overload.
//   user_handle    - raft handle, taken by const reference.
//   forest         - forest to use; pointer-to-const, so not modified here.
//   input          - read-only float feature values. NOTE(review): memory
//                    space and layout are not visible in this header.
//   n_rows, n_cols - presumably the row and column counts of `input`.
//   predictions    - non-const float buffer; presumably receives one
//                    predicted value per row - confirm required size.
//   verbosity      - logging level; defaults to CUML_LEVEL_INFO.
226 void predict(const raft::handle_t& user_handle,
227  const RandomForestRegressorF* forest,
228  const float* input,
229  int n_rows,
230  int n_cols,
231  float* predictions,
232  int verbosity = CUML_LEVEL_INFO);
// Regression predict, double-precision overload.
//   user_handle    - raft handle, taken by const reference.
//   forest         - forest to use; pointer-to-const, so not modified here.
//   input          - read-only double feature values. NOTE(review): memory
//                    space and layout are not visible in this header.
//   n_rows, n_cols - presumably the row and column counts of `input`.
//   predictions    - non-const double buffer; presumably receives one
//                    predicted value per row - confirm required size.
//   verbosity      - logging level; defaults to CUML_LEVEL_INFO.
233 void predict(const raft::handle_t& user_handle,
234  const RandomForestRegressorD* forest,
235  const double* input,
236  int n_rows,
237  int n_cols,
238  double* predictions,
239  int verbosity = CUML_LEVEL_INFO);
240 
// Regression score, single-precision overload. Returns an RF_metrics value by
// value.
//   user_handle - raft handle, taken by const reference.
//   forest      - forest, pointer-to-const.
//   ref_labels  - read-only float reference values.
//   n_rows      - presumably the number of entries in both value arrays.
//   predictions - read-only float predicted values.
//   verbosity   - logging level; defaults to CUML_LEVEL_INFO.
// NOTE(review): which RF_metrics fields are populated is not visible here.
241 RF_metrics score(const raft::handle_t& user_handle,
242  const RandomForestRegressorF* forest,
243  const float* ref_labels,
244  int n_rows,
245  const float* predictions,
246  int verbosity = CUML_LEVEL_INFO);
// Regression score, double-precision overload. Returns an RF_metrics value by
// value.
//   user_handle - raft handle, taken by const reference.
//   forest      - forest, pointer-to-const.
//   ref_labels  - read-only double reference values.
//   n_rows      - presumably the number of entries in both value arrays.
//   predictions - read-only double predicted values.
//   verbosity   - logging level; defaults to CUML_LEVEL_INFO.
// NOTE(review): which RF_metrics fields are populated is not visible here.
247 RF_metrics score(const raft::handle_t& user_handle,
248  const RandomForestRegressorD* forest,
249  const double* ref_labels,
250  int n_rows,
251  const double* predictions,
252  int verbosity = CUML_LEVEL_INFO);
253 }; // namespace ML
#define CUML_LEVEL_INFO
Definition: log_levels.hpp:28
Definition: dbscan.hpp:30
void postprocess_labels(int n_rows, std::vector< int > &labels, std::map< int, int > &labels_map, int verbosity=CUML_LEVEL_INFO)
void predict(const raft::handle_t &user_handle, const RandomForestClassifierF *forest, const float *input, int n_rows, int n_cols, int *predictions, int verbosity=CUML_LEVEL_INFO)
std::string get_rf_json(const RandomForestMetaData< T, L > *forest)
void fit(const raft::handle_t &user_handle, RandomForestClassifierF *&forest, float *input, int n_rows, int n_cols, int *labels, int n_unique_labels, RF_params rf_params, int verbosity=CUML_LEVEL_INFO)
std::string get_rf_summary_text(const RandomForestMetaData< T, L > *forest)
RandomForestMetaData< float, int > RandomForestClassifierF
Definition: randomforest.hpp:139
void print(const RF_metrics rf_metrics)
void delete_rf_metadata(RandomForestMetaData< T, L > *forest)
RF_type
Definition: randomforest.hpp:32
@ REGRESSION
Definition: randomforest.hpp:34
@ CLASSIFICATION
Definition: randomforest.hpp:33
RF_metrics set_all_rf_metrics(RF_type rf_type, float accuracy, double mean_abs_error, double mean_squared_error, double median_abs_error)
RandomForestMetaData< double, double > RandomForestRegressorD
Definition: randomforest.hpp:207
RandomForestMetaData< float, float > RandomForestRegressorF
Definition: randomforest.hpp:206
void build_treelite_forest(TreeliteModelHandle *model, const RandomForestMetaData< T, L > *forest, int num_features)
CRITERION
Definition: algo_helper.h:20
RF_metrics set_rf_metrics_classification(float accuracy)
std::string get_rf_detailed_text(const RandomForestMetaData< T, L > *forest)
void preprocess_labels(int n_rows, std::vector< int > &labels, std::map< int, int > &labels_map, int verbosity=CUML_LEVEL_INFO)
RF_params set_rf_params(int max_depth, int max_leaves, float max_features, int max_n_bins, int min_samples_leaf, int min_samples_split, float min_impurity_decrease, bool bootstrap, int n_trees, float max_samples, uint64_t seed, CRITERION split_criterion, int cfg_n_streams, int max_batch_size)
RF_metrics set_rf_metrics_regression(double mean_abs_error, double mean_squared_error, double median_abs_error)
RandomForestMetaData< double, int > RandomForestClassifierD
Definition: randomforest.hpp:140
RF_metrics score(const raft::handle_t &user_handle, const RandomForestClassifierF *forest, const int *ref_labels, int n_rows, const int *predictions, int verbosity=CUML_LEVEL_INFO)
task_category
Definition: randomforest.hpp:37
@ REGRESSION_MODEL
Definition: randomforest.hpp:37
@ CLASSIFICATION_MODEL
Definition: randomforest.hpp:37
TreeliteModelHandle concatenate_trees(std::vector< TreeliteModelHandle > treelite_handles)
Definition: dbscan.hpp:26
Definition: decisiontree.hpp:29
Definition: randomforest.hpp:39
RF_type rf_type
Definition: randomforest.hpp:40
double mean_squared_error
Definition: randomforest.hpp:47
double median_abs_error
Definition: randomforest.hpp:48
float accuracy
Definition: randomforest.hpp:43
double mean_abs_error
Definition: randomforest.hpp:46
Definition: randomforest.hpp:62
uint64_t seed
Definition: randomforest.hpp:88
int n_streams
Definition: randomforest.hpp:94
DT::DecisionTreeParams tree_params
Definition: randomforest.hpp:95
bool bootstrap
Definition: randomforest.hpp:77
int n_trees
Definition: randomforest.hpp:66
float max_samples
Definition: randomforest.hpp:81
Definition: randomforest.hpp:113
RF_params rf_params
Definition: randomforest.hpp:115
std::vector< std::shared_ptr< DT::TreeMetaDataNode< T, L > > > trees
Definition: randomforest.hpp:114
void * TreeliteModelHandle
Definition: treelite_defs.hpp:23