aggregation.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2021, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/types.hpp>
20 
21 #include <functional>
22 #include <memory>
23 #include <vector>
24 
34 namespace cudf {
41 // forward declaration
// Only a reference to aggregation_finalizer is used in this header (by
// aggregation::finalize), so an incomplete type is sufficient here.
42 namespace detail {
43 class aggregation_finalizer;
44 } // namespace detail
52 class aggregation {
53  public:
57  enum Kind {
58  SUM,
60  MIN,
61  MAX,
64  ANY,
65  ALL,
67  MEAN,
69  STD,
79  LEAD,
80  LAG,
81  PTX,
82  CUDA
83  };
84 
87 
88  virtual bool is_equal(aggregation const& other) const { return kind == other.kind; }
89 
90  virtual size_t do_hash() const { return std::hash<int>{}(kind); }
91 
92  virtual std::unique_ptr<aggregation> clone() const
93  {
94  return std::make_unique<aggregation>(*this);
95  }
96 
97  virtual ~aggregation() = default;
98 
99  // override functions for compound aggregations
100  virtual std::vector<aggregation::Kind> get_simple_aggregations(data_type col_type) const;
101  virtual void finalize(cudf::detail::aggregation_finalizer& finalizer);
102 };
103 
/**
 * @brief Selects the flavour of user-defined function passed to
 * make_udf_aggregation: CUDA or PTX.
 *
 * The underlying type is `bool`, so CUDA has the value `false` and PTX `true`.
 */
104 enum class udf_type : bool { CUDA, PTX };
105 
/// Factory to create a SUM aggregation (sum reduction).
107 std::unique_ptr<aggregation> make_sum_aggregation();
108 
/// Factory to create a PRODUCT aggregation (product reduction).
110 std::unique_ptr<aggregation> make_product_aggregation();
111 
/// Factory to create a MIN aggregation (min reduction).
113 std::unique_ptr<aggregation> make_min_aggregation();
114 
/// Factory to create a MAX aggregation (max reduction).
116 std::unique_ptr<aggregation> make_max_aggregation();
117 
/**
 * @brief Factory to create a COUNT aggregation.
 *
 * @param null_handling Whether null elements are included or excluded;
 *        defaults to excluding them.
 *        NOTE(review): presumably selects between the COUNT_ALL and
 *        COUNT_VALID kinds -- confirm against the implementation.
 * @return Owning pointer to the new aggregation.
 */
123 std::unique_ptr<aggregation> make_count_aggregation(
124  null_policy null_handling = null_policy::EXCLUDE);
125 
/// Factory to create an ANY aggregation (any reduction).
127 std::unique_ptr<aggregation> make_any_aggregation();
128 
/// Factory to create an ALL aggregation (all reduction).
130 std::unique_ptr<aggregation> make_all_aggregation();
131 
/// Factory to create a SUM_OF_SQUARES aggregation (sum of squares reduction).
133 std::unique_ptr<aggregation> make_sum_of_squares_aggregation();
134 
/// Factory to create a MEAN aggregation (arithmetic mean reduction).
136 std::unique_ptr<aggregation> make_mean_aggregation();
137 
/**
 * @brief Factory to create a VARIANCE aggregation (groupwise variance).
 *
 * @param ddof Defaults to 1.
 *        NOTE(review): presumably "delta degrees of freedom" (divisor N - ddof);
 *        the meaning is not visible in this header -- confirm.
 * @return Owning pointer to the new aggregation.
 */
144 std::unique_ptr<aggregation> make_variance_aggregation(size_type ddof = 1);
145 
/**
 * @brief Factory to create a STD aggregation (groupwise standard deviation).
 *
 * @param ddof Defaults to 1.
 *        NOTE(review): presumably "delta degrees of freedom" (divisor N - ddof);
 *        the meaning is not visible in this header -- confirm.
 * @return Owning pointer to the new aggregation.
 */
152 std::unique_ptr<aggregation> make_std_aggregation(size_type ddof = 1);
153 
/// Factory to create a MEDIAN aggregation (median reduction).
155 std::unique_ptr<aggregation> make_median_aggregation();
156 
/**
 * @brief Factory to create a QUANTILE aggregation (computes the specified quantile(s)).
 *
 * @param q The requested quantile(s).
 *        NOTE(review): the expected range of each value is not visible here -- confirm.
 * @param i Interpolation method to use when a desired quantile lies between
 *        two data points; defaults to LINEAR.
 * @return Owning pointer to the new aggregation.
 */
163 std::unique_ptr<aggregation> make_quantile_aggregation(std::vector<double> const& q,
164  interpolation i = interpolation::LINEAR);
165 
/// Factory to create an argmax aggregation (index of the max element).
171 std::unique_ptr<aggregation> make_argmax_aggregation();
172 
/// Factory to create an argmin aggregation (index of the min element).
178 std::unique_ptr<aggregation> make_argmin_aggregation();
179 
/**
 * @brief Factory to create a nunique aggregation (counts the number of unique elements).
 *
 * @param null_handling Whether null elements are included or excluded;
 *        defaults to excluding them.
 * @return Owning pointer to the new aggregation.
 */
186 std::unique_ptr<aggregation> make_nunique_aggregation(
187  null_policy null_handling = null_policy::EXCLUDE);
188 
/**
 * @brief Factory to create an nth_element aggregation (gets the nth element).
 *
 * @param n Which element to select.
 *        NOTE(review): index base and handling of negative or out-of-range
 *        values are not visible in this header -- confirm.
 * @param null_handling Whether null elements are included or excluded;
 *        defaults to including them.
 * @return Owning pointer to the new aggregation.
 */
202 std::unique_ptr<aggregation> make_nth_element_aggregation(
203  size_type n, null_policy null_handling = null_policy::INCLUDE);
204 
/// Factory to create a ROW_NUMBER aggregation (row-number of the current
/// index, relative to the rolling window).
206 std::unique_ptr<aggregation> make_row_number_aggregation();
207 
/**
 * @brief Factory to create a COLLECT_LIST aggregation (collects values into a list).
 *
 * @param null_handling Whether null elements are included or excluded;
 *        defaults to including them.
 * @return Owning pointer to the new aggregation.
 */
218 std::unique_ptr<aggregation> make_collect_list_aggregation(
219  null_policy null_handling = null_policy::INCLUDE);
220 
/**
 * @brief Factory to create a COLLECT_SET aggregation (collects values into a
 * list without duplicate entries).
 *
 * @param null_handling Whether null elements are included or excluded;
 *        defaults to including them.
 * @param nulls_equal Defaults to EQUAL.
 *        NOTE(review): presumably controls whether nulls compare equal to each
 *        other when removing duplicates -- confirm.
 * @param nans_equal Whether floating-point elements holding NaN are treated as
 *        equal or unequal; defaults to UNEQUAL.
 * @return Owning pointer to the new aggregation.
 */
236 std::unique_ptr<aggregation> make_collect_set_aggregation(
237  null_policy null_handling = null_policy::INCLUDE,
238  null_equality nulls_equal = null_equality::EQUAL,
239  nan_equality nans_equal = nan_equality::UNEQUAL);
240 
/**
 * @brief Factory to create a LAG aggregation: a window function that accesses
 * the row at the specified offset preceding the current row.
 *
 * @param offset The row offset.
 * @return Owning pointer to the new aggregation.
 */
242 std::unique_ptr<aggregation> make_lag_aggregation(size_type offset);
243 
/**
 * @brief Factory to create a LEAD aggregation: a window function that accesses
 * the row at the specified offset following the current row.
 *
 * @param offset The row offset.
 * @return Owning pointer to the new aggregation.
 */
245 std::unique_ptr<aggregation> make_lead_aggregation(size_type offset);
246 
/**
 * @brief Factory to create an aggregation based on a UDF for PTX or CUDA.
 *
 * @param type Whether the UDF is CUDA or PTX.
 * @param user_defined_aggregator The user-defined aggregator, as a string.
 *        NOTE(review): presumably the UDF source text -- confirm.
 * @param output_type NOTE(review): presumably the element type of the
 *        aggregation result -- confirm.
 * @return Owning pointer to the new aggregation.
 */
256 std::unique_ptr<aggregation> make_udf_aggregation(udf_type type,
257  std::string const& user_defined_aggregator,
258  data_type output_type);
259  // end of group
261 } // namespace cudf
cudf::make_lead_aggregation
std::unique_ptr< aggregation > make_lead_aggregation(size_type offset)
Factory to create a LEAD aggregation.
cudf::make_min_aggregation
std::unique_ptr< aggregation > make_min_aggregation()
Factory to create a MIN aggregation.
cudf::aggregation::kind
Kind kind
The aggregation to perform.
Definition: aggregation.hpp:86
cudf::make_count_aggregation
std::unique_ptr< aggregation > make_count_aggregation(null_policy null_handling=null_policy::EXCLUDE)
Factory to create a COUNT aggregation.
cudf::null_policy
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:127
cudf::aggregation::NTH_ELEMENT
@ NTH_ELEMENT
get the nth element
Definition: aggregation.hpp:75
cudf::aggregation::SUM
@ SUM
sum reduction
Definition: aggregation.hpp:58
types.hpp
Type declarations for libcudf.
cudf::interpolation
interpolation
Interpolation method to use when the desired quantile lies between two data points i and j.
Definition: types.hpp:193
cudf::aggregation::MIN
@ MIN
min reduction
Definition: aggregation.hpp:60
cudf::aggregation::STD
@ STD
groupwise standard deviation
Definition: aggregation.hpp:69
cudf::aggregation::COUNT_VALID
@ COUNT_VALID
count number of valid elements
Definition: aggregation.hpp:62
cudf::aggregation::ARGMAX
@ ARGMAX
Index of max element.
Definition: aggregation.hpp:72
cudf::make_nunique_aggregation
std::unique_ptr< aggregation > make_nunique_aggregation(null_policy null_handling=null_policy::EXCLUDE)
Factory to create a nunique aggregation.
cudf::make_quantile_aggregation
std::unique_ptr< aggregation > make_quantile_aggregation(std::vector< double > const &q, interpolation i=interpolation::LINEAR)
Factory to create a QUANTILE aggregation.
cudf::make_argmin_aggregation
std::unique_ptr< aggregation > make_argmin_aggregation()
Factory to create an argmin aggregation.
cudf::aggregation::PRODUCT
@ PRODUCT
product reduction
Definition: aggregation.hpp:59
cudf::aggregation::COLLECT_LIST
@ COLLECT_LIST
collect values into a list
Definition: aggregation.hpp:77
cudf::make_product_aggregation
std::unique_ptr< aggregation > make_product_aggregation()
Factory to create a PRODUCT aggregation.
cudf::aggregation::VARIANCE
@ VARIANCE
groupwise variance
Definition: aggregation.hpp:68
cudf::aggregation::CUDA
@ CUDA
CUDA UDF based reduction.
Definition: aggregation.hpp:82
cudf::aggregation::ANY
@ ANY
any reduction
Definition: aggregation.hpp:64
cudf::aggregation::ARGMIN
@ ARGMIN
Index of min element.
Definition: aggregation.hpp:73
cudf::make_row_number_aggregation
std::unique_ptr< aggregation > make_row_number_aggregation()
Factory to create a ROW_NUMBER aggregation.
cudf::make_nth_element_aggregation
std::unique_ptr< aggregation > make_nth_element_aggregation(size_type n, null_policy null_handling=null_policy::INCLUDE)
Factory to create an nth_element aggregation.
cudf::make_all_aggregation
std::unique_ptr< aggregation > make_all_aggregation()
Factory to create an ALL aggregation.
cudf::make_std_aggregation
std::unique_ptr< aggregation > make_std_aggregation(size_type ddof=1)
Factory to create a STD aggregation.
cudf::aggregation::MEAN
@ MEAN
arithmetic mean reduction
Definition: aggregation.hpp:67
cudf::nan_equality
nan_equality
Enum to specify whether different elements (of floating-point types) holding NaN values are considered equal or unequal.
Definition: types.hpp:144
cudf::aggregation::MEDIAN
@ MEDIAN
median reduction
Definition: aggregation.hpp:70
cudf::nan_equality::ALL_EQUAL
@ ALL_EQUAL
All NaNs compare equal, regardless of sign.
cudf::aggregation::QUANTILE
@ QUANTILE
compute specified quantile(s)
Definition: aggregation.hpp:71
cudf::make_mean_aggregation
std::unique_ptr< aggregation > make_mean_aggregation()
Factory to create a MEAN aggregation.
cudf::aggregation::LEAD
@ LEAD
window function, accesses row at specified offset following current row
Definition: aggregation.hpp:79
cudf::aggregation::LAG
@ LAG
window function, accesses row at specified offset preceding current row
Definition: aggregation.hpp:80
cudf::make_lag_aggregation
std::unique_ptr< aggregation > make_lag_aggregation(size_type offset)
Factory to create a LAG aggregation.
cudf::data_type
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
cudf::make_median_aggregation
std::unique_ptr< aggregation > make_median_aggregation()
Factory to create a MEDIAN aggregation.
cudf::make_sum_of_squares_aggregation
std::unique_ptr< aggregation > make_sum_of_squares_aggregation()
Factory to create a SUM_OF_SQUARES aggregation.
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::aggregation::SUM_OF_SQUARES
@ SUM_OF_SQUARES
sum of squares reduction
Definition: aggregation.hpp:66
cudf::aggregation::COLLECT_SET
@ COLLECT_SET
collect values into a list without duplicate entries
Definition: aggregation.hpp:78
cudf::make_collect_set_aggregation
std::unique_ptr< aggregation > make_collect_set_aggregation(null_policy null_handling=null_policy::INCLUDE, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::UNEQUAL)
Factory to create a COLLECT_SET aggregation.
cudf::aggregation::PTX
@ PTX
PTX UDF based reduction.
Definition: aggregation.hpp:81
cudf::null_policy::EXCLUDE
@ EXCLUDE
exclude null elements
cudf::make_any_aggregation
std::unique_ptr< aggregation > make_any_aggregation()
Factory to create an ANY aggregation.
cudf::make_udf_aggregation
std::unique_ptr< aggregation > make_udf_aggregation(udf_type type, std::string const &user_defined_aggregator, data_type output_type)
Factory to create an aggregation based on a UDF for PTX or CUDA.
cudf::null_equality
null_equality
Definition: types.hpp:152
cudf::aggregation::MAX
@ MAX
max reduction
Definition: aggregation.hpp:61
cudf::make_variance_aggregation
std::unique_ptr< aggregation > make_variance_aggregation(size_type ddof=1)
Factory to create a VARIANCE aggregation.
cudf::aggregation::ROW_NUMBER
@ ROW_NUMBER
get row-number of current index (relative to rolling window)
Definition: aggregation.hpp:76
cudf::make_collect_list_aggregation
std::unique_ptr< aggregation > make_collect_list_aggregation(null_policy null_handling=null_policy::INCLUDE)
Factory to create a COLLECT_LIST aggregation.
cudf::make_sum_aggregation
std::unique_ptr< aggregation > make_sum_aggregation()
Factory to create a SUM aggregation.
cudf::make_argmax_aggregation
std::unique_ptr< aggregation > make_argmax_aggregation()
Factory to create an argmax aggregation.
cudf::aggregation
Base class for specifying the desired aggregation in an aggregation_request.
Definition: aggregation.hpp:52
cudf::make_max_aggregation
std::unique_ptr< aggregation > make_max_aggregation()
Factory to create a MAX aggregation.
cudf::aggregation::COUNT_ALL
@ COUNT_ALL
count number of elements
Definition: aggregation.hpp:63
cudf::aggregation::Kind
Kind
Possible aggregation operations.
Definition: aggregation.hpp:57
cudf::aggregation::NUNIQUE
@ NUNIQUE
count number of unique elements
Definition: aggregation.hpp:74
cudf::aggregation::ALL
@ ALL
all reduction
Definition: aggregation.hpp:65