10 #include <cudf/utilities/export.hpp>
25 namespace CUDF_EXPORT
cudf {
83 SUM_WITH_OVERFLOW = SUM_OVERFLOW,
134 CUDF_FAIL(
"No-parameter aggregation constructor should never be called");
152 [[nodiscard]]
bool is_valid()
const {
return kind >= 0 && kind < Kind::INVALID; }
167 [[nodiscard]]
virtual size_t do_hash()
const {
return std::hash<int>{}(kind); }
174 [[nodiscard]]
virtual std::unique_ptr<aggregation>
clone()
const = 0;
214 template <
typename Base = aggregation>
219 template <
typename Base = aggregation>
225 template <
typename Base = aggregation>
230 template <
typename Base = aggregation>
235 template <
typename Base = aggregation>
240 template <
typename Base = aggregation>
249 template <
typename Base = aggregation>
254 template <
typename Base = aggregation>
259 template <
typename Base = aggregation>
264 template <
typename Base = aggregation>
269 template <
typename Base = aggregation>
274 template <
typename Base = aggregation>
289 template <
typename Base = aggregation>
301 template <
typename Base = aggregation>
313 template <
typename Base = aggregation>
318 template <
typename Base = aggregation>
328 template <
typename Base = aggregation>
338 template <
typename Base = aggregation>
347 template <
typename Base = aggregation>
357 template <
typename Base = aggregation>
374 template <
typename Base = aggregation>
380 template <
typename Base = aggregation>
416 template <
typename Base = aggregation>
491 template <
typename Base = aggregation>
493 order column_order = order::ASCENDING,
495 null_order null_precedence = null_order::AFTER,
509 template <
typename Base = aggregation>
529 template <
typename Base = aggregation>
541 template <
typename Base = aggregation>
550 template <
typename Base = aggregation>
562 template <
typename Base = aggregation>
564 std::string
const& user_defined_aggregator,
576 template <
typename Base = aggregation>
590 template <
typename Base = aggregation>
615 template <
typename Base = aggregation>
634 template <
typename Base = aggregation>
645 template <
typename Base = aggregation>
658 template <
typename Base = aggregation>
671 template <
typename Base = aggregation>
709 template <
typename Base>
747 template <
typename Base>
756 template <
typename Base>
769 template <
typename Base = aggregation>
Abstract base class for specifying the desired aggregation in an aggregation_request.
bool is_valid() const
Checks if the aggregation is valid, i.e. it was constructed with a valid value for the aggregation kind.
aggregation()
Default constructor.
aggregation(Kind kind_)
Construct a new aggregation object from a given aggregation kind.
virtual bool is_equal(aggregation const &other) const
Compares two aggregation objects for equality.
virtual size_t do_hash() const
Computes the hash value of the aggregation.
Kind
Possible aggregation operations.
@ PRODUCT
product reduction
@ TOP_K
top k elements in a group
@ M2
sum of squares of differences from the mean
@ TDIGEST
create a tdigest from a set of input values
@ MEAN
arithmetic mean reduction
@ MERGE_M2
merge partial values of M2 aggregation
@ BITWISE_AGG
bitwise aggregation on numeric columns
@ PTX
PTX based UDF aggregation.
@ MERGE_SETS
merge multiple lists values into one list then drop duplicate entries
@ NUNIQUE
count number of unique elements
@ MERGE_HISTOGRAM
merge partial values of HISTOGRAM aggregation
@ ARGMIN
Index of min element.
@ CORRELATION
correlation between two sets of elements
@ QUANTILE
compute specified quantile(s)
@ COVARIANCE
covariance between two sets of elements
@ COLLECT_SET
collect values into a list without duplicate entries
@ LAG
window function, accesses row at specified offset preceding current row
@ CUDA
CUDA based UDF aggregation.
@ LEAD
window function, accesses row at specified offset following current row
@ SUM_OF_SQUARES
sum of squares reduction
@ NTH_ELEMENT
get the nth element
@ EWMA
get exponential weighted moving average at current index
@ MERGE_LISTS
merge multiple lists values into one list
@ MERGE_TDIGEST
create a tdigest by merging multiple tdigests together
@ HOST_UDF
host based UDF aggregation
@ COLLECT_LIST
collect values into a list
@ COUNT_VALID
count number of valid elements
@ ROW_NUMBER
get row-number of current index (relative to rolling window)
@ ARGMAX
Index of max element.
@ HISTOGRAM
compute frequency of each element
@ RANK
get rank of current index
@ COUNT_ALL
count number of elements
virtual std::unique_ptr< aggregation > clone() const =0
Clones the aggregation object.
Kind kind
The aggregation to perform.
Indicator for the logical data type of an element in a column.
Derived class intended for groupby specific aggregation usage.
Derived class intended for groupby specific scan usage.
The fundamental interface for host-based UDF implementation.
Derived class intended for reduction usage.
Derived class intended for rolling_window specific aggregation usage.
Derived class intended for scan usage.
Derived class intended for segmented reduction usage.
std::unique_ptr< Base > make_bitwise_aggregation(bitwise_op op)
Factory to create a BITWISE_AGG aggregation.
std::unique_ptr< Base > make_top_k_aggregation(size_type k, order topk_order=order::DESCENDING)
Factory to create a TOP_K aggregation.
std::unique_ptr< Base > make_median_aggregation()
correlation_type
Type of correlation method.
std::unique_ptr< Base > make_host_udf_aggregation(std::unique_ptr< host_udf_base > host_udf)
Factory to create a HOST_UDF aggregation.
std::unique_ptr< Base > make_lag_aggregation(size_type offset)
Factory to create a LAG aggregation.
std::unique_ptr< Base > make_tdigest_aggregation(int max_centroids=1000)
Factory to create a TDIGEST aggregation.
rank_percentage
Whether the returned rank should be a percentage and, if so, which type of percentage normalization to use.
std::unique_ptr< Base > make_covariance_aggregation(size_type min_periods=1, size_type ddof=1)
Factory to create a COVARIANCE aggregation.
std::unique_ptr< Base > make_std_aggregation(size_type ddof=1)
Factory to create a STD aggregation.
std::unique_ptr< Base > make_correlation_aggregation(correlation_type type, size_type min_periods=1)
Factory to create a CORRELATION aggregation.
std::unique_ptr< Base > make_merge_sets_aggregation(null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL)
Factory to create a MERGE_SETS aggregation.
std::unique_ptr< Base > make_variance_aggregation(size_type ddof=1)
Factory to create a VARIANCE aggregation.
std::unique_ptr< Base > make_lead_aggregation(size_type offset)
Factory to create a LEAD aggregation.
std::unique_ptr< Base > make_any_aggregation()
std::unique_ptr< Base > make_nunique_aggregation(null_policy null_handling=null_policy::EXCLUDE)
Factory to create a NUNIQUE aggregation.
std::unique_ptr< Base > make_max_aggregation()
std::unique_ptr< Base > make_sum_with_overflow_aggregation()
std::unique_ptr< Base > make_histogram_aggregation()
std::unique_ptr< Base > make_rank_aggregation(rank_method method, order column_order=order::ASCENDING, null_policy null_handling=null_policy::EXCLUDE, null_order null_precedence=null_order::AFTER, rank_percentage percentage=rank_percentage::NONE)
Factory to create a RANK aggregation.
std::unique_ptr< Base > make_row_number_aggregation()
std::unique_ptr< Base > make_merge_histogram_aggregation()
Factory to create a MERGE_HISTOGRAM aggregation.
std::unique_ptr< Base > make_count_aggregation(null_policy null_handling=null_policy::EXCLUDE)
Factory to create a COUNT aggregation.
bitwise_op
Bitwise operations to use for BITWISE_AGG aggregations on numeric columns.
ewm_history
How the first value of the EWM input values is treated.
std::unique_ptr< Base > make_collect_list_aggregation(null_policy null_handling=null_policy::INCLUDE)
Factory to create a COLLECT_LIST aggregation.
std::unique_ptr< Base > make_argmax_aggregation()
Factory to create an ARGMAX aggregation.
std::unique_ptr< Base > make_sum_aggregation()
std::unique_ptr< Base > make_all_aggregation()
std::unique_ptr< Base > make_sum_overflow_aggregation()
std::unique_ptr< Base > make_m2_aggregation()
Factory to create an M2 aggregation.
std::unique_ptr< Base > make_merge_m2_aggregation()
Factory to create a MERGE_M2 aggregation.
std::unique_ptr< Base > make_sum_of_squares_aggregation()
std::unique_ptr< Base > make_min_aggregation()
bool is_valid_aggregation(data_type source, aggregation::Kind kind)
Indicate if an aggregation is supported for a source datatype.
std::unique_ptr< Base > make_product_aggregation()
std::unique_ptr< Base > make_nth_element_aggregation(size_type n, null_policy null_handling=null_policy::INCLUDE)
Factory to create a NTH_ELEMENT aggregation.
std::unique_ptr< Base > make_ewma_aggregation(double const center_of_mass, ewm_history history)
Factory to create an EWMA aggregation.
std::unique_ptr< Base > make_udf_aggregation(udf_source_type type, std::string const &user_defined_aggregator, data_type output_type)
Factory to create an aggregation based on a UDF for PTX or CUDA.
std::unique_ptr< Base > make_merge_lists_aggregation()
Factory to create a MERGE_LISTS aggregation.
std::unique_ptr< Base > make_collect_set_aggregation(null_policy null_handling=null_policy::INCLUDE, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL)
Factory to create a COLLECT_SET aggregation.
std::unique_ptr< Base > make_argmin_aggregation()
Factory to create an ARGMIN aggregation.
std::unique_ptr< Base > make_quantile_aggregation(std::vector< double > const &quantiles, interpolation interp=interpolation::LINEAR)
Factory to create a QUANTILE aggregation.
std::unique_ptr< Base > make_mean_aggregation()
std::unique_ptr< Base > make_merge_tdigest_aggregation(int max_centroids=1000)
Factory to create a MERGE_TDIGEST aggregation.
@ ONE_NORMALIZED
(rank - 1) / (count - 1)
@ ZERO_NORMALIZED
rank / count
@ AND
bitwise AND operation
@ XOR
bitwise XOR operation
std::unique_ptr< table > quantiles(table_view const &input, std::vector< double > const &q, interpolation interp=interpolation::NEAREST, cudf::sorted is_input_sorted=sorted::NO, std::vector< order > const &column_order={}, std::vector< null_order > const &null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns the rows of the input corresponding to the requested quantiles.
rank_method
Tie-breaker method to use for ranking the column.
@ DENSE
rank always increases by 1 between groups
@ AVERAGE
mean of first in the group
@ MAX
max of first in the group
@ FIRST
stable sort order ranking (no ties)
@ MIN
min of first in the group
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
null_order
Indicates how null values compare against all other values.
null_equality
Enum to consider two nulls as equal or unequal.
int32_t size_type
Row index type for columns and tables.
null_policy
Enum to specify whether to include nulls or exclude nulls.
order
Indicates the order in which elements should be sorted.
interpolation
Interpolation method to use when the desired quantile lies between two data points i and j.
udf_source_type
Indicates the source language of a user defined function (UDF) to be used in JIT APIs.
nan_equality
Enum to consider different elements (of floating point types) holding NaN value as equal or unequal.
Type declarations for libcudf.