groupby.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/aggregation.hpp>
21 #include <cudf/replace.hpp>
23 #include <cudf/types.hpp>
24 #include <cudf/utilities/span.hpp>
25 
26 #include <rmm/cuda_stream_view.hpp>
28 
29 #include <memory>
30 #include <utility>
31 #include <vector>
32 
33 namespace cudf {
35 namespace groupby {
36 namespace detail {
37 namespace sort {
// Forward declaration only: the full definition of sort_groupby_helper is not
// part of this header. Within this file the type is used solely through a
// std::unique_ptr member (groupby::_helper) and a reference return
// (groupby::helper()), so an incomplete type is sufficient here.
38 class sort_groupby_helper;
39 
40 } // namespace sort
41 } // namespace detail
42 
61  std::vector<std::unique_ptr<groupby_aggregation>> aggregations;
62 };
63 
// Request for groupby aggregation(s) for scanning a column.
// NOTE(review): listing line 75 is elided here; the page index below records a
// `column_view values` member ("The elements to aggregate.") defined at
// groupby.hpp:75 -- confirm against the real header.
74 struct scan_request {
// Desired aggregations.
76  std::vector<std::unique_ptr<groupby_scan_aggregation>> aggregations;
77 };
78 
88  std::vector<std::unique_ptr<column>> results{};
89 };
90 
// Groups values by keys and computes aggregations on those groups.
//
// NOTE(review): this is a rendered source listing; many declarations below are
// cut off after their first line (the leading numbers are the header's own line
// numbers and show the gaps). Full signatures, where quoted in these comments,
// are taken from the index entries at the end of this page.
94 class groupby {
95  public:
    // No default construction; a groupby is always built from a keys table.
96  groupby() = delete;
    // Declared here without a body; the definition is not in this listing.
    // NOTE(review): presumably out-of-line because _helper points to a type
    // that is only forward-declared in this header -- confirm.
97  ~groupby();
    // Copying and moving (construction and assignment) are both disabled.
98  groupby(groupby const&) = delete;
99  groupby(groupby&&) = delete;
100  groupby& operator=(groupby const&) = delete;
101  groupby& operator=(groupby&&) = delete;
102 
    // Construct a groupby object with the specified keys.
    // Every argument except `keys` has a default: null_policy::EXCLUDE,
    // sorted::NO, and empty vectors for column_order / null_precedence.
126  explicit groupby(table_view const& keys,
127  null_policy null_handling = null_policy::EXCLUDE,
128  sorted keys_are_sorted = sorted::NO,
129  std::vector<order> const& column_order = {},
130  std::vector<null_order> const& null_precedence = {});
131 
    // Performs grouped aggregations on the specified values.
    // Index signature: aggregate(host_span<aggregation_request const> requests,
    //                            rmm::mr::device_memory_resource* mr = ...).
    // (Parameter lines are elided in this listing.)
185  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> aggregate(
188 
    // Performs grouped aggregations on the specified values; this overload
    // additionally takes an rmm::cuda_stream_view. (Remaining parameter lines
    // are elided in this listing.)
194  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> aggregate(
196  rmm::cuda_stream_view stream,
    // Performs grouped scans on the specified values.
    // Index signature: scan(host_span<scan_request const> requests,
    //                       rmm::mr::device_memory_resource* mr = ...).
249  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> scan(
252 
    // Performs grouped shifts for specified values.
    // Per the index signature there is also a
    // `host_span<size_type const> offsets` parameter between `values` and
    // `fill_values`, plus a trailing memory-resource parameter (both elided).
303  std::pair<std::unique_ptr<table>, std::unique_ptr<table>> shift(
304  table_view const& values,
306  std::vector<std::reference_wrapper<scalar const>> const& fill_values,
308 
    // The grouped data corresponding to a groupby operation on a set of values.
317  struct groups {
      // Table of grouped keys.
318  std::unique_ptr<table> keys;
      // Group offsets.
319  std::vector<size_type> offsets;
      // Table of grouped values.
320  std::unique_ptr<table> values;
321  };
322 
    // NOTE(review): the index lists `groups get_groups(cudf::table_view values = {},
    // rmm::mr::device_memory_resource* mr = ...)` ("Get the grouped keys and
    // values corresponding to a groupby operation on a set of values"); its
    // declaration appears to be elided from the listing at this point.
337 
    // Performs grouped replacement of nulls in `values`, with one
    // replace_policy entry supplied through `replace_policies`.
    // (Trailing memory-resource parameter elided in this listing.)
373  std::pair<std::unique_ptr<table>, std::unique_ptr<table>> replace_nulls(
374  table_view const& values,
375  host_span<cudf::replace_policy const> replace_policies,
377 
378  private:
    // Stored by value as a table_view.
    // NOTE(review): presumably the keys passed to the constructor -- confirm.
379  table_view _keys;
    // Defaults to null_policy::EXCLUDE.
380  null_policy _include_null_keys{null_policy::EXCLUDE};
    // Defaults to sorted::NO.
382  sorted _keys_are_sorted{sorted::NO};
    // Empty by default.
383  std::vector<order> _column_order{};
    // Empty by default.
385  std::vector<null_order> _null_precedence{};
    // Owning pointer to the sort-based helper (type is forward-declared above).
388  std::unique_ptr<detail::sort::sort_groupby_helper>
389  _helper;
391 
    // Returns a reference to a sort_groupby_helper.
    // NOTE(review): presumably the object owned by _helper, possibly created
    // on first use -- confirm in the implementation file.
398  detail::sort::sort_groupby_helper& helper();
399 
    // Private aggregation entry point taking the requests and a stream.
    // NOTE(review): the name suggests it selects an implementation strategy;
    // the body is not visible here -- confirm.
404  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> dispatch_aggregation(
405  host_span<aggregation_request const> requests,
406  rmm::cuda_stream_view stream,
408 
409  // Sort-based groupby
    // Sort-based counterpart for aggregation requests (trailing parameter
    // elided in this listing).
410  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> sort_aggregate(
411  host_span<aggregation_request const> requests,
412  rmm::cuda_stream_view stream,
414 
    // Sort-based counterpart for scan requests (trailing parameter elided in
    // this listing).
415  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> sort_scan(
416  host_span<scan_request const> requests,
417  rmm::cuda_stream_view stream,
419 };
421 } // namespace groupby
422 } // namespace cudf
Representation for specifying desired aggregations from aggregation-based APIs, e.g., groupby, reductions, rolling, etc.
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Groups values by keys and computes aggregations on those groups.
Definition: groupby.hpp:94
std::pair< std::unique_ptr< table >, std::vector< aggregation_result > > aggregate(host_span< aggregation_request const > requests, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped aggregations on the specified values.
std::pair< std::unique_ptr< table >, std::unique_ptr< table > > shift(table_view const &values, host_span< size_type const > offsets, std::vector< std::reference_wrapper< scalar const >> const &fill_values, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped shifts for specified values.
std::pair< std::unique_ptr< table >, std::vector< aggregation_result > > scan(host_span< scan_request const > requests, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped scans on the specified values.
groups get_groups(cudf::table_view values={}, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Get the grouped keys and values corresponding to a groupby operation on a set of values.
groupby(table_view const &keys, null_policy null_handling=null_policy::EXCLUDE, sorted keys_are_sorted=sorted::NO, std::vector< order > const &column_order={}, std::vector< null_order > const &null_precedence={})
Construct a groupby object with the specified keys.
std::pair< std::unique_ptr< table >, std::unique_ptr< table > > replace_nulls(table_view const &values, host_span< cudf::replace_policy const > replace_policies, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped null replacement on the specified values.
std::pair< std::unique_ptr< table >, std::vector< aggregation_result > > aggregate(host_span< aggregation_request const > requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped aggregations on the specified values.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:187
column view class definitions
std::unique_ptr< table > sort(table_view const &input, std::vector< order > const &column_order={}, std::vector< null_order > const &null_precedence={}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs a lexicographic sort of the rows of a table.
device_memory_resource * get_current_device_resource()
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:110
sorted
Indicates whether a collection of values is known to be sorted.
Definition: types.hpp:151
@ EXCLUDE
exclude null elements
cuDF interfaces
Definition: aggregation.hpp:34
Request for groupby aggregation(s) to perform on a column.
Definition: groupby.hpp:59
std::vector< std::unique_ptr< groupby_aggregation > > aggregations
Desired aggregations.
Definition: groupby.hpp:61
column_view values
The elements to aggregate.
Definition: groupby.hpp:60
The result(s) of an aggregation_request
Definition: groupby.hpp:86
std::vector< std::unique_ptr< column > > results
Columns of results from an aggregation_request
Definition: groupby.hpp:88
The grouped data corresponding to a groupby operation on a set of values.
Definition: groupby.hpp:317
std::unique_ptr< table > keys
Table of grouped keys.
Definition: groupby.hpp:318
std::vector< size_type > offsets
Group Offsets.
Definition: groupby.hpp:319
std::unique_ptr< table > values
Table of grouped values.
Definition: groupby.hpp:320
Request for groupby aggregation(s) for scanning a column.
Definition: groupby.hpp:74
column_view values
The elements to aggregate.
Definition: groupby.hpp:75
std::vector< std::unique_ptr< groupby_scan_aggregation > > aggregations
Desired aggregations.
Definition: groupby.hpp:76
C++20 std::span with reduced feature set.
Definition: span.hpp:210
Class definitions for (mutable)_table_view
Type declarations for libcudf.