groupby.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/aggregation.hpp>
21 #include <cudf/replace.hpp>
23 #include <cudf/types.hpp>
24 #include <cudf/utilities/span.hpp>
25 
26 #include <rmm/cuda_stream_view.hpp>
28 
29 #include <memory>
30 #include <utility>
31 #include <vector>
32 
33 namespace cudf {
35 namespace groupby {
// Forward declaration of the helper type used by cudf::groupby::groupby below:
// the class stores it as `std::unique_ptr<detail::sort::sort_groupby_helper> _helper`
// and hands it out by reference from its private `helper()` accessor.
// Only the name is declared here; the definition is not visible in this listing.
36 namespace detail {
37 namespace sort {
38 class sort_groupby_helper;
39 
40 } // namespace sort
41 } // namespace detail
42 
61  std::vector<std::unique_ptr<groupby_aggregation>> aggregations;
62 };
63 
// Request for groupby aggregation(s) for scanning a column.
// NOTE(review): this listing skips original line 75. The member index further
// down this page records `column_view values` ("The elements to aggregate.")
// as defined at groupby.hpp:75, so the struct has a second member not shown here.
74 struct scan_request {
    // Desired aggregations.
76  std::vector<std::unique_ptr<groupby_scan_aggregation>> aggregations;
77 };
78 
88  std::vector<std::unique_ptr<column>> results{};
89 };
90 
// Groups values by keys and computes aggregations on those groups.
//
// NOTE(review): this is a rendered listing with gaps in the original line
// numbering; several declarations below are missing parameter lines. Where the
// member index at the bottom of this page shows the full signature, the missing
// parameter is named in the comment; otherwise the gap is only flagged.
94 class groupby {
95  public:
    // No default construction; copying and moving (construction and assignment)
    // are all deleted, so an instance stays where it was constructed.
96  groupby() = delete;
    // Declared here and defined elsewhere — presumably because `_helper` is a
    // unique_ptr to a type that is only forward-declared in this header
    // (TODO confirm against the implementation file).
97  ~groupby();
98  groupby(groupby const&) = delete;
99  groupby(groupby&&) = delete;
100  groupby& operator=(groupby const&) = delete;
101  groupby& operator=(groupby&&) = delete;
102 
    // Construct a groupby object with the specified keys.
    // Defaults: null_handling = null_policy::EXCLUDE, keys_are_sorted = sorted::NO,
    // column_order and null_precedence empty.
126  explicit groupby(table_view const& keys,
127  null_policy null_handling = null_policy::EXCLUDE,
128  sorted keys_are_sorted = sorted::NO,
129  std::vector<order> const& column_order = {},
130  std::vector<null_order> const& null_precedence = {});
131 
    // Performs grouped aggregations on the specified values.
    // Original line 186 is missing from this listing; the member index shows the
    // first parameter as `host_span<aggregation_request const> requests`.
    // `mr` defaults to rmm::mr::get_current_device_resource().
185  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> aggregate(
187  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
188 
    // Performs grouped scans on the specified values.
    // Original line 241 is missing from this listing; the member index shows the
    // first parameter as `host_span<scan_request const> requests`.
240  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> scan(
242  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
243 
    // Performs grouped shifts for specified values.
    // Original line 296 is missing from this listing; the member index shows a
    // `host_span<size_type const> offsets` parameter between `values` and
    // `fill_values`.
294  std::pair<std::unique_ptr<table>, std::unique_ptr<table>> shift(
295  table_view const& values,
297  std::vector<std::reference_wrapper<const scalar>> const& fill_values,
298  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
299 
    // The grouped data corresponding to a groupby operation on a set of values.
308  struct groups {
      // Table of grouped keys.
309  std::unique_ptr<table> keys;
      // Group offsets.
310  std::vector<size_type> offsets;
      // Table of grouped values.
311  std::unique_ptr<table> values;
312  };
313 
    // Tail of the get_groups declaration: get the grouped keys and values
    // corresponding to a groupby operation on a set of values.
    // Original line 326 is missing from this listing; the member index shows it as
    // `groups get_groups(cudf::table_view values = {}, ...`.
327  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
328 
    // Performs grouped null replacement on `values`, with the policies supplied
    // in `replace_policies`.
364  std::pair<std::unique_ptr<table>, std::unique_ptr<table>> replace_nulls(
365  table_view const& values,
366  host_span<cudf::replace_policy const> replace_policies,
367  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
368 
369  private:
    // State held by the object. The names and default values parallel the
    // constructor parameters above — presumably they store those arguments
    // (TODO confirm in the constructor definition).
370  table_view _keys;
371  null_policy _include_null_keys{null_policy::EXCLUDE};
372  sorted _keys_are_sorted{sorted::NO};
374  std::vector<order> _column_order{};
375  std::vector<null_order> _null_precedence{};
    // Owning pointer to the forward-declared sort helper; its declaration spans
    // original lines 377-380 with the intermediate lines absent from this listing.
377  std::unique_ptr<detail::sort::sort_groupby_helper>
380  _helper;
381 
    // Accessor returning the sort helper by reference.
    // NOTE(review): presumably creates `_helper` on first use — confirm in the
    // implementation.
389  detail::sort::sort_groupby_helper& helper();
390 
    // Internal entry point taking the aggregation requests and a CUDA stream.
    // Original line 398 (the final parameter and closing of the declaration) is
    // missing from this listing.
395  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> dispatch_aggregation(
396  host_span<aggregation_request const> requests,
397  rmm::cuda_stream_view stream,
399 
400  // Sort-based groupby
    // Original line 404 (the end of this declaration) is missing from this listing.
401  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> sort_aggregate(
402  host_span<aggregation_request const> requests,
403  rmm::cuda_stream_view stream,
405 
    // Scan counterpart of sort_aggregate, taking scan_request items.
    // Original line 409 (the end of this declaration) is missing from this listing.
406  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> sort_scan(
407  host_span<scan_request const> requests,
408  rmm::cuda_stream_view stream,
410 };
412 } // namespace groupby
413 } // namespace cudf
per_device_resource.hpp
table_view.hpp
Class definitions for (mutable)_table_view
cudf::groupby::groupby
Groups values by keys and computes aggregations on those groups.
Definition: groupby.hpp:94
cudf::groupby::groupby::replace_nulls
std::pair< std::unique_ptr< table >, std::unique_ptr< table > > replace_nulls(table_view const &values, host_span< cudf::replace_policy const > replace_policies, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped null replacement on the given values.
cudf::null_policy
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:111
cudf::column_view
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Definition: column_view.hpp:313
cudf::groupby::groupby::groups
The grouped data corresponding to a groupby operation on a set of values.
Definition: groupby.hpp:308
cudf::host_span
C++20 std::span with reduced feature set.
Definition: span.hpp:210
types.hpp
Type declarations for libcudf.
rmm::cuda_stream_view
cudf::groupby::aggregation_request::values
column_view values
The elements to aggregate.
Definition: groupby.hpp:60
replace.hpp
cudf::groupby::groupby::groups::values
std::unique_ptr< table > values
Table of grouped values.
Definition: groupby.hpp:311
cudf::groupby::groupby::scan
std::pair< std::unique_ptr< table >, std::vector< aggregation_result > > scan(host_span< scan_request const > requests, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped scans on the specified values.
cudf::table_view
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:187
cudf::groupby::aggregation_result
The result(s) of an aggregation_request
Definition: groupby.hpp:86
cudf::groupby::groupby::groups::keys
std::unique_ptr< table > keys
Table of grouped keys.
Definition: groupby.hpp:309
cudf::groupby::scan_request
Request for groupby aggregation(s) for scanning a column.
Definition: groupby.hpp:74
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::groupby::aggregation_request::aggregations
std::vector< std::unique_ptr< groupby_aggregation > > aggregations
Desired aggregations.
Definition: groupby.hpp:61
cudf::groupby::groupby::groupby
groupby(table_view const &keys, null_policy null_handling=null_policy::EXCLUDE, sorted keys_are_sorted=sorted::NO, std::vector< order > const &column_order={}, std::vector< null_order > const &null_precedence={})
Construct a groupby object with the specified keys
cudf::sorted
sorted
Indicates whether a collection of values is known to be sorted.
Definition: types.hpp:152
cudf::groupby::aggregation_result::results
std::vector< std::unique_ptr< column > > results
Columns of results from an aggregation_request
Definition: groupby.hpp:88
cudf::groupby::groupby::shift
std::pair< std::unique_ptr< table >, std::unique_ptr< table > > shift(table_view const &values, host_span< size_type const > offsets, std::vector< std::reference_wrapper< const scalar >> const &fill_values, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped shifts for specified values.
aggregation.hpp
Representation for specifying desired aggregations from aggregation-based APIs, e.g., groupby, reductions, rolling, etc.
rmm::mr::device_memory_resource
cudf::groupby::scan_request::values
column_view values
The elements to aggregate.
Definition: groupby.hpp:75
cudf::sort
std::unique_ptr< table > sort(table_view const &input, std::vector< order > const &column_order={}, std::vector< null_order > const &null_precedence={}, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs a lexicographic sort of the rows of a table.
cudf::groupby::groupby::aggregate
std::pair< std::unique_ptr< table >, std::vector< aggregation_result > > aggregate(host_span< aggregation_request const > requests, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped aggregations on the specified values.
cudf::groupby::aggregation_request
Request for groupby aggregation(s) to perform on a column.
Definition: groupby.hpp:59
column_view.hpp
column view class definitions
cudf::groupby::groupby::groups::offsets
std::vector< size_type > offsets
Group Offsets.
Definition: groupby.hpp:310
cudf::groupby::scan_request::aggregations
std::vector< std::unique_ptr< groupby_scan_aggregation > > aggregations
Desired aggregations.
Definition: groupby.hpp:76
cudf::groupby::groupby::get_groups
groups get_groups(cudf::table_view values={}, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Get the grouped keys and values corresponding to a groupby operation on a set of values.