groupby.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2021, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/aggregation.hpp>
21 #include <cudf/replace.hpp>
23 #include <cudf/types.hpp>
24 #include <cudf/utilities/span.hpp>
25 
26 #include <memory>
27 #include <rmm/cuda_stream_view.hpp>
28 
29 #include <utility>
30 #include <vector>
31 
32 namespace cudf {
34 namespace groupby {
// Implementation-detail namespaces. Only a forward declaration lives here:
// the groupby class further down stores a
// std::unique_ptr<detail::sort::sort_groupby_helper> and declares a private
// helper() accessor returning a reference to this type.
35 namespace detail {
36 namespace sort {
37 class sort_groupby_helper;
38 
39 } // namespace sort
40 } // namespace detail
41 
60  std::vector<std::unique_ptr<groupby_aggregation>> aggregations;
61 };
62 
// Request for groupby aggregation(s) for scanning a column.
// NOTE(review): listing line 74 is absent from this extract. The
// cross-reference index at the end of the page records it as the member
// `column_view values` ("The elements to aggregate.").
73 struct scan_request {
75  std::vector<std::unique_ptr<groupby_scan_aggregation>> aggregations;  // Desired aggregations.
76 };
77 
87  std::vector<std::unique_ptr<column>> results{};
88 };
89 
// Groups values by keys and computes aggregations on those groups.
// NOTE(review): this is a rendered source listing. Doc-comment lines and some
// declaration lines (listing lines 185, 240, 295, 325, 397, 403, 408) are
// absent from the extract, so several declarations below appear truncated.
93 class groupby {
94  public:
// Default construction, copy and move (construction and assignment) are all
// deleted. The destructor is declared only; no definition is visible here.
95  groupby() = delete;
96  ~groupby();
97  groupby(groupby const&) = delete;
98  groupby(groupby&&) = delete;
99  groupby& operator=(groupby const&) = delete;
100  groupby& operator=(groupby&&) = delete;
101 
// Construct a groupby object with the specified keys.
// Every parameter other than `keys` is defaulted: null_policy::EXCLUDE,
// sorted::NO, and empty column_order / null_precedence vectors.
125  explicit groupby(table_view const& keys,
126  null_policy null_handling = null_policy::EXCLUDE,
127  sorted keys_are_sorted = sorted::NO,
128  std::vector<order> const& column_order = {},
129  std::vector<null_order> const& null_precedence = {});
130 
// Performs grouped aggregations on the specified values.
// NOTE(review): listing line 185 is missing here; per the index its content
// is the parameter `host_span<aggregation_request const> requests,`.
184  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> aggregate(
186  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
187 
// Performs grouped scans on the specified values.
// NOTE(review): listing line 240 is missing here; per the index its content
// is the parameter `host_span<scan_request const> requests,`.
239  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> scan(
241  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
242 
// Performs grouped shifts for specified values.
// NOTE(review): listing line 295 is missing between `values` and
// `fill_values`; per the index it is `host_span<size_type const> offsets,`.
293  std::pair<std::unique_ptr<table>, std::unique_ptr<table>> shift(
294  table_view const& values,
296  std::vector<std::reference_wrapper<const scalar>> const& fill_values,
297  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
298 
// The grouped data corresponding to a groupby operation on a set of values.
// Holds two owned tables (keys, values) and a host-side vector of size_type
// offsets.
307  struct groups {
308  std::unique_ptr<table> keys;
309  std::vector<size_type> offsets;
310  std::unique_ptr<table> values;
311  };
312 
// Get the grouped keys and values corresponding to a groupby operation on a
// set of values.
// NOTE(review): listing line 325 (the start of this declaration) is missing;
// per the index it reads `groups get_groups(cudf::table_view values = {},`.
326  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
327 
// Performs grouped null replacement on `values`, with the policies supplied
// as a host_span of cudf::replace_policy.
363  std::pair<std::unique_ptr<table>, std::unique_ptr<table>> replace_nulls(
364  table_view const& values,
365  host_span<cudf::replace_policy const> replace_policies,
366  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
367 
368  private:
// State members; the names and defaults parallel the constructor parameters.
// NOTE(review): presumably these store the constructor arguments - confirm
// against the implementation file.
369  table_view _keys;
370  null_policy _include_null_keys{null_policy::EXCLUDE};
371  sorted _keys_are_sorted{sorted::NO};
373  std::vector<order> _column_order{};
374  std::vector<null_order> _null_precedence{};
// Owning pointer to the forward-declared sort helper.
// NOTE(review): presumably the object returned by helper() - confirm.
376  std::unique_ptr<detail::sort::sort_groupby_helper>
379  _helper;
380 
// Accessor returning a reference to the sort helper.
388  detail::sort::sort_groupby_helper& helper();
389 
// Private aggregation entry points taking the requests plus a CUDA stream.
// NOTE(review): the final line of each of the three declarations below
// (listing lines 397, 403, 408) is missing from this extract. It is
// presumably a trailing memory-resource parameter and the closing `);` -
// confirm against the real header.
394  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> dispatch_aggregation(
395  host_span<aggregation_request const> requests,
396  rmm::cuda_stream_view stream,
398 
399  // Sort-based groupby
400  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> sort_aggregate(
401  host_span<aggregation_request const> requests,
402  rmm::cuda_stream_view stream,
404 
// Scan counterpart of sort_aggregate: same shape, but takes scan_request.
405  std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> sort_scan(
406  host_span<scan_request const> requests,
407  rmm::cuda_stream_view stream,
409 };
411 } // namespace groupby
412 } // namespace cudf
table_view.hpp
Class definitions for (mutable)_table_view
cudf::groupby::groupby
Groups values by keys and computes aggregations on those groups.
Definition: groupby.hpp:93
cudf::groupby::groupby::replace_nulls
std::pair< std::unique_ptr< table >, std::unique_ptr< table > > replace_nulls(table_view const &values, host_span< cudf::replace_policy const > replace_policies, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped null replacement on the given values.
cudf::null_policy
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:123
cudf::column_view
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Definition: column_view.hpp:300
cudf::groupby::groupby::groups
The grouped data corresponding to a groupby operation on a set of values.
Definition: groupby.hpp:307
cudf::host_span
Definition: span.hpp:130
types.hpp
Type declarations for libcudf.
rmm::cuda_stream_view
cudf::groupby::aggregation_request::values
column_view values
The elements to aggregate.
Definition: groupby.hpp:59
replace.hpp
cudf::groupby::groupby::scan
std::pair< std::unique_ptr< table >, std::vector< aggregation_result > > scan(host_span< scan_request const > requests, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped scans on the specified values.
cudf::table_view
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:154
cudf::groupby::aggregation_result
The result(s) of an aggregation_request
Definition: groupby.hpp:85
cudf::groupby::scan_request
Request for groupby aggregation(s) for scanning a column.
Definition: groupby.hpp:73
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::groupby::aggregation_request::aggregations
std::vector< std::unique_ptr< groupby_aggregation > > aggregations
Desired aggregations.
Definition: groupby.hpp:60
cudf::groupby::groupby::groupby
groupby(table_view const &keys, null_policy null_handling=null_policy::EXCLUDE, sorted keys_are_sorted=sorted::NO, std::vector< order > const &column_order={}, std::vector< null_order > const &null_precedence={})
Construct a groupby object with the specified keys
cudf::sorted
sorted
Indicates whether a collection of values is known to be sorted.
Definition: types.hpp:164
cudf::groupby::aggregation_result::results
std::vector< std::unique_ptr< column > > results
Columns of results from an aggregation_request
Definition: groupby.hpp:87
cudf::groupby::groupby::shift
std::pair< std::unique_ptr< table >, std::unique_ptr< table > > shift(table_view const &values, host_span< size_type const > offsets, std::vector< std::reference_wrapper< const scalar >> const &fill_values, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped shifts for specified values.
aggregation.hpp
Representation for specifying desired aggregations from aggregation-based APIs, e.g., groupby, reductions, rolling, etc.
rmm::mr::device_memory_resource
cudf::groupby::scan_request::values
column_view values
The elements to aggregate.
Definition: groupby.hpp:74
cudf::sort
std::unique_ptr< table > sort(table_view const &input, std::vector< order > const &column_order={}, std::vector< null_order > const &null_precedence={}, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs a lexicographic sort of the rows of a table.
cudf::groupby::groupby::aggregate
std::pair< std::unique_ptr< table >, std::vector< aggregation_result > > aggregate(host_span< aggregation_request const > requests, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Performs grouped aggregations on the specified values.
cudf::groupby::aggregation_request
Request for groupby aggregation(s) to perform on a column.
Definition: groupby.hpp:58
column_view.hpp
column view class definitions
cudf::groupby::scan_request::aggregations
std::vector< std::unique_ptr< groupby_scan_aggregation > > aggregations
Desired aggregations.
Definition: groupby.hpp:75
cudf::groupby::groupby::get_groups
groups get_groups(cudf::table_view values={}, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Get the grouped keys and values corresponding to a groupby operation on a set of values.