stream_compaction.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/types.hpp>
21 
22 #include <rmm/mr/device/per_device_resource.hpp>
23 #include <rmm/resource_ref.hpp>
24 
25 #include <memory>
26 #include <vector>
27 
28 namespace cudf {
/**
 * @brief Filters a table to remove null elements with threshold count.
 *
 * @param input The table to filter.
 * @param keys Column indices of type cudf::size_type. Presumably these select the
 * columns of `input` that are inspected for nulls -- TODO confirm.
 * @param keep_threshold The threshold count used when deciding whether a row is
 * kept. NOTE(review): presumably a minimum number of non-null key fields per
 * row -- confirm against the implementation.
 * @param mr Device memory resource reference; defaults to
 * rmm::mr::get_current_device_resource().
 * @return Owning pointer to the filtered table.
 */
73 std::unique_ptr<table> drop_nulls(
74  table_view const& input,
75  std::vector<size_type> const& keys,
76  cudf::size_type keep_threshold,
77  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
78 
/**
 * @brief Overload of drop_nulls that takes no `keep_threshold` argument.
 *
 * NOTE(review): presumably drops every row that has a null in any of the `keys`
 * columns -- confirm against the implementation.
 *
 * @param input The table to filter.
 * @param keys Column indices of type cudf::size_type. Presumably these select the
 * columns of `input` that are inspected for nulls -- TODO confirm.
 * @param mr Device memory resource reference; defaults to
 * rmm::mr::get_current_device_resource().
 * @return Owning pointer to the filtered table.
 */
105 std::unique_ptr<table> drop_nulls(
106  table_view const& input,
107  std::vector<size_type> const& keys,
108  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
109 
/**
 * @brief Filters a table to remove NaNs with threshold count.
 *
 * @param input The table to filter.
 * @param keys Column indices of type cudf::size_type. Presumably these select the
 * columns of `input` that are inspected for NaN values -- TODO confirm.
 * @param keep_threshold The threshold count used when deciding whether a row is
 * kept. NOTE(review): presumably a minimum number of non-NaN key fields per
 * row -- confirm against the implementation.
 * @param mr Device memory resource reference; defaults to
 * rmm::mr::get_current_device_resource().
 * @return Owning pointer to the filtered table.
 */
147 std::unique_ptr<table> drop_nans(
148  table_view const& input,
149  std::vector<size_type> const& keys,
150  cudf::size_type keep_threshold,
151  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
152 
/**
 * @brief Overload of drop_nans that takes no `keep_threshold` argument.
 *
 * NOTE(review): presumably drops every row that has a NaN in any of the `keys`
 * columns -- confirm against the implementation.
 *
 * @param input The table to filter.
 * @param keys Column indices of type cudf::size_type. Presumably these select the
 * columns of `input` that are inspected for NaN values -- TODO confirm.
 * @param mr Device memory resource reference; defaults to
 * rmm::mr::get_current_device_resource().
 * @return Owning pointer to the filtered table.
 */
180 std::unique_ptr<table> drop_nans(
181  table_view const& input,
182  std::vector<size_type> const& keys,
183  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
184 
/**
 * @brief Filters `input` using `boolean_mask` of boolean values as a mask.
 *
 * @param input The table to filter.
 * @param boolean_mask A column of boolean values used as the mask.
 * NOTE(review): presumably it must have one element per row of `input` -- confirm.
 * @param mr Device memory resource reference; defaults to
 * rmm::mr::get_current_device_resource().
 * @return Owning pointer to the filtered table.
 */
206 std::unique_ptr<table> apply_boolean_mask(
207  table_view const& input,
208  column_view const& boolean_mask,
209  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
210 
/**
 * @brief Choices for which occurrence of a set of duplicate rows is retained.
 *
 * The opening line of this enum (listing line 214) was missing from the
 * listing and has been restored; the type name matches the
 * `duplicate_keep_option::KEEP_ANY` defaults used by the distinct APIs.
 */
214 enum class duplicate_keep_option {
215  KEEP_ANY = 0,  ///< Keep an unspecified occurrence.
216  KEEP_FIRST,    ///< Keep first occurrence.
217  KEEP_LAST,     ///< Keep last occurrence.
218  KEEP_NONE      ///< Keep no (remove all) occurrences of duplicates.
219 };
220 
/**
 * @brief Create a new table with consecutive duplicate rows removed.
 *
 * The `keep` parameter (listing line 251) was missing from the listing and has
 * been restored so the declaration matches the documented signature.
 *
 * @param input The input table.
 * @param keys Column indices of type cudf::size_type. Presumably these select the
 * columns of `input` compared to detect duplicates -- TODO confirm.
 * @param keep Which occurrence of a run of duplicates to retain
 * (KEEP_ANY, KEEP_FIRST, KEEP_LAST or KEEP_NONE). No default value.
 * @param nulls_equal Whether two nulls are considered equal; defaults to
 * null_equality::EQUAL (nulls compare equal).
 * @param mr Device memory resource reference; defaults to
 * rmm::mr::get_current_device_resource().
 * @return Owning pointer to the resulting table.
 */
248 std::unique_ptr<table> unique(
249  table_view const& input,
250  std::vector<size_type> const& keys,
251  duplicate_keep_option keep,
252  null_equality nulls_equal = null_equality::EQUAL,
253  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
254 
/**
 * @brief Create a new table without duplicate rows.
 *
 * The `keep` and `nans_equal` parameters (listing lines 277 and 279) were
 * missing from the listing and have been restored so the declaration matches
 * the documented signature.
 *
 * @param input The input table.
 * @param keys Column indices of type cudf::size_type. Presumably these select the
 * columns of `input` compared to detect duplicates -- TODO confirm.
 * @param keep Which occurrence of a set of duplicates to retain; defaults to
 * duplicate_keep_option::KEEP_ANY (an unspecified occurrence).
 * @param nulls_equal Whether two nulls are considered equal; defaults to
 * null_equality::EQUAL (nulls compare equal).
 * @param nans_equal Whether NaN values are considered equal; defaults to
 * nan_equality::ALL_EQUAL (all NaNs compare equal, regardless of sign).
 * @param mr Device memory resource reference; defaults to
 * rmm::mr::get_current_device_resource().
 * @return Owning pointer to the resulting table.
 */
274 std::unique_ptr<table> distinct(
275  table_view const& input,
276  std::vector<size_type> const& keys,
277  duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
278  null_equality nulls_equal = null_equality::EQUAL,
279  nan_equality nans_equal = nan_equality::ALL_EQUAL,
280  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
281 
/**
 * @brief Create a column of indices of all distinct rows in the input table.
 *
 * The `keep` and `nans_equal` parameters (listing lines 298 and 300) were
 * missing from the listing and have been restored so the declaration matches
 * the documented signature.
 *
 * @param input The input table.
 * @param keep Which occurrence of a set of duplicates to retain; defaults to
 * duplicate_keep_option::KEEP_ANY (an unspecified occurrence).
 * @param nulls_equal Whether two nulls are considered equal; defaults to
 * null_equality::EQUAL (nulls compare equal).
 * @param nans_equal Whether NaN values are considered equal; defaults to
 * nan_equality::ALL_EQUAL (all NaNs compare equal, regardless of sign).
 * @param stream CUDA stream view; defaults to cudf::get_default_stream().
 * @param mr Device memory resource reference; defaults to
 * rmm::mr::get_current_device_resource().
 * @return Owning pointer to a column holding the indices of the distinct rows.
 */
296 std::unique_ptr<column> distinct_indices(
297  table_view const& input,
298  duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
299  null_equality nulls_equal = null_equality::EQUAL,
300  nan_equality nans_equal = nan_equality::ALL_EQUAL,
301  rmm::cuda_stream_view stream = cudf::get_default_stream(),
302  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
303 
/**
 * @brief Create a new table without duplicate rows, preserving input order.
 *
 * The `keep` and `nans_equal` parameters (listing lines 329 and 331) were
 * missing from the listing and have been restored so the declaration matches
 * the documented signature.
 *
 * @param input The input table.
 * @param keys Column indices of type cudf::size_type. Presumably these select the
 * columns of `input` compared to detect duplicates -- TODO confirm.
 * @param keep Which occurrence of a set of duplicates to retain; defaults to
 * duplicate_keep_option::KEEP_ANY (an unspecified occurrence).
 * @param nulls_equal Whether two nulls are considered equal; defaults to
 * null_equality::EQUAL (nulls compare equal).
 * @param nans_equal Whether NaN values are considered equal; defaults to
 * nan_equality::ALL_EQUAL (all NaNs compare equal, regardless of sign).
 * @param mr Device memory resource reference; defaults to
 * rmm::mr::get_current_device_resource().
 * @return Owning pointer to the resulting table.
 */
326 std::unique_ptr<table> stable_distinct(
327  table_view const& input,
328  std::vector<size_type> const& keys,
329  duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
330  null_equality nulls_equal = null_equality::EQUAL,
331  nan_equality nans_equal = nan_equality::ALL_EQUAL,
332  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
333 
/**
 * @brief Count the number of consecutive groups of equivalent rows in a column.
 *
 * The opening line of this declaration (listing line 349) was missing from the
 * listing and has been restored from the documented signature.
 * NOTE(review): that this fragment is unique_count rather than the identically
 * shaped distinct_count follows upstream declaration order -- confirm.
 *
 * @param input The column to examine.
 * @param null_handling Whether nulls are included or excluded.
 * @param nan_handling Whether a NaN value is treated as a null or as a
 * non-null element.
 * @return The number of consecutive groups of equivalent rows.
 */
349 cudf::size_type unique_count(column_view const& input,
350  null_policy null_handling,
351  nan_policy nan_handling);
352 
/**
 * @brief Table overload of unique_count.
 *
 * NOTE(review): the opening line of this declaration (listing line 362) was
 * missing from the listing. It has been reconstructed as the table_view
 * overload of unique_count, following the upstream header -- confirm before
 * relying on it.
 *
 * @param input The table to examine.
 * @param nulls_equal Whether two nulls are considered equal; defaults to
 * null_equality::EQUAL (nulls compare equal).
 * @return A count of type cudf::size_type.
 */
362 cudf::size_type unique_count(table_view const& input,
363  null_equality nulls_equal = null_equality::EQUAL);
364 
/**
 * @brief Count the distinct elements in the column_view.
 *
 * The opening line of this declaration (listing line 385) was missing from the
 * listing and has been restored from the documented signature.
 * NOTE(review): that this fragment is distinct_count rather than the
 * identically shaped unique_count follows upstream declaration order -- confirm.
 *
 * @param input The column to examine.
 * @param null_handling Whether nulls are included or excluded.
 * @param nan_handling Whether a NaN value is treated as a null or as a
 * non-null element.
 * @return The number of distinct elements.
 */
385 cudf::size_type distinct_count(column_view const& input,
386  null_policy null_handling,
387  nan_policy nan_handling);
388 
/**
 * @brief Table overload of distinct_count.
 *
 * NOTE(review): the opening line of this declaration (listing line 398) was
 * missing from the listing. It has been reconstructed as the table_view
 * overload of distinct_count, following the upstream header -- confirm before
 * relying on it.
 *
 * @param input The table to examine.
 * @param nulls_equal Whether two nulls are considered equal; defaults to
 * null_equality::EQUAL (nulls compare equal).
 * @return A count of type cudf::size_type.
 */
398 cudf::size_type distinct_count(table_view const& input,
399  null_equality nulls_equal = null_equality::EQUAL);
400 
402 } // namespace cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:187
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< table > drop_nans(table_view const &input, std::vector< size_type > const &keys, cudf::size_type keep_threshold, rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Filters a table to remove NANs with threshold count.
std::unique_ptr< table > unique(table_view const &input, std::vector< size_type > const &keys, duplicate_keep_option keep, null_equality nulls_equal=null_equality::EQUAL, rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Create a new table with consecutive duplicate rows removed.
std::unique_ptr< table > drop_nulls(table_view const &input, std::vector< size_type > const &keys, cudf::size_type keep_threshold, rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Filters a table to remove null elements with threshold count.
cudf::size_type unique_count(column_view const &input, null_policy null_handling, nan_policy nan_handling)
Count the number of consecutive groups of equivalent rows in a column.
duplicate_keep_option
Choices for the drop_duplicates API that control which duplicate rows are retained.
std::unique_ptr< table > distinct(table_view const &input, std::vector< size_type > const &keys, duplicate_keep_option keep=duplicate_keep_option::KEEP_ANY, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL, rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Create a new table without duplicate rows.
std::unique_ptr< table > apply_boolean_mask(table_view const &input, column_view const &boolean_mask, rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Filters input using boolean_mask of boolean values as a mask.
cudf::size_type distinct_count(column_view const &input, null_policy null_handling, nan_policy nan_handling)
Count the distinct elements in the column_view.
std::unique_ptr< table > stable_distinct(table_view const &input, std::vector< size_type > const &keys, duplicate_keep_option keep=duplicate_keep_option::KEEP_ANY, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL, rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Create a new table without duplicate rows, preserving input order.
std::unique_ptr< column > distinct_indices(table_view const &input, duplicate_keep_option keep=duplicate_keep_option::KEEP_ANY, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Create a column of indices of all distinct rows in the input table.
@ KEEP_ANY
Keep an unspecified occurrence.
@ KEEP_NONE
Keep no (remove all) occurrences of duplicates.
@ KEEP_LAST
Keep last occurrence.
@ KEEP_FIRST
Keep first occurrence.
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:149
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:124
nan_policy
Enum to treat NaN floating point value as null or non-null element.
Definition: types.hpp:132
nan_equality
Enum to consider different elements (of floating point types) holding NaN value as equal or unequal.
Definition: types.hpp:141
@ EQUAL
nulls compare equal
@ ALL_EQUAL
All NaNs compare equal, regardless of sign.
cuDF interfaces
Definition: aggregation.hpp:34
Type declarations for libcudf.