stream_compaction.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/types.hpp>
21 #include <cudf/utilities/export.hpp>
23 
24 #include <memory>
25 #include <optional>
26 #include <vector>
27 
28 namespace CUDF_EXPORT cudf {
36 namespace ast {
37 class expression;
38 }
39 
78 std::unique_ptr<table> drop_nulls(
79  table_view const& input,
80  std::vector<size_type> const& keys,
81  cudf::size_type keep_threshold,
84 
112 std::unique_ptr<table> drop_nulls(
113  table_view const& input,
114  std::vector<size_type> const& keys,
117 
156 std::unique_ptr<table> drop_nans(
157  table_view const& input,
158  std::vector<size_type> const& keys,
159  cudf::size_type keep_threshold,
162 
191 std::unique_ptr<table> drop_nans(
192  table_view const& input,
193  std::vector<size_type> const& keys,
196 
219 std::unique_ptr<table> apply_boolean_mask(
220  table_view const& input,
221  column_view const& boolean_mask,
224 
229  KEEP_ANY = 0,
230  KEEP_FIRST,
231  KEEP_LAST,
232  KEEP_NONE
233 };
234 
263 std::unique_ptr<table> unique(
264  table_view const& input,
265  std::vector<size_type> const& keys,
267  null_equality nulls_equal = null_equality::EQUAL,
270 
291 std::unique_ptr<table> distinct(
292  table_view const& input,
293  std::vector<size_type> const& keys,
294  duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
295  null_equality nulls_equal = null_equality::EQUAL,
296  nan_equality nans_equal = nan_equality::ALL_EQUAL,
299 
314 std::unique_ptr<column> distinct_indices(
315  table_view const& input,
316  duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
317  null_equality nulls_equal = null_equality::EQUAL,
318  nan_equality nans_equal = nan_equality::ALL_EQUAL,
321 
345 std::unique_ptr<table> stable_distinct(
346  table_view const& input,
347  std::vector<size_type> const& keys,
348  duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
349  null_equality nulls_equal = null_equality::EQUAL,
350  nan_equality nans_equal = nan_equality::ALL_EQUAL,
353 
371  null_policy null_handling,
372  nan_policy nan_handling,
374 
386  null_equality nulls_equal = null_equality::EQUAL,
388 
411  null_policy null_handling,
412  nan_policy nan_handling,
414 
426  null_equality nulls_equal = null_equality::EQUAL,
428 
462 std::vector<std::unique_ptr<column>> filter(
463  std::vector<column_view> const& predicate_columns,
464  std::string const& predicate_udf,
465  std::vector<column_view> const& filter_columns,
466  bool is_ptx,
467  std::optional<void*> user_data = std::nullopt,
468  null_aware is_null_aware = null_aware::NO,
471 
492 std::unique_ptr<table> filter(
493  table_view const& predicate_table,
494  ast::expression const& predicate_expr,
495  table_view const& filter_table,
498 
500 } // namespace CUDF_EXPORT cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::async_resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
std::unique_ptr< table > filter(table_view const &predicate_table, ast::expression const &predicate_expr, table_view const &filter_table, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a new table by applying a filter function against every element of the input columns.
std::unique_ptr< table > drop_nulls(table_view const &input, std::vector< size_type > const &keys, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters a table to remove null elements.
std::unique_ptr< column > distinct_indices(table_view const &input, duplicate_keep_option keep=duplicate_keep_option::KEEP_ANY, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create a column of indices of all distinct rows in the input table.
std::unique_ptr< table > unique(table_view const &input, std::vector< size_type > const &keys, duplicate_keep_option keep, null_equality nulls_equal=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create a new table with consecutive duplicate rows removed.
std::unique_ptr< table > drop_nans(table_view const &input, std::vector< size_type > const &keys, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters a table to remove NANs.
duplicate_keep_option
Choices for the drop_duplicates API for retention of duplicate rows.
std::unique_ptr< table > apply_boolean_mask(table_view const &input, column_view const &boolean_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters input using boolean_mask of boolean values as a mask.
cudf::size_type distinct_count(table_view const &input, null_equality nulls_equal=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream())
Count the distinct rows in a table.
std::unique_ptr< table > stable_distinct(table_view const &input, std::vector< size_type > const &keys, duplicate_keep_option keep=duplicate_keep_option::KEEP_ANY, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create a new table without duplicate rows, preserving input order.
std::unique_ptr< table > distinct(table_view const &input, std::vector< size_type > const &keys, duplicate_keep_option keep=duplicate_keep_option::KEEP_ANY, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create a new table without duplicate rows.
cudf::size_type unique_count(table_view const &input, null_equality nulls_equal=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream())
Count the number of consecutive groups of equivalent rows in a table.
@ KEEP_ANY
Keep an unspecified occurrence.
@ KEEP_NONE
Keep no (remove all) occurrences of duplicates.
@ KEEP_LAST
Keep last occurrence.
@ KEEP_FIRST
Keep first occurrence.
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:126
null_aware
Indicates whether a function is null-aware or not.
Definition: types.hpp:238
nan_policy
Enum to treat a NaN floating-point value as a null or non-null element.
Definition: types.hpp:134
nan_equality
Enum to consider different elements (of floating-point types) holding a NaN value as equal or unequal.
Definition: types.hpp:143
cuDF interfaces
Definition: host_udf.hpp:37
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:72
Type declarations for libcudf.