stream_compaction.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
9 #include <cudf/column/scalar_column_view.hpp>
10 #include <cudf/types.hpp>
12 #include <cudf/utilities/export.hpp>
14 
15 #include <memory>
16 #include <optional>
17 #include <variant>
18 #include <vector>
19 
20 namespace CUDF_EXPORT cudf {
/**
 * Forward declaration of `cudf::ast::expression`, so the AST-based `filter`
 * overload declared later in this header can take it by const reference
 * without a full definition being visible here.
 *
 * This page's reference text describes it as "a generic expression that can
 * be evaluated to return a value".
 */
28 namespace ast {
29 struct expression;
30 }
31 
/**
 * Filters a table to remove null elements (overload taking `keep_threshold`).
 *
 * Visible parameters:
 *  - `input`: the table view to filter.
 *  - `keys`: a vector of `size_type` values; presumably column indices into
 *    `input` that are inspected for nulls -- confirm against the library docs.
 *  - `keep_threshold`: a `cudf::size_type`; its exact meaning is not shown in
 *    this listing.
 *
 * Returns an owning `std::unique_ptr<table>`.
 *
 * NOTE(review): this listing stops after `keep_threshold` (listing lines
 * 74-75 are absent), so the remaining parameters and the closing `);` of the
 * declaration are not shown here.
 */
70 std::unique_ptr<table> drop_nulls(
71  table_view const& input,
72  std::vector<size_type> const& keys,
73  cudf::size_type keep_threshold,
76 
/**
 * Filters a table to remove null elements.
 *
 * Visible parameters:
 *  - `input`: the table view to filter.
 *  - `keys`: a vector of `size_type` values; presumably column indices into
 *    `input` that are inspected for nulls -- confirm against the library docs.
 *
 * Returns an owning `std::unique_ptr<table>`.
 *
 * NOTE(review): the listing is truncated here. The full signature given in
 * this page's reference text continues with
 * `rmm::cuda_stream_view stream = cudf::get_default_stream()` and
 * `rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()`
 * before the closing `);`.
 */
104 std::unique_ptr<table> drop_nulls(
105  table_view const& input,
106  std::vector<size_type> const& keys,
109 
/**
 * Filters a table to remove NANs (overload taking `keep_threshold`).
 *
 * Visible parameters:
 *  - `input`: the table view to filter.
 *  - `keys`: a vector of `size_type` values; presumably column indices into
 *    `input` that are inspected for NaNs -- confirm against the library docs.
 *  - `keep_threshold`: a `cudf::size_type`; its exact meaning is not shown in
 *    this listing.
 *
 * Returns an owning `std::unique_ptr<table>`.
 *
 * NOTE(review): this listing stops after `keep_threshold` (listing lines
 * 152-153 are absent), so the remaining parameters and the closing `);` of
 * the declaration are not shown here.
 */
148 std::unique_ptr<table> drop_nans(
149  table_view const& input,
150  std::vector<size_type> const& keys,
151  cudf::size_type keep_threshold,
154 
/**
 * Filters a table to remove NANs.
 *
 * Visible parameters:
 *  - `input`: the table view to filter.
 *  - `keys`: a vector of `size_type` values; presumably column indices into
 *    `input` that are inspected for NaNs -- confirm against the library docs.
 *
 * Returns an owning `std::unique_ptr<table>`.
 *
 * NOTE(review): the listing is truncated here. The full signature given in
 * this page's reference text continues with
 * `rmm::cuda_stream_view stream = cudf::get_default_stream()` and
 * `rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()`
 * before the closing `);`.
 */
183 std::unique_ptr<table> drop_nans(
184  table_view const& input,
185  std::vector<size_type> const& keys,
188 
/**
 * Filters `input` using `boolean_mask` of boolean values as a mask.
 *
 * Visible parameters:
 *  - `input`: the table view to filter.
 *  - `boolean_mask`: a column view used as the mask.
 *
 * Returns an owning `std::unique_ptr<table>`.
 *
 * NOTE(review): the listing is truncated here. The full signature given in
 * this page's reference text continues with
 * `rmm::cuda_stream_view stream = cudf::get_default_stream()` and
 * `rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()`
 * before the closing `);`.
 */
211 std::unique_ptr<table> apply_boolean_mask(
212  table_view const& input,
213  column_view const& boolean_mask,
216 
/**
 * Enumerators of `duplicate_keep_option`: the choices for which occurrence of
 * duplicate rows is retained.
 *
 * NOTE(review): the enum's opening line (listing line 220) is missing from
 * this listing; only the enumerators and the closing brace are shown.
 */
/* Keep an unspecified occurrence. Explicitly pinned to the value 0. */
221  KEEP_ANY = 0,
/* Keep first occurrence. */
222  KEEP_FIRST,
/* Keep last occurrence. */
223  KEEP_LAST,
/* Keep no (remove all) occurrences of duplicates. */
224  KEEP_NONE
225 };
226 
/**
 * Create a new table with consecutive duplicate rows removed.
 *
 * Visible parameters:
 *  - `input`: the table view to process.
 *  - `keys`: a vector of `size_type` values; presumably the column indices
 *    used to decide whether rows are duplicates -- confirm against the
 *    library docs.
 *  - `nulls_equal`: defaults to `null_equality::EQUAL`.
 *
 * Returns an owning `std::unique_ptr<table>`.
 *
 * NOTE(review): the listing is incomplete. Listing line 258 is missing
 * between `keys` and `nulls_equal`; the full signature in this page's
 * reference text has a non-defaulted `duplicate_keep_option keep` parameter
 * there. The same signature ends with
 * `rmm::cuda_stream_view stream = cudf::get_default_stream()` and
 * `rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()`
 * before the closing `);`.
 */
255 std::unique_ptr<table> unique(
256  table_view const& input,
257  std::vector<size_type> const& keys,
259  null_equality nulls_equal = null_equality::EQUAL,
262 
/**
 * Create a new table without duplicate rows.
 *
 * Visible parameters:
 *  - `input`: the table view to process.
 *  - `keys`: a vector of `size_type` values; presumably the column indices
 *    used to decide whether rows are duplicates -- confirm against the
 *    library docs.
 *  - `keep`: which occurrence of duplicates to retain; defaults to
 *    `duplicate_keep_option::KEEP_ANY`.
 *  - `nulls_equal`: defaults to `null_equality::EQUAL`.
 *  - `nans_equal`: defaults to `nan_equality::ALL_EQUAL`.
 *
 * Returns an owning `std::unique_ptr<table>`.
 *
 * NOTE(review): the listing is truncated here. The full signature given in
 * this page's reference text continues with
 * `rmm::cuda_stream_view stream = cudf::get_default_stream()` and
 * `rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()`
 * before the closing `);`.
 */
283 std::unique_ptr<table> distinct(
284  table_view const& input,
285  std::vector<size_type> const& keys,
286  duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
287  null_equality nulls_equal = null_equality::EQUAL,
288  nan_equality nans_equal = nan_equality::ALL_EQUAL,
291 
/**
 * Create a column of indices of all distinct rows in the input table.
 *
 * Unlike `distinct`, this declaration takes no `keys` parameter and returns
 * an owning `std::unique_ptr<column>` rather than a table.
 *
 * Visible parameters:
 *  - `input`: the table view to process.
 *  - `keep`: which occurrence of duplicates to retain; defaults to
 *    `duplicate_keep_option::KEEP_ANY`.
 *  - `nulls_equal`: defaults to `null_equality::EQUAL`.
 *  - `nans_equal`: defaults to `nan_equality::ALL_EQUAL`.
 *
 * NOTE(review): the listing is truncated here. The full signature given in
 * this page's reference text continues with
 * `rmm::cuda_stream_view stream = cudf::get_default_stream()` and
 * `rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()`
 * before the closing `);`.
 */
306 std::unique_ptr<column> distinct_indices(
307  table_view const& input,
308  duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
309  null_equality nulls_equal = null_equality::EQUAL,
310  nan_equality nans_equal = nan_equality::ALL_EQUAL,
313 
/**
 * Create a new table without duplicate rows, preserving input order.
 *
 * The visible parameter list matches that of `distinct`.
 *
 * Visible parameters:
 *  - `input`: the table view to process.
 *  - `keys`: a vector of `size_type` values; presumably the column indices
 *    used to decide whether rows are duplicates -- confirm against the
 *    library docs.
 *  - `keep`: which occurrence of duplicates to retain; defaults to
 *    `duplicate_keep_option::KEEP_ANY`.
 *  - `nulls_equal`: defaults to `null_equality::EQUAL`.
 *  - `nans_equal`: defaults to `nan_equality::ALL_EQUAL`.
 *
 * Returns an owning `std::unique_ptr<table>`.
 *
 * NOTE(review): the listing is truncated here. The full signature given in
 * this page's reference text continues with
 * `rmm::cuda_stream_view stream = cudf::get_default_stream()` and
 * `rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()`
 * before the closing `);`.
 */
337 std::unique_ptr<table> stable_distinct(
338  table_view const& input,
339  std::vector<size_type> const& keys,
340  duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY,
341  null_equality nulls_equal = null_equality::EQUAL,
342  nan_equality nans_equal = nan_equality::ALL_EQUAL,
345 
/**
 * Deprecated UDF-based `filter` overload; the `[[deprecated]]` attribute
 * directs callers to `filter_extended` instead.
 *
 * Its visible parameters mirror `filter_extended`, except that the predicate
 * inputs are passed as a `std::vector<column_view>` (`predicate_columns`)
 * rather than a span of `column_view` / `scalar_column_view` variants.
 *
 * Visible parameters:
 *  - `predicate_columns`: column views supplied to the predicate.
 *  - `predicate_udf`: the predicate, passed as a string.
 *  - `filter_columns`: column views; presumably the columns being filtered --
 *    confirm against the library docs.
 *  - `is_ptx`: boolean flag; presumably indicates that `predicate_udf` holds
 *    PTX rather than source text -- confirm against the library docs.
 *  - `user_data`: optional `void*`; defaults to `std::nullopt`.
 *  - `is_null_aware`: defaults to `null_aware::NO`.
 *  - `predicate_nullability`: defaults to `output_nullability::PRESERVE`.
 *
 * Returns a vector of owning `std::unique_ptr<column>`.
 *
 * NOTE(review): the listing stops after `predicate_nullability` (listing
 * lines 388-389 are absent), so the remaining parameters and the closing `);`
 * of the declaration are not shown here.
 */
380 [[deprecated("Use filter_extended instead")]] std::vector<std::unique_ptr<column>> filter(
381  std::vector<column_view> const& predicate_columns,
382  std::string const& predicate_udf,
383  std::vector<column_view> const& filter_columns,
384  bool is_ptx,
385  std::optional<void*> user_data = std::nullopt,
386  null_aware is_null_aware = null_aware::NO,
387  output_nullability predicate_nullability = output_nullability::PRESERVE,
390 
/**
 * Creates a new column by applying a filter function against every element
 * of the input columns.
 *
 * Visible parameters:
 *  - `predicate_inputs`: a span whose elements are each either a
 *    `column_view` or a `scalar_column_view`.
 *  - `predicate_udf`: the predicate, passed as a string.
 *  - `filter_columns`: column views; presumably the columns being filtered --
 *    confirm against the library docs.
 *  - `is_ptx`: boolean flag; presumably indicates that `predicate_udf` holds
 *    PTX rather than source text -- confirm against the library docs.
 *  - `user_data`: optional `void*`; defaults to `std::nullopt`.
 *  - `is_null_aware`: defaults to `null_aware::NO`.
 *  - `predicate_nullability`: defaults to `output_nullability::PRESERVE`.
 *
 * Returns a vector of owning `std::unique_ptr<column>`.
 *
 * NOTE(review): the listing is truncated here. The full signature given in
 * this page's reference text continues with
 * `rmm::cuda_stream_view stream = cudf::get_default_stream()` and
 * `rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()`
 * before the closing `);`.
 */
425 std::vector<std::unique_ptr<column>> filter_extended(
426  std::span<std::variant<column_view, scalar_column_view> const> predicate_inputs,
427  std::string const& predicate_udf,
428  std::vector<column_view> const& filter_columns,
429  bool is_ptx,
430  std::optional<void*> user_data = std::nullopt,
431  null_aware is_null_aware = null_aware::NO,
432  output_nullability predicate_nullability = output_nullability::PRESERVE,
435 
/**
 * Creates a new table by applying a filter function against every element of
 * the input columns (AST-expression overload).
 *
 * Visible parameters:
 *  - `predicate_table`: table view; presumably the table the predicate
 *    expression is evaluated over -- confirm against the library docs.
 *  - `predicate_expr`: an `ast::expression`, which this page's reference text
 *    describes as a generic expression that can be evaluated to return a
 *    value.
 *  - `filter_table`: table view; presumably the table whose rows are
 *    filtered -- confirm against the library docs.
 *
 * Returns an owning `std::unique_ptr<table>`.
 *
 * NOTE(review): the listing is truncated here. The full signature given in
 * this page's reference text continues with
 * `rmm::cuda_stream_view stream = cudf::get_default_stream()` and
 * `rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()`
 * before the closing `);`.
 */
456 std::unique_ptr<table> filter(
457  table_view const& predicate_table,
458  ast::expression const& predicate_expr,
459  table_view const& filter_table,
462 
464 } // namespace CUDF_EXPORT cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
column view class definitions
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
std::unique_ptr< table > filter(table_view const &predicate_table, ast::expression const &predicate_expr, table_view const &filter_table, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a new table by applying a filter function against every element of the input columns.
std::unique_ptr< table > drop_nulls(table_view const &input, std::vector< size_type > const &keys, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters a table to remove null elements.
std::unique_ptr< column > distinct_indices(table_view const &input, duplicate_keep_option keep=duplicate_keep_option::KEEP_ANY, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create a column of indices of all distinct rows in the input table.
std::unique_ptr< table > unique(table_view const &input, std::vector< size_type > const &keys, duplicate_keep_option keep, null_equality nulls_equal=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create a new table with consecutive duplicate rows removed.
std::unique_ptr< table > drop_nans(table_view const &input, std::vector< size_type > const &keys, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters a table to remove NANs.
duplicate_keep_option
Choices for the drop_duplicates API controlling which duplicate rows are retained.
std::unique_ptr< table > apply_boolean_mask(table_view const &input, column_view const &boolean_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters input using boolean_mask of boolean values as a mask.
std::unique_ptr< table > stable_distinct(table_view const &input, std::vector< size_type > const &keys, duplicate_keep_option keep=duplicate_keep_option::KEEP_ANY, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create a new table without duplicate rows, preserving input order.
std::unique_ptr< table > distinct(table_view const &input, std::vector< size_type > const &keys, duplicate_keep_option keep=duplicate_keep_option::KEEP_ANY, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create a new table without duplicate rows.
std::vector< std::unique_ptr< column > > filter_extended(std::span< std::variant< column_view, scalar_column_view > const > predicate_inputs, std::string const &predicate_udf, std::vector< column_view > const &filter_columns, bool is_ptx, std::optional< void * > user_data=std::nullopt, null_aware is_null_aware=null_aware::NO, output_nullability predicate_nullability=output_nullability::PRESERVE, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a new column by applying a filter function against every element of the input columns.
@ KEEP_ANY
Keep an unspecified occurrence.
@ KEEP_NONE
Keep no (remove all) occurrences of duplicates.
@ KEEP_LAST
Keep last occurrence.
@ KEEP_FIRST
Keep first occurrence.
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:140
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
output_nullability
Indicates the null output policy of a function.
Definition: types.hpp:257
null_aware
A function is null-aware if its output value uses the input validity.
Definition: types.hpp:231
nan_equality
Enum to consider different elements (of floating point types) holding NaN value as equal or unequal.
Definition: types.hpp:132
cuDF interfaces
Definition: host_udf.hpp:26
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:62
Type declarations for libcudf.