transform.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
9 #include <cudf/column/scalar_column_view.hpp>
10 #include <cudf/types.hpp>
11 #include <cudf/utilities/export.hpp>
13 
14 #include <memory>
15 #include <optional>
16 #include <variant>
17 #include <vector>
18 
19 namespace CUDF_EXPORT cudf {
20 
59 [[deprecated("Use transform_extended instead")]] std::unique_ptr<column> transform(
60  std::vector<column_view> const& inputs,
61  std::string const& transform_udf,
62  data_type output_type,
63  bool is_ptx,
64  std::optional<void*> user_data = std::nullopt,
65  null_aware is_null_aware = null_aware::NO,
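66  output_nullability null_policy = output_nullability::PRESERVE,
67  rmm::cuda_stream_view stream = cudf::get_default_stream(),
68  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());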
69 
104 std::unique_ptr<column> transform_extended(
105  std::span<std::variant<column_view, scalar_column_view> const> inputs,
106  std::string const& udf,
107  data_type output_type,
108  bool is_ptx,
109  std::optional<void*> user_data = std::nullopt,
110  null_aware is_null_aware = null_aware::NO,
111  std::optional<size_type> row_size = std::nullopt,
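112  output_nullability null_policy = output_nullability::PRESERVE,
113  rmm::cuda_stream_view stream = cudf::get_default_stream(),
114  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());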
115 
130 [[deprecated]] std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
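131  column_view const& input,
132  rmm::cuda_stream_view stream = cudf::get_default_stream(),
133  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());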
134 
146 std::unique_ptr<column> column_nans_to_nulls(
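147  column_view const& input,
148  rmm::cuda_stream_view stream = cudf::get_default_stream(),
149  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());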
150 
165 std::unique_ptr<column> compute_column(
166  table_view const& table,
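167  ast::expression const& expr,
168  rmm::cuda_stream_view stream = cudf::get_default_stream(),
169  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());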
170 
186 std::unique_ptr<column> compute_column_jit(
187  table_view const& table,
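188  ast::expression const& expr,
189  rmm::cuda_stream_view stream = cudf::get_default_stream(),
190  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());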
191 
208 std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
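209  column_view const& input,
210  rmm::cuda_stream_view stream = cudf::get_default_stream(),
211  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());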
212 
238 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
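239  cudf::table_view const& input,
240  rmm::cuda_stream_view stream = cudf::get_default_stream(),
241  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());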
242 
271 std::pair<std::unique_ptr<column>, table_view> one_hot_encode(
272  column_view const& input,
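273  column_view const& categories,
274  rmm::cuda_stream_view stream = cudf::get_default_stream(),
275  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());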
276 
299 std::unique_ptr<column> mask_to_bools(
300  bitmask_type const* bitmask,
301  size_type begin_bit,
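302  size_type end_bit,
303  rmm::cuda_stream_view stream = cudf::get_default_stream(),
304  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());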
305 
332 std::unique_ptr<column> row_bit_count(
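333  table_view const& t,
334  rmm::cuda_stream_view stream = cudf::get_default_stream(),
335  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());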
336 
356 std::unique_ptr<column> segmented_row_bit_count(
357  table_view const& t,
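358  size_type segment_length,
359  rmm::cuda_stream_view stream = cudf::get_default_stream(),
360  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());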
361  // end of group
363 } // namespace CUDF_EXPORT cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:269
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
A set of cudf::column's of the same size.
Definition: table.hpp:29
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
std::pair< std::unique_ptr< cudf::table >, std::unique_ptr< cudf::column > > encode(cudf::table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Encode the rows of the given table as integers.
std::unique_ptr< column > row_bit_count(table_view const &t, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns an approximate cumulative size in bits of all columns in the table_view for each row.
std::pair< std::unique_ptr< rmm::device_buffer >, cudf::size_type > bools_to_mask(column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a bitmask from a column of boolean elements.
std::pair< std::unique_ptr< column >, table_view > one_hot_encode(column_view const &input, column_view const &categories, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Encodes input by generating a new column for each value in categories indicating the presence of that value in input.
std::unique_ptr< column > mask_to_bools(bitmask_type const *bitmask, size_type begin_bit, size_type end_bit, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a boolean column from given bitmask.
std::unique_ptr< column > transform(std::vector< column_view > const &inputs, std::string const &transform_udf, data_type output_type, bool is_ptx, std::optional< void * > user_data=std::nullopt, null_aware is_null_aware=null_aware::NO, output_nullability null_policy=output_nullability::PRESERVE, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a new column by applying a transform function against every element of the input columns.
std::unique_ptr< column > transform_extended(std::span< std::variant< column_view, scalar_column_view > const > inputs, std::string const &udf, data_type output_type, bool is_ptx, std::optional< void * > user_data=std::nullopt, null_aware is_null_aware=null_aware::NO, std::optional< size_type > row_size=std::nullopt, output_nullability null_policy=output_nullability::PRESERVE, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a new column by applying a transform function against every element of the input columns.
std::pair< std::unique_ptr< rmm::device_buffer >, size_type > nans_to_nulls(column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a null_mask from input by converting NaN to null and preserving existing null values and also returns new null_count.
std::unique_ptr< column > compute_column(table_view const &table, ast::expression const &expr, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Compute a new column by evaluating an expression tree on a table.
std::unique_ptr< column > column_nans_to_nulls(column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a null_mask from input by converting NaN elements to null rows and preserving existing null values.
std::unique_ptr< column > segmented_row_bit_count(table_view const &t, size_type segment_length, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns an approximate cumulative size in bits of all columns in the table_view for each segment of rows.
std::unique_ptr< column > compute_column_jit(table_view const &table, ast::expression const &expr, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Compute a new column by evaluating an expression tree on a table using a JIT-compiled kernel.
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:115
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:85
output_nullability
Indicates the null output policy of a function.
Definition: types.hpp:257
null_aware
A function is null-aware if its output value uses the input validity.
Definition: types.hpp:231
cuDF interfaces
Definition: host_udf.hpp:26
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:62
Type declarations for libcudf.