transform.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/ast/expressions.hpp>
20 #include <cudf/types.hpp>
21 
22 #include <rmm/mr/device/per_device_resource.hpp>
23 
24 #include <memory>
25 
26 namespace cudf {
/**
 * @brief Creates a new column by applying a unary function against every element of an
 * input column.
 *
 * @param input       Column whose elements the unary function is applied to.
 * @param unary_udf   The unary function, passed as a string. Presumably the function's
 *                    source text -- TODO confirm the expected format with the implementation.
 * @param output_type Data type requested for the result. Presumably the element type of
 *                    the returned column -- confirm.
 * @param is_ptx      NOTE(review): by its name, indicates whether `unary_udf` is PTX rather
 *                    than higher-level source; not verifiable from this declaration.
 * @param mr          Device memory resource. Defaults to the resource returned by
 *                    `rmm::mr::get_current_device_resource()`. Presumably used to allocate
 *                    the returned column's device memory -- confirm.
 * @return Owning pointer to the newly created column.
 */
52 std::unique_ptr<column> transform(
53  column_view const& input,
54  std::string const& unary_udf,
55  data_type output_type,
56  bool is_ptx,
57  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
58 
/**
 * @brief Creates a null mask from `input` by converting NaN to null while preserving
 * existing null values.
 *
 * NOTE(review): the summary available for this function is truncated after "and also...",
 * so any additional documented behaviour is not captured here.
 *
 * @param input Column to derive the null mask from.
 * @param mr    Device memory resource. Defaults to the resource returned by
 *              `rmm::mr::get_current_device_resource()`. Presumably used to allocate the
 *              returned buffer -- confirm.
 * @return Pair of an owning pointer to a device buffer and a `size_type`. Presumably the
 *         null mask and the resulting null count -- TODO confirm.
 */
70 std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
71  column_view const& input,
72  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
73 
/**
 * @brief Compute a new column by evaluating an expression tree on a table.
 *
 * @param table Table the expression is evaluated on.
 * @param expr  Root of the AST expression tree to evaluate.
 * @param mr    Device memory resource. Defaults to the resource returned by
 *              `rmm::mr::get_current_device_resource()`. Presumably used to allocate the
 *              returned column's device memory -- confirm.
 * @return Owning pointer to the newly computed column.
 */
87 std::unique_ptr<column> compute_column(
88  table_view const& table,
89  ast::expression const& expr,
90  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
91 
/**
 * @brief Creates a bitmask from a column of boolean elements.
 *
 * @param input Column of boolean elements to convert.
 * @param mr    Device memory resource. Defaults to the resource returned by
 *              `rmm::mr::get_current_device_resource()`. Presumably used to allocate the
 *              returned buffer -- confirm.
 * @return Pair of an owning pointer to a device buffer and a `cudf::size_type`. Presumably
 *         the bitmask and a count derived from it (e.g. a null count) -- TODO confirm.
 */
107 std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
108  column_view const& input,
109  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
110 
/**
 * @brief Encode the rows of the given table as integers.
 *
 * @param input Table whose rows are encoded.
 * @param mr    Device memory resource. Defaults to the resource returned by
 *              `rmm::mr::get_current_device_resource()`. Presumably used to allocate the
 *              returned table and column -- confirm.
 * @return Pair of an owning pointer to a table and an owning pointer to a column.
 *         Presumably the table holds the distinct rows (keys) and the column holds the
 *         integer code assigned to each input row -- TODO confirm.
 */
135 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
136  cudf::table_view const& input,
137  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
138 
/**
 * @brief Encodes `input` by generating a new column for each value in `categories`.
 *
 * NOTE(review): the summary available for this function is truncated after "indicating
 * the presence of that..."; presumably each generated column flags where that category
 * value occurs in `input` -- confirm.
 *
 * @param input      Column to encode.
 * @param categories Column of category values; one output column is generated per value.
 * @param mr         Device memory resource. Defaults to the resource returned by
 *                   `rmm::mr::get_current_device_resource()`. Presumably used to allocate
 *                   the returned column's device memory -- confirm.
 * @return Pair of an owning pointer to a column and a (non-owning) `table_view`.
 *         Presumably the view refers to data owned by the returned column, so the column
 *         must outlive the view -- TODO confirm.
 */
166 std::pair<std::unique_ptr<column>, table_view> one_hot_encode(
167  column_view const& input,
168  column_view const& categories,
169  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
170 
/**
 * @brief Creates a boolean column from given bitmask.
 *
 * @param bitmask   Pointer to the bitmask words to read (not modified).
 * @param begin_bit Position of the first bit to convert. Presumably inclusive -- confirm.
 * @param end_bit   Position marking the end of the bit range. Presumably exclusive -- confirm.
 * @param mr        Device memory resource. Defaults to the resource returned by
 *                  `rmm::mr::get_current_device_resource()`. Presumably used to allocate
 *                  the returned column's device memory -- confirm.
 * @return Owning pointer to the newly created boolean column.
 */
192 std::unique_ptr<column> mask_to_bools(
193  bitmask_type const* bitmask,
194  size_type begin_bit,
195  size_type end_bit,
196  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
197 
/**
 * @brief Returns an approximate cumulative size in bits of all columns in the
 * `table_view` for each row.
 *
 * @param t  Table whose per-row bit sizes are computed.
 * @param mr Device memory resource. Defaults to the resource returned by
 *           `rmm::mr::get_current_device_resource()`. Presumably used to allocate the
 *           returned column's device memory -- confirm.
 * @return Owning pointer to a column holding the per-row sizes in bits.
 */
223 std::unique_ptr<column> row_bit_count(
224  table_view const& t,
225  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
226 
/**
 * @brief Returns an approximate cumulative size in bits of all columns in the
 * `table_view` for each segment.
 *
 * NOTE(review): the summary available for this function is truncated ("for each segment
 * of r..."); presumably segments are runs of consecutive rows -- confirm.
 *
 * @param t              Table whose per-segment bit sizes are computed.
 * @param segment_length Length of each segment. Presumably a number of rows -- confirm,
 *                       including how a trailing partial segment is handled.
 * @param mr             Device memory resource. Defaults to the resource returned by
 *                       `rmm::mr::get_current_device_resource()`. Presumably used to
 *                       allocate the returned column's device memory -- confirm.
 * @return Owning pointer to a column holding the per-segment sizes in bits.
 */
245 std::unique_ptr<column> segmented_row_bit_count(
246  table_view const& t,
247  size_type segment_length,
248  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
249  // end of group
251 } // namespace cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:241
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:187
A set of cudf::column's of the same size.
Definition: table.hpp:40
std::pair< std::unique_ptr< column >, table_view > one_hot_encode(column_view const &input, column_view const &categories, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Encodes input by generating a new column for each value in categories indicating the presence of that value in input.
std::pair< std::unique_ptr< rmm::device_buffer >, cudf::size_type > bools_to_mask(column_view const &input, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates a bitmask from a column of boolean elements.
std::pair< std::unique_ptr< cudf::table >, std::unique_ptr< cudf::column > > encode(cudf::table_view const &input, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Encode the rows of the given table as integers.
std::unique_ptr< column > row_bit_count(table_view const &t, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Returns an approximate cumulative size in bits of all columns in the table_view for each row.
std::unique_ptr< column > segmented_row_bit_count(table_view const &t, size_type segment_length, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Returns an approximate cumulative size in bits of all columns in the table_view for each segment of rows.
std::unique_ptr< column > compute_column(table_view const &table, ast::expression const &expr, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Compute a new column by evaluating an expression tree on a table.
std::unique_ptr< column > mask_to_bools(bitmask_type const *bitmask, size_type begin_bit, size_type end_bit, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates a boolean column from given bitmask.
std::pair< std::unique_ptr< rmm::device_buffer >, size_type > nans_to_nulls(column_view const &input, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates a null_mask from input by converting NaN to null and preserving existing null values and also returns new null_count.
std::unique_ptr< column > transform(column_view const &input, std::string const &unary_udf, data_type output_type, bool is_ptx, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates a new column by applying a unary function against every element of an input column.
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:94
cuDF interfaces
Definition: aggregation.hpp:34
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:46
Type declarations for libcudf.