column_factories.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <cudf/column/column.hpp>
8 #include <cudf/types.hpp>
11 #include <cudf/utilities/span.hpp>
13 
14 #include <rmm/cuda_stream_view.hpp>
15 
16 #include <cuda/std/utility>
17 
18 namespace CUDF_EXPORT cudf {
/// Declares the `data_type` overload of make_empty_column, returning an owning
/// pointer to a column.
/// NOTE(review): presumably creates an empty column of the given type, mirroring
/// the `type_id` overload -- the original doc comment is not part of this listing.
34 std::unique_ptr<column> make_empty_column(data_type type);
35 
/// Creates an empty column of the specified type.
/// @param id Identifier of the column's logical element type
/// @return Owning pointer to the new column
44 std::unique_ptr<column> make_empty_column(type_id id);
45 
/// Declares the `mask_state` overload of make_numeric_column. `state` controls the
/// allocation/initialization of the null mask and defaults to `mask_state::UNALLOCATED`.
/// NOTE(review): this listing jumps from original line 67 to 70, so the tail of the
/// parameter list and the closing `);` are not shown. Presumably `stream` and `mr`
/// parameters as in the templated overload -- confirm against the original header.
64 std::unique_ptr<column> make_numeric_column(
65  data_type type,
66  size_type size,
67  mask_state state = mask_state::UNALLOCATED,
70 
88 template <typename B>
89 std::unique_ptr<column> make_numeric_column(
90  data_type type,
91  size_type size,
92  B&& null_mask,
96 {
97  CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type.");
98  return std::make_unique<column>(type,
99  size,
100  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
101  std::forward<B>(null_mask),
102  null_count);
103 }
104 
/// Declares the `mask_state` overload of make_fixed_point_column. `state` controls the
/// allocation/initialization of the null mask and defaults to `mask_state::UNALLOCATED`.
/// NOTE(review): this listing jumps from original line 125 to 128, so the tail of the
/// parameter list and the closing `);` are not shown. Presumably `stream` and `mr`
/// parameters as in the templated overload -- confirm against the original header.
122 std::unique_ptr<column> make_fixed_point_column(
123  data_type type,
124  size_type size,
125  mask_state state = mask_state::UNALLOCATED,
128 
145 template <typename B>
146 std::unique_ptr<column> make_fixed_point_column(
147  data_type type,
148  size_type size,
149  B&& null_mask,
153 {
154  CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type.");
155  return std::make_unique<column>(type,
156  size,
157  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
158  std::forward<B>(null_mask),
159  null_count);
160 }
161 
/// Declares the `mask_state` overload of make_timestamp_column. `state` controls the
/// allocation/initialization of the null mask and defaults to `mask_state::UNALLOCATED`.
/// NOTE(review): this listing jumps from original line 183 to 186, so the tail of the
/// parameter list and the closing `);` are not shown. Presumably `stream` and `mr`
/// parameters as in the templated overload -- confirm against the original header.
180 std::unique_ptr<column> make_timestamp_column(
181  data_type type,
182  size_type size,
183  mask_state state = mask_state::UNALLOCATED,
186 
204 template <typename B>
205 std::unique_ptr<column> make_timestamp_column(
206  data_type type,
207  size_type size,
208  B&& null_mask,
212 {
213  CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type.");
214  return std::make_unique<column>(type,
215  size,
216  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
217  std::forward<B>(null_mask),
218  null_count);
219 }
220 
/// Declares the `mask_state` overload of make_duration_column. `state` controls the
/// allocation/initialization of the null mask and defaults to `mask_state::UNALLOCATED`.
/// NOTE(review): this listing jumps from original line 242 to 245, so the tail of the
/// parameter list and the closing `);` are not shown. Presumably `stream` and `mr`
/// parameters as in the templated overload -- confirm against the original header.
239 std::unique_ptr<column> make_duration_column(
240  data_type type,
241  size_type size,
242  mask_state state = mask_state::UNALLOCATED,
245 
263 template <typename B>
264 std::unique_ptr<column> make_duration_column(
265  data_type type,
266  size_type size,
267  B&& null_mask,
271 {
272  CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type.");
273  return std::make_unique<column>(type,
274  size,
275  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
276  std::forward<B>(null_mask),
277  null_count);
278 }
279 
/// Declares the `mask_state` overload of make_fixed_width_column. `state` controls the
/// allocation/initialization of the null mask and defaults to `mask_state::UNALLOCATED`.
/// NOTE(review): this listing jumps from original line 301 to 304, so the tail of the
/// parameter list and the closing `);` are not shown. Presumably `stream` and `mr`
/// parameters as in the templated overload -- confirm against the original header.
298 std::unique_ptr<column> make_fixed_width_column(
299  data_type type,
300  size_type size,
301  mask_state state = mask_state::UNALLOCATED,
304 
322 template <typename B>
323 std::unique_ptr<column> make_fixed_width_column(
324  data_type type,
325  size_type size,
326  B&& null_mask,
330 {
331  CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type.");
332  if (is_timestamp(type)) {
333  return make_timestamp_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
334  } else if (is_duration(type)) {
335  return make_duration_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
336  } else if (is_fixed_point(type)) {
337  return make_fixed_point_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
338  }
339  return make_numeric_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
340 }
341 
/// Declares a make_strings_column overload whose first parameter is a device span of
/// (char pointer, size) pairs.
/// NOTE(review): this listing jumps from original line 366 to 369, so the tail of the
/// parameter list and the closing `);` are not shown. Presumably `stream` and `mr`
/// parameters as in make_strings_column_batch -- confirm against the original header.
365 std::unique_ptr<column> make_strings_column(
366  cudf::device_span<cuda::std::pair<char const*, size_type> const> strings,
369 
385 std::vector<std::unique_ptr<column>> make_strings_column_batch(
386  std::vector<cudf::device_span<cuda::std::pair<char const*, size_type> const>> const& input,
389 
/// Declares a make_strings_column overload that takes a `string_view` null placeholder.
/// NOTE(review): this listing omits original line 417 (a parameter preceding
/// `null_placeholder`) and lines 419-420 (the tail of the parameter list and the
/// closing `);`). Recover them from the original header before relying on this text.
416 std::unique_ptr<column> make_strings_column(
418  string_view const null_placeholder,
421 
439 std::unique_ptr<column> make_strings_column(size_type num_strings,
440  std::unique_ptr<column> offsets_column,
441  rmm::device_buffer&& chars_buffer,
443  rmm::device_buffer&& null_mask);
444 
501 std::unique_ptr<cudf::column> make_lists_column(
502  size_type num_rows,
503  std::unique_ptr<column> offsets_column,
504  std::unique_ptr<column> child_column,
506  rmm::device_buffer&& null_mask,
509 
520 std::unique_ptr<column> make_empty_lists_column(
521  data_type child_type,
524 
548 std::unique_ptr<cudf::column> make_structs_column(
549  size_type num_rows,
550  std::vector<std::unique_ptr<column>>&& child_columns,
552  rmm::device_buffer&& null_mask,
555 
582 std::unique_ptr<cudf::column> create_structs_hierarchy(
583  size_type num_rows,
584  std::vector<std::unique_ptr<column>>&& child_columns,
586  rmm::device_buffer&& null_mask,
589 
604 std::unique_ptr<column> make_column_from_scalar(
605  scalar const& s,
606  size_type size,
609 
624 std::unique_ptr<column> make_dictionary_from_scalar(
625  scalar const& s,
626  size_type size,
629  // end of group
631 } // namespace CUDF_EXPORT cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:269
An owning class to represent a singular value.
Definition: scalar.hpp:40
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8 string.
Definition: string_view.hpp:33
Class definition for cudf::column.
std::unique_ptr< column > make_fixed_width_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified fixed-width data_type.
std::unique_ptr< cudf::column > make_structs_column(size_type num_rows, std::vector< std::unique_ptr< column >> &&child_columns, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a STRUCT column using specified child columns as members.
std::unique_ptr< column > make_dictionary_from_scalar(scalar const &s, size_type size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a dictionary column with size elements that are all equal to the given scalar.
std::unique_ptr< cudf::column > create_structs_hierarchy(size_type num_rows, std::vector< std::unique_ptr< column >> &&child_columns, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a STRUCT column using specified child columns as members.
std::unique_ptr< column > make_numeric_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified numeric data_type.
std::unique_ptr< column > make_duration_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified duration data_type.
std::unique_ptr< column > make_timestamp_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified timestamp data_type.
std::unique_ptr< column > make_empty_column(type_id id)
Creates an empty column of the specified type.
std::unique_ptr< column > make_empty_lists_column(data_type child_type, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create an empty LIST column.
std::unique_ptr< cudf::column > make_lists_column(size_type num_rows, std::unique_ptr< column > offsets_column, std::unique_ptr< column > child_column, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a LIST type column given offsets column, child column, null mask and null count.
std::unique_ptr< column > make_column_from_scalar(scalar const &s, size_type size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a column with size elements that are all equal to the given scalar.
std::unique_ptr< column > make_strings_column(size_type num_strings, std::unique_ptr< column > offsets_column, rmm::device_buffer &&chars_buffer, size_type null_count, rmm::device_buffer &&null_mask)
Construct a STRING type column given offsets column, chars columns, and null mask and null count.
std::vector< std::unique_ptr< column > > make_strings_column_batch(std::vector< cudf::device_span< cuda::std::pair< char const *, size_type > const >> const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a batch of STRING type columns given an array of device spans of pointer/size pairs.
std::unique_ptr< column > make_fixed_point_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified fixed_point data_type.
cudf::size_type null_count(bitmask_type const *bitmask, size_type start, size_type stop, rmm::cuda_stream_view stream=cudf::get_default_stream())
Given a validity bitmask, counts the number of null elements (unset bits) in the range [start, stop).
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
std::unique_ptr< column > is_timestamp(strings_column_view const &input, std::string_view format, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Verifies the given strings column can be parsed to timestamps using the provided format pattern.
std::unique_ptr< column > is_fixed_point(strings_column_view const &input, data_type decimal_type=data_type{type_id::DECIMAL64}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a boolean column identifying strings in which all characters are valid for conversion to fixed-point.
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:143
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:170
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type
constexpr CUDF_HOST_DEVICE bool is_fixed_width()
Indicates whether elements of type T are fixed-width.
Definition: traits.hpp:558
constexpr CUDF_HOST_DEVICE bool is_duration()
Indicates whether the type T is a duration type.
Definition: traits.hpp:466
type_id
Identifies a column's logical element type.
Definition: types.hpp:192
constexpr CUDF_HOST_DEVICE bool is_numeric()
Indicates whether the type T is a numeric type.
Definition: traits.hpp:169
cuDF interfaces
Definition: host_udf.hpp:26
APIs for spans.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:323
Type declarations for libcudf.