column_factories.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/column/column.hpp>
19 #include <cudf/types.hpp>
22 #include <cudf/utilities/span.hpp>
24 
25 #include <rmm/cuda_stream_view.hpp>
26 
27 namespace CUDF_EXPORT cudf {
/**
 * @brief Creates an empty column of the specified type.
 *
 * Overload that takes the element type as a `data_type`.
 *
 * @param type The column data type
 * @return Owning pointer to the newly created column
 */
43 std::unique_ptr<column> make_empty_column(data_type type);
44 
/**
 * @brief Creates an empty column of the specified type.
 *
 * Overload that takes the element type as a `type_id`.
 *
 * @param id The column type id
 * @return Owning pointer to the newly created column
 */
53 std::unique_ptr<column> make_empty_column(type_id id);
54 
/**
 * @brief Declaration of the `make_numeric_column` overload that takes a `mask_state`.
 *
 * @param type The column data type
 * @param size The number of elements in the column
 * @param state Controls the allocation/initialization of the null mask; defaults to
 * `mask_state::UNALLOCATED`
 * @return Owning pointer to the constructed column
 *
 * NOTE(review): this listing stops after `state` (listing rows 77-78 are absent), so the
 * declaration is not terminated here. Presumably the missing rows are the `stream` and `mr`
 * parameters used by the other factories -- confirm against the real header.
 */
73 std::unique_ptr<column> make_numeric_column(
74  data_type type,
75  size_type size,
76  mask_state state = mask_state::UNALLOCATED,
79 
/**
 * @brief Construct a column with sufficient uninitialized storage to hold `size` elements of
 * the specified numeric `data_type`, using a caller-supplied null mask.
 *
 * @tparam B Type of the null mask argument; it is perfect-forwarded to the `column` constructor
 *
 * @param type The column data type; must satisfy `is_numeric`
 * @param size The number of elements in the column
 * @param null_mask Null mask forwarded to the `column` constructor
 * @param null_count Null count passed to the `column` constructor
 * @param stream Stream handed to the `rmm::device_buffer` that holds the column data
 * @param mr Memory resource handed to the `rmm::device_buffer` that holds the column data
 * @return Owning pointer to the constructed column
 *
 * Throws (through `CUDF_EXPECTS`) when `type` is not a numeric type.
 */
97 template <typename B>
98 std::unique_ptr<column> make_numeric_column(
99  data_type type,
100  size_type size,
101  B&& null_mask,
102  size_type null_count,
103  rmm::cuda_stream_view stream = cudf::get_default_stream(),
104  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())
105 {
106  CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type.");
 // Data buffer is sized as element count times bytes-per-element; its contents are not set here.
107  return std::make_unique<column>(type,
108  size,
109  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
110  std::forward<B>(null_mask),
111  null_count);
112 }
113 
/**
 * @brief Declaration of the `make_fixed_point_column` overload that takes a `mask_state`.
 *
 * @param type The column data type
 * @param size The number of elements in the column
 * @param state Controls the allocation/initialization of the null mask; defaults to
 * `mask_state::UNALLOCATED`
 * @return Owning pointer to the constructed column
 *
 * NOTE(review): this listing stops after `state` (listing rows 135-136 are absent), so the
 * declaration is not terminated here. Presumably the missing rows are the `stream` and `mr`
 * parameters used by the other factories -- confirm against the real header.
 */
131 std::unique_ptr<column> make_fixed_point_column(
132  data_type type,
133  size_type size,
134  mask_state state = mask_state::UNALLOCATED,
137 
/**
 * @brief Construct a column with sufficient uninitialized storage to hold `size` elements of
 * the specified fixed_point `data_type`, using a caller-supplied null mask.
 *
 * @tparam B Type of the null mask argument; it is perfect-forwarded to the `column` constructor
 *
 * @param type The column data type; must satisfy `is_fixed_point`
 * @param size The number of elements in the column
 * @param null_mask Null mask forwarded to the `column` constructor
 * @param null_count Null count passed to the `column` constructor
 * @param stream Stream handed to the `rmm::device_buffer` that holds the column data
 * @param mr Memory resource handed to the `rmm::device_buffer` that holds the column data
 * @return Owning pointer to the constructed column
 *
 * Throws (through `CUDF_EXPECTS`) when `type` is not a fixed_point type.
 */
154 template <typename B>
155 std::unique_ptr<column> make_fixed_point_column(
156  data_type type,
157  size_type size,
158  B&& null_mask,
159  size_type null_count,
160  rmm::cuda_stream_view stream = cudf::get_default_stream(),
161  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())
162 {
163  CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type.");
 // Data buffer is sized as element count times bytes-per-element; its contents are not set here.
164  return std::make_unique<column>(type,
165  size,
166  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
167  std::forward<B>(null_mask),
168  null_count);
169 }
170 
/**
 * @brief Declaration of the `make_timestamp_column` overload that takes a `mask_state`.
 *
 * @param type The column data type
 * @param size The number of elements in the column
 * @param state Controls the allocation/initialization of the null mask; defaults to
 * `mask_state::UNALLOCATED`
 * @return Owning pointer to the constructed column
 *
 * NOTE(review): this listing stops after `state` (listing rows 193-194 are absent), so the
 * declaration is not terminated here. Presumably the missing rows are the `stream` and `mr`
 * parameters used by the other factories -- confirm against the real header.
 */
189 std::unique_ptr<column> make_timestamp_column(
190  data_type type,
191  size_type size,
192  mask_state state = mask_state::UNALLOCATED,
195 
/**
 * @brief Construct a column with sufficient uninitialized storage to hold `size` elements of
 * the specified timestamp `data_type`, using a caller-supplied null mask.
 *
 * @tparam B Type of the null mask argument; it is perfect-forwarded to the `column` constructor
 *
 * @param type The column data type; must satisfy `is_timestamp`
 * @param size The number of elements in the column
 * @param null_mask Null mask forwarded to the `column` constructor
 * @param null_count Null count passed to the `column` constructor
 * @param stream Stream handed to the `rmm::device_buffer` that holds the column data
 * @param mr Memory resource handed to the `rmm::device_buffer` that holds the column data
 * @return Owning pointer to the constructed column
 *
 * Throws (through `CUDF_EXPECTS`) when `type` is not a timestamp type.
 */
213 template <typename B>
214 std::unique_ptr<column> make_timestamp_column(
215  data_type type,
216  size_type size,
217  B&& null_mask,
218  size_type null_count,
219  rmm::cuda_stream_view stream = cudf::get_default_stream(),
220  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())
221 {
222  CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type.");
 // Data buffer is sized as element count times bytes-per-element; its contents are not set here.
223  return std::make_unique<column>(type,
224  size,
225  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
226  std::forward<B>(null_mask),
227  null_count);
228 }
229 
/**
 * @brief Declaration of the `make_duration_column` overload that takes a `mask_state`.
 *
 * @param type The column data type
 * @param size The number of elements in the column
 * @param state Controls the allocation/initialization of the null mask; defaults to
 * `mask_state::UNALLOCATED`
 * @return Owning pointer to the constructed column
 *
 * NOTE(review): this listing stops after `state` (listing rows 252-253 are absent), so the
 * declaration is not terminated here. Presumably the missing rows are the `stream` and `mr`
 * parameters used by the other factories -- confirm against the real header.
 */
248 std::unique_ptr<column> make_duration_column(
249  data_type type,
250  size_type size,
251  mask_state state = mask_state::UNALLOCATED,
254 
/**
 * @brief Construct a column with sufficient uninitialized storage to hold `size` elements of
 * the specified duration `data_type`, using a caller-supplied null mask.
 *
 * @tparam B Type of the null mask argument; it is perfect-forwarded to the `column` constructor
 *
 * @param type The column data type; must satisfy `is_duration`
 * @param size The number of elements in the column
 * @param null_mask Null mask forwarded to the `column` constructor
 * @param null_count Null count passed to the `column` constructor
 * @param stream Stream handed to the `rmm::device_buffer` that holds the column data
 * @param mr Memory resource handed to the `rmm::device_buffer` that holds the column data
 * @return Owning pointer to the constructed column
 *
 * Throws (through `CUDF_EXPECTS`) when `type` is not a duration type.
 */
272 template <typename B>
273 std::unique_ptr<column> make_duration_column(
274  data_type type,
275  size_type size,
276  B&& null_mask,
277  size_type null_count,
278  rmm::cuda_stream_view stream = cudf::get_default_stream(),
279  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())
280 {
281  CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type.");
 // Data buffer is sized as element count times bytes-per-element; its contents are not set here.
282  return std::make_unique<column>(type,
283  size,
284  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
285  std::forward<B>(null_mask),
286  null_count);
287 }
288 
/**
 * @brief Declaration of the `make_fixed_width_column` overload that takes a `mask_state`.
 *
 * @param type The column data type
 * @param size The number of elements in the column
 * @param state Controls the allocation/initialization of the null mask; defaults to
 * `mask_state::UNALLOCATED`
 * @return Owning pointer to the constructed column
 *
 * NOTE(review): this listing stops after `state` (listing rows 311-312 are absent), so the
 * declaration is not terminated here. Presumably the missing rows are the `stream` and `mr`
 * parameters used by the other factories -- confirm against the real header.
 */
307 std::unique_ptr<column> make_fixed_width_column(
308  data_type type,
309  size_type size,
310  mask_state state = mask_state::UNALLOCATED,
313 
/**
 * @brief Construct a column with sufficient uninitialized storage to hold `size` elements of
 * the specified fixed-width `data_type`, using a caller-supplied null mask.
 *
 * After validating the type, the call is routed to the timestamp, duration, fixed_point or
 * numeric factory (checked in that order), passing every argument through unchanged.
 *
 * @tparam B Type of the null mask argument; it is perfect-forwarded to the selected factory
 *
 * @param type The column data type; must satisfy `is_fixed_width`
 * @param size The number of elements in the column
 * @param null_mask Null mask forwarded to the selected factory
 * @param null_count Null count passed to the selected factory
 * @param stream Stream passed to the selected factory
 * @param mr Memory resource passed to the selected factory
 * @return Owning pointer to the constructed column
 *
 * Throws (through `CUDF_EXPECTS`) when `type` is not a fixed-width type.
 */
331 template <typename B>
332 std::unique_ptr<column> make_fixed_width_column(
333  data_type type,
334  size_type size,
335  B&& null_mask,
336  size_type null_count,
337  rmm::cuda_stream_view stream = cudf::get_default_stream(),
338  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())
339 {
340  CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type.");
 // Only one branch runs, so `null_mask` is forwarded exactly once.
341  if (is_timestamp(type)) {
342  return make_timestamp_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
343  } else if (is_duration(type)) {
344  return make_duration_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
345  } else if (is_fixed_point(type)) {
346  return make_fixed_point_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
347  }
 // Every remaining fixed-width type is handed to the numeric factory.
348  return make_numeric_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
349 }
350 
/**
 * @brief Declaration of the `make_strings_column` overload that takes a device span of
 * `(char const*, size_type)` pairs.
 *
 * @param strings Device span of pointer/size pairs
 * @return Owning pointer to the constructed column
 *
 * NOTE(review): this listing stops after `strings` (listing rows 376-377 are absent), so the
 * declaration is not terminated here. Presumably the missing rows are the `stream` and `mr`
 * parameters used by the other factories -- confirm against the real header.
 */
374 std::unique_ptr<column> make_strings_column(
375  cudf::device_span<thrust::pair<char const*, size_type> const> strings,
378 
/**
 * @brief Construct a batch of STRING type columns given an array of device spans of
 * pointer/size pairs.
 *
 * @param input Vector of device spans, each holding `(char const*, size_type)` pairs
 * @param stream CUDA stream for the operation; defaults to `cudf::get_default_stream()`
 * @param mr Device memory resource; defaults to `cudf::get_current_device_resource_ref()`
 * @return Vector of owning pointers to the constructed columns
 */
394 std::vector<std::unique_ptr<column>> make_strings_column_batch(
395  std::vector<cudf::device_span<thrust::pair<char const*, size_type> const>> const& input,
396  rmm::cuda_stream_view stream = cudf::get_default_stream(),
397  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
398 
/**
 * @brief Declaration of a `make_strings_column` overload that takes a `string_view`
 * placeholder named `null_placeholder`.
 *
 * @param null_placeholder `string_view` value; judging by the name it marks null entries --
 * TODO confirm against the real header
 * @return Owning pointer to the constructed column
 *
 * NOTE(review): listing rows 426 and 428-429 are absent, so the first parameter and the tail of
 * the parameter list are not visible here and the declaration is not terminated. Confirm the
 * full signature against the real header.
 */
425 std::unique_ptr<column> make_strings_column(
427  string_view const null_placeholder,
430 
/**
 * @brief Construct a STRING type column given an offsets column, a chars buffer, a null count
 * and a null mask.
 *
 * @param num_strings The number of strings the column represents
 * @param offsets_column Offsets column; ownership is transferred to this function
 * @param chars_buffer Device buffer holding the character data; taken by rvalue reference
 * @param null_count The number of null string entries
 * @param null_mask Device buffer holding the null mask; taken by rvalue reference
 * @return Owning pointer to the constructed column
 */
448 std::unique_ptr<column> make_strings_column(size_type num_strings,
449  std::unique_ptr<column> offsets_column,
450  rmm::device_buffer&& chars_buffer,
451  size_type null_count,
452  rmm::device_buffer&& null_mask);
453 
/**
 * @brief Construct a LIST type column given offsets column, child column, null mask and null
 * count.
 *
 * @param num_rows The number of rows in the column
 * @param offsets_column Offsets column; ownership is transferred to this function
 * @param child_column Child column; ownership is transferred to this function
 * @param null_count The number of null list entries
 * @param null_mask Device buffer holding the null mask; taken by rvalue reference
 * @param stream CUDA stream for the operation; defaults to `cudf::get_default_stream()`
 * @param mr Device memory resource; defaults to `cudf::get_current_device_resource_ref()`
 * @return Owning pointer to the constructed column
 */
510 std::unique_ptr<cudf::column> make_lists_column(
511  size_type num_rows,
512  std::unique_ptr<column> offsets_column,
513  std::unique_ptr<column> child_column,
514  size_type null_count,
515  rmm::device_buffer&& null_mask,
516  rmm::cuda_stream_view stream = cudf::get_default_stream(),
517  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
518 
/**
 * @brief Construct a STRUCT column using specified child columns as members.
 *
 * @param num_rows The number of rows in the column
 * @param child_columns Member columns of the struct; taken by rvalue reference
 * @param null_count The number of null struct entries
 * @param null_mask Device buffer holding the null mask; taken by rvalue reference
 * @param stream CUDA stream for the operation; defaults to `cudf::get_default_stream()`
 * @param mr Device memory resource; defaults to `cudf::get_current_device_resource_ref()`
 * @return Owning pointer to the constructed column
 */
542 std::unique_ptr<cudf::column> make_structs_column(
543  size_type num_rows,
544  std::vector<std::unique_ptr<column>>&& child_columns,
545  size_type null_count,
546  rmm::device_buffer&& null_mask,
547  rmm::cuda_stream_view stream = cudf::get_default_stream(),
548  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
549 
/**
 * @brief Construct a column with `size` elements that are all equal to the given scalar.
 *
 * @param s The scalar whose value every element takes
 * @param size The number of elements in the column
 * @param stream CUDA stream for the operation; defaults to `cudf::get_default_stream()`
 * @param mr Device memory resource; defaults to `cudf::get_current_device_resource_ref()`
 * @return Owning pointer to the constructed column
 */
564 std::unique_ptr<column> make_column_from_scalar(
565  scalar const& s,
566  size_type size,
567  rmm::cuda_stream_view stream = cudf::get_default_stream(),
568  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
569 
/**
 * @brief Construct a dictionary column with `size` elements that are all equal to the given
 * scalar.
 *
 * @param s The scalar whose value every element takes
 * @param size The number of elements in the column
 * @param stream CUDA stream for the operation; defaults to `cudf::get_default_stream()`
 * @param mr Device memory resource; defaults to `cudf::get_current_device_resource_ref()`
 * @return Owning pointer to the constructed column
 */
584 std::unique_ptr<column> make_dictionary_from_scalar(
585  scalar const& s,
586  size_type size,
587  rmm::cuda_stream_view stream = cudf::get_default_stream(),
588  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
589  // end of group
591 } // namespace CUDF_EXPORT cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
An owning class to represent a singular value.
Definition: scalar.hpp:49
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8 string.
Definition: string_view.hpp:44
Class definition for cudf::column.
std::vector< std::unique_ptr< column > > make_strings_column_batch(std::vector< cudf::device_span< thrust::pair< char const *, size_type > const >> const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a batch of STRING type columns given an array of device spans of pointer/size pairs.
std::unique_ptr< column > make_fixed_width_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified fixed-width data_type.
std::unique_ptr< cudf::column > make_structs_column(size_type num_rows, std::vector< std::unique_ptr< column >> &&child_columns, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a STRUCT column using specified child columns as members.
std::unique_ptr< column > make_dictionary_from_scalar(scalar const &s, size_type size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a dictionary column with size elements that are all equal to the given scalar.
std::unique_ptr< column > make_numeric_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified numeric data_type.
std::unique_ptr< column > make_duration_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified duration data_type.
std::unique_ptr< column > make_timestamp_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified timestamp data_type.
std::unique_ptr< column > make_empty_column(type_id id)
Creates an empty column of the specified type.
std::unique_ptr< cudf::column > make_lists_column(size_type num_rows, std::unique_ptr< column > offsets_column, std::unique_ptr< column > child_column, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a LIST type column given offsets column, child column, null mask and null count.
std::unique_ptr< column > make_column_from_scalar(scalar const &s, size_type size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a column with size elements that are all equal to the given scalar.
std::unique_ptr< column > make_strings_column(size_type num_strings, std::unique_ptr< column > offsets_column, rmm::device_buffer &&chars_buffer, size_type null_count, rmm::device_buffer &&null_mask)
Construct a STRING type column given an offsets column, a chars buffer, a null count, and a null mask.
std::unique_ptr< column > make_fixed_point_column(data_type type, size_type size, B &&null_mask, size_type null_count, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct column with sufficient uninitialized storage to hold size elements of the specified fixed_point data_type.
cudf::size_type null_count(bitmask_type const *bitmask, size_type start, size_type stop, rmm::cuda_stream_view stream=cudf::get_default_stream())
Given a validity bitmask, counts the number of null elements (unset bits) in the range [start, stop).
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
std::unique_ptr< column > is_timestamp(strings_column_view const &input, std::string_view format, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Verifies the given strings column can be parsed to timestamps using the provided format pattern.
std::unique_ptr< column > is_fixed_point(strings_column_view const &input, data_type decimal_type=data_type{type_id::DECIMAL64}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a boolean column identifying strings in which all characters are valid for conversion to fixed-point.
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:181
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type
constexpr CUDF_HOST_DEVICE bool is_fixed_width()
Indicates whether elements of type T are fixed-width.
Definition: traits.hpp:561
constexpr CUDF_HOST_DEVICE bool is_duration()
Indicates whether the type T is a duration type.
Definition: traits.hpp:469
type_id
Identifies a column's logical element type.
Definition: types.hpp:203
constexpr CUDF_HOST_DEVICE bool is_numeric()
Indicates whether the type T is a numeric type.
Definition: traits.hpp:172
cuDF interfaces
Definition: host_udf.hpp:39
APIs for spans.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:346
Type declarations for libcudf.