column_factories.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/column/column.hpp>
19 #include <cudf/types.hpp>
21 #include <cudf/utilities/span.hpp>
23 
24 #include <rmm/cuda_stream_view.hpp>
25 #include <rmm/mr/device/per_device_resource.hpp>
26 #include <rmm/resource_ref.hpp>
27 
28 #include <thrust/pair.h>
29 
30 namespace cudf {
46 std::unique_ptr<column> make_empty_column(data_type type);
47 
56 std::unique_ptr<column> make_empty_column(type_id id);
57 
76 std::unique_ptr<column> make_numeric_column(
77  data_type type,
78  size_type size,
80  rmm::cuda_stream_view stream = cudf::get_default_stream(),
81  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
82 
100 template <typename B>
101 std::unique_ptr<column> make_numeric_column(
102  data_type type,
103  size_type size,
104  B&& null_mask,
106  rmm::cuda_stream_view stream = cudf::get_default_stream(),
107  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
108 {
109  CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type.");
110  return std::make_unique<column>(type,
111  size,
112  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
113  std::forward<B>(null_mask),
114  null_count);
115 }
116 
134 std::unique_ptr<column> make_fixed_point_column(
135  data_type type,
136  size_type size,
138  rmm::cuda_stream_view stream = cudf::get_default_stream(),
139  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
140 
157 template <typename B>
158 std::unique_ptr<column> make_fixed_point_column(
159  data_type type,
160  size_type size,
161  B&& null_mask,
163  rmm::cuda_stream_view stream = cudf::get_default_stream(),
164  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
165 {
166  CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type.");
167  return std::make_unique<column>(type,
168  size,
169  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
170  std::forward<B>(null_mask),
171  null_count);
172 }
173 
192 std::unique_ptr<column> make_timestamp_column(
193  data_type type,
194  size_type size,
196  rmm::cuda_stream_view stream = cudf::get_default_stream(),
197  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
198 
216 template <typename B>
217 std::unique_ptr<column> make_timestamp_column(
218  data_type type,
219  size_type size,
220  B&& null_mask,
222  rmm::cuda_stream_view stream = cudf::get_default_stream(),
223  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
224 {
225  CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type.");
226  return std::make_unique<column>(type,
227  size,
228  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
229  std::forward<B>(null_mask),
230  null_count);
231 }
232 
251 std::unique_ptr<column> make_duration_column(
252  data_type type,
253  size_type size,
255  rmm::cuda_stream_view stream = cudf::get_default_stream(),
256  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
257 
275 template <typename B>
276 std::unique_ptr<column> make_duration_column(
277  data_type type,
278  size_type size,
279  B&& null_mask,
281  rmm::cuda_stream_view stream = cudf::get_default_stream(),
282  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
283 {
284  CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type.");
285  return std::make_unique<column>(type,
286  size,
287  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
288  std::forward<B>(null_mask),
289  null_count);
290 }
291 
310 std::unique_ptr<column> make_fixed_width_column(
311  data_type type,
312  size_type size,
314  rmm::cuda_stream_view stream = cudf::get_default_stream(),
315  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
316 
334 template <typename B>
335 std::unique_ptr<column> make_fixed_width_column(
336  data_type type,
337  size_type size,
338  B&& null_mask,
340  rmm::cuda_stream_view stream = cudf::get_default_stream(),
341  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
342 {
343  CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type.");
344  if (is_timestamp(type)) {
345  return make_timestamp_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
346  } else if (is_duration(type)) {
347  return make_duration_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
348  } else if (is_fixed_point(type)) {
349  return make_fixed_point_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
350  }
351  return make_numeric_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
352 }
353 
377 std::unique_ptr<column> make_strings_column(
378  cudf::device_span<thrust::pair<char const*, size_type> const> strings,
379  rmm::cuda_stream_view stream = cudf::get_default_stream(),
380  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
381 
408 std::unique_ptr<column> make_strings_column(
410  string_view const null_placeholder,
411  rmm::cuda_stream_view stream = cudf::get_default_stream(),
412  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
413 
431 std::unique_ptr<column> make_strings_column(size_type num_strings,
432  std::unique_ptr<column> offsets_column,
433  rmm::device_buffer&& chars_buffer,
435  rmm::device_buffer&& null_mask);
436 
493 std::unique_ptr<cudf::column> make_lists_column(
494  size_type num_rows,
495  std::unique_ptr<column> offsets_column,
496  std::unique_ptr<column> child_column,
498  rmm::device_buffer&& null_mask,
499  rmm::cuda_stream_view stream = cudf::get_default_stream(),
500  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
501 
525 std::unique_ptr<cudf::column> make_structs_column(
526  size_type num_rows,
527  std::vector<std::unique_ptr<column>>&& child_columns,
529  rmm::device_buffer&& null_mask,
530  rmm::cuda_stream_view stream = cudf::get_default_stream(),
531  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
532 
547 std::unique_ptr<column> make_column_from_scalar(
548  scalar const& s,
549  size_type size,
550  rmm::cuda_stream_view stream = cudf::get_default_stream(),
551  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
552 
567 std::unique_ptr<column> make_dictionary_from_scalar(
568  scalar const& s,
569  size_type size,
570  rmm::cuda_stream_view stream = cudf::get_default_stream(),
571  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
572  // end of group
574 } // namespace cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:241
An owning class to represent a singular value.
Definition: scalar.hpp:49
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8 string.
Definition: string_view.hpp:44
Class definition for cudf::column.
std::unique_ptr< cudf::column > make_structs_column(size_type num_rows, std::vector< std::unique_ptr< column >> &&child_columns, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct a STRUCT column using specified child columns as members.
std::unique_ptr< column > make_column_from_scalar(scalar const &s, size_type size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct a column with size elements that are all equal to the given scalar.
std::unique_ptr< column > make_fixed_point_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified fixed_point data_type with an optional null mask.
std::unique_ptr< column > make_numeric_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified numeric data_type with an optional null mask.
std::unique_ptr< column > make_empty_column(data_type type)
Creates an empty column of the specified type.
std::unique_ptr< column > make_strings_column(cudf::device_span< thrust::pair< char const *, size_type > const > strings, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct a STRING type column given a device span of pointer/size pairs.
std::unique_ptr< column > make_timestamp_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified timestamp data_type with an optional null mask.
std::unique_ptr< column > make_dictionary_from_scalar(scalar const &s, size_type size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct a dictionary column with size elements that are all equal to the given scalar.
std::unique_ptr< column > make_duration_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified duration data_type with an optional null mask.
std::unique_ptr< column > make_fixed_width_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified fixed width data_type with an optional null mask.
std::unique_ptr< cudf::column > make_lists_column(size_type num_rows, std::unique_ptr< column > offsets_column, std::unique_ptr< column > child_column, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct a LIST type column given offsets column, child column, null mask and null count.
cudf::size_type null_count(bitmask_type const *bitmask, size_type start, size_type stop, rmm::cuda_stream_view stream=cudf::get_default_stream())
Given a validity bitmask, counts the number of null elements (unset bits) in the range [start, stop).
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:177
constexpr bool is_fixed_point()
Indicates whether the type T is a fixed-point type.
Definition: traits.hpp:397
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:179
constexpr bool is_duration()
Indicates whether the type T is a duration type.
Definition: traits.hpp:423
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type
constexpr bool is_numeric()
Indicates whether the type T is a numeric type.
Definition: traits.hpp:174
constexpr bool is_timestamp()
Indicates whether the type T is a timestamp type.
Definition: traits.hpp:373
constexpr bool is_fixed_width()
Indicates whether elements of type T are fixed-width.
Definition: traits.hpp:515
type_id
Identifies a column's logical element type.
Definition: types.hpp:201
@ UNALLOCATED
Null mask not allocated (all elements are valid)
cuDF interfaces
Definition: aggregation.hpp:34
APIs for spans.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:291
Type declarations for libcudf.