column_factories.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/column/column.hpp>
19 #include <cudf/types.hpp>
20 #include <cudf/utilities/default_stream.hpp>
21 #include <cudf/utilities/span.hpp>
23 
24 #include <rmm/cuda_stream_view.hpp>
26 
27 #include <thrust/pair.h>
28 
29 namespace cudf {
/**
 * @brief Creates an empty column of the specified type.
 *
 * Declaration only; the definition is not part of this header.
 *
 * @param type The data type of the column to create
 * @return Owning pointer to the newly created column
 */
45 std::unique_ptr<column> make_empty_column(data_type type);
46 
/**
 * @brief Overload of make_empty_column that takes a `type_id` instead of a `data_type`.
 *
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably equivalent to the `data_type` overload for the given id —
 * confirm against the implementation.
 *
 * @param id The type id identifying the column's logical element type
 * @return Owning pointer to the newly created column
 */
55 std::unique_ptr<column> make_empty_column(type_id id);
56 
75 std::unique_ptr<column> make_numeric_column(
76  data_type type,
77  size_type size,
78  mask_state state = mask_state::UNALLOCATED,
80  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
81 
99 template <typename B>
100 std::unique_ptr<column> make_numeric_column(
101  data_type type,
102  size_type size,
103  B&& null_mask,
104  size_type null_count = cudf::UNKNOWN_NULL_COUNT,
106  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
107 {
108  CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type.");
109  return std::make_unique<column>(type,
110  size,
111  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
112  std::forward<B>(null_mask),
113  null_count);
114 }
115 
133 std::unique_ptr<column> make_fixed_point_column(
134  data_type type,
135  size_type size,
136  mask_state state = mask_state::UNALLOCATED,
138  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
139 
156 template <typename B>
157 std::unique_ptr<column> make_fixed_point_column(
158  data_type type,
159  size_type size,
160  B&& null_mask,
161  size_type null_count = cudf::UNKNOWN_NULL_COUNT,
163  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
164 {
165  CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type.");
166  return std::make_unique<column>(type,
167  size,
168  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
169  std::forward<B>(null_mask),
170  null_count);
171 }
172 
191 std::unique_ptr<column> make_timestamp_column(
192  data_type type,
193  size_type size,
194  mask_state state = mask_state::UNALLOCATED,
196  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
197 
215 template <typename B>
216 std::unique_ptr<column> make_timestamp_column(
217  data_type type,
218  size_type size,
219  B&& null_mask,
220  size_type null_count = cudf::UNKNOWN_NULL_COUNT,
222  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
223 {
224  CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type.");
225  return std::make_unique<column>(type,
226  size,
227  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
228  std::forward<B>(null_mask),
229  null_count);
230 }
231 
250 std::unique_ptr<column> make_duration_column(
251  data_type type,
252  size_type size,
253  mask_state state = mask_state::UNALLOCATED,
255  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
256 
274 template <typename B>
275 std::unique_ptr<column> make_duration_column(
276  data_type type,
277  size_type size,
278  B&& null_mask,
279  size_type null_count = cudf::UNKNOWN_NULL_COUNT,
281  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
282 {
283  CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type.");
284  return std::make_unique<column>(type,
285  size,
286  rmm::device_buffer{size * cudf::size_of(type), stream, mr},
287  std::forward<B>(null_mask),
288  null_count);
289 }
290 
309 std::unique_ptr<column> make_fixed_width_column(
310  data_type type,
311  size_type size,
312  mask_state state = mask_state::UNALLOCATED,
314  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
315 
333 template <typename B>
334 std::unique_ptr<column> make_fixed_width_column(
335  data_type type,
336  size_type size,
337  B&& null_mask,
338  size_type null_count = cudf::UNKNOWN_NULL_COUNT,
340  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
341 {
342  CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type.");
343  if (is_timestamp(type)) {
344  return make_timestamp_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
345  } else if (is_duration(type)) {
346  return make_duration_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
347  } else if (is_fixed_point(type)) {
348  return make_fixed_point_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
349  }
350  return make_numeric_column(type, size, std::forward<B>(null_mask), null_count, stream, mr);
351 }
352 
376 std::unique_ptr<column> make_strings_column(
377  cudf::device_span<thrust::pair<const char*, size_type> const> strings,
379  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
380 
/**
 * @brief make_strings_column overload that takes a `string_view` null placeholder.
 *
 * Declaration only; the definition is not part of this header.
 *
 * NOTE(review): this listing skips original source lines 408 and 410, so the first parameter
 * is missing here, and (judging by the sibling overloads) probably a
 * `rmm::cuda_stream_view stream` parameter as well. Restore both from the upstream header
 * before relying on this declaration.
 *
 * @param null_placeholder `string_view` value; presumably marks which input entries are
 *                         treated as null — TODO confirm against the implementation
 * @param mr Device memory resource handed to the implementation;
 *           defaults to the current device resource
 * @return Owning pointer to the constructed column
 */
407 std::unique_ptr<column> make_strings_column(
409  const string_view null_placeholder,
411  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
412 
/**
 * @brief make_strings_column overload that takes a null count and a memory resource.
 *
 * Declaration only; the definition is not part of this header.
 *
 * NOTE(review): this listing skips original source lines 442-444 and 446, so the leading
 * parameters are missing here, and (judging by the sibling overloads) probably a
 * `rmm::cuda_stream_view stream` parameter as well. Restore them from the upstream header
 * before relying on this declaration.
 *
 * @param null_count Number of nulls; defaults to `cudf::UNKNOWN_NULL_COUNT`
 * @param mr Device memory resource handed to the implementation;
 *           defaults to the current device resource
 * @return Owning pointer to the constructed column
 */
441 std::unique_ptr<column> make_strings_column(
445  size_type null_count = cudf::UNKNOWN_NULL_COUNT,
447  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
448 
/**
 * @brief make_strings_column overload that takes already-built offsets and chars columns.
 *
 * Declaration only; the definition is not part of this header. Both columns are passed as
 * `std::unique_ptr` by value and the null mask as an rvalue reference, so the caller hands
 * them over to this function.
 * NOTE(review): presumably the inputs become the children and null mask of the resulting
 * STRING column — confirm against the implementation.
 *
 * @param num_strings Number of strings in the column
 * @param offsets_column Offsets column (ownership transferred)
 * @param chars_column Characters column (ownership transferred)
 * @param null_count Number of nulls described by `null_mask`
 * @param null_mask Null mask buffer, moved from the caller
 * @return Owning pointer to the constructed column
 */
466 std::unique_ptr<column> make_strings_column(size_type num_strings,
467  std::unique_ptr<column> offsets_column,
468  std::unique_ptr<column> chars_column,
469  size_type null_count,
470  rmm::device_buffer&& null_mask);
471 
/**
 * @brief make_strings_column overload that takes a string count, a null mask buffer and a
 * null count.
 *
 * Declaration only; the definition is not part of this header.
 *
 * NOTE(review): this listing skips original source lines 488 and 489, so two parameters
 * between `num_strings` and `null_mask` are missing here. Restore them from the upstream
 * header before relying on this declaration.
 *
 * @param num_strings Number of strings in the column
 * @param null_mask Null mask buffer, moved from the caller; defaults to an empty buffer
 * @param null_count Number of nulls; defaults to `cudf::UNKNOWN_NULL_COUNT`
 * @return Owning pointer to the constructed column
 */
487 std::unique_ptr<column> make_strings_column(size_type num_strings,
490  rmm::device_buffer&& null_mask = {},
491  size_type null_count = cudf::UNKNOWN_NULL_COUNT);
492 
549 std::unique_ptr<cudf::column> make_lists_column(
550  size_type num_rows,
551  std::unique_ptr<column> offsets_column,
552  std::unique_ptr<column> child_column,
553  size_type null_count,
554  rmm::device_buffer&& null_mask,
556  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
557 
581 std::unique_ptr<cudf::column> make_structs_column(
582  size_type num_rows,
583  std::vector<std::unique_ptr<column>>&& child_columns,
584  size_type null_count,
585  rmm::device_buffer&& null_mask,
587  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
588 
603 std::unique_ptr<column> make_column_from_scalar(
604  scalar const& s,
605  size_type size,
607  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
608 
623 std::unique_ptr<column> make_dictionary_from_scalar(
624  scalar const& s,
625  size_type size,
627  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
628  // end of group
630 } // namespace cudf
cudf::make_fixed_point_column
std::unique_ptr< column > make_fixed_point_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified fixed_point data_type with an optional null mask.
cudf::make_strings_column
std::unique_ptr< column > make_strings_column(cudf::device_span< thrust::pair< const char *, size_type > const > strings, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct a STRING type column given a device span of pointer/size pairs.
per_device_resource.hpp
cudf::make_empty_column
std::unique_ptr< column > make_empty_column(data_type type)
Creates an empty column of the specified type.
cudf::size_type
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:80
column.hpp
Class definition for cudf::column.
cudf::type_id
type_id
Identifies a column's logical element type.
Definition: types.hpp:196
types.hpp
Type declarations for libcudf.
cudf::string_view
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8 string.
Definition: string_view.hpp:44
rmm::cuda_stream_view
cudf::size_of
std::size_t size_of(data_type t)
Returns the size in bytes of elements of the specified data_type
cudf::is_numeric
constexpr bool is_numeric()
Indicates whether the type T is a numeric type.
Definition: traits.hpp:180
cudf::make_fixed_width_column
std::unique_ptr< column > make_fixed_width_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified fixed width data_type with an optional null mask.
rmm::device_uvector< size_type >
cudf::make_duration_column
std::unique_ptr< column > make_duration_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified duration data_type with an optional null mask.
cudf::make_dictionary_from_scalar
std::unique_ptr< column > make_dictionary_from_scalar(scalar const &s, size_type size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct a dictionary column with size elements that are all equal to the given scalar.
rmm::device_buffer
cudf::mask_state
mask_state
Controls the allocation/initialization of a null mask.
Definition: types.hpp:174
cudf::make_lists_column
std::unique_ptr< cudf::column > make_lists_column(size_type num_rows, std::unique_ptr< column > offsets_column, std::unique_ptr< column > child_column, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct a LIST type column given offsets column, child column, null mask and null count.
cudf::scalar
An owning class to represent a singular value.
Definition: scalar.hpp:48
cudf::make_timestamp_column
std::unique_ptr< column > make_timestamp_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified timestamp data_type with an optional null mask.
cudf::data_type
Indicator for the logical data type of an element in a column.
Definition: types.hpp:236
cudf::make_structs_column
std::unique_ptr< cudf::column > make_structs_column(size_type num_rows, std::vector< std::unique_ptr< column >> &&child_columns, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct a STRUCT column using specified child columns as members.
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::is_fixed_width
constexpr bool is_fixed_width()
Indicates whether elements of type T are fixed-width.
Definition: traits.hpp:494
cudf::get_default_stream
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
cudf::make_numeric_column
std::unique_ptr< column > make_numeric_column(data_type type, size_type size, mask_state state=mask_state::UNALLOCATED, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct column with sufficient uninitialized storage to hold size elements of the specified numeric data_type with an optional null mask.
cudf::is_fixed_point
constexpr bool is_fixed_point()
Indicates whether the type T is a fixed-point type.
Definition: traits.hpp:379
cudf::is_duration
constexpr bool is_duration()
Indicates whether the type T is a duration type.
Definition: traits.hpp:402
cudf::is_timestamp
constexpr bool is_timestamp()
Indicates whether the type T is a timestamp type.
Definition: traits.hpp:355
rmm::mr::device_memory_resource
cudf::make_column_from_scalar
std::unique_ptr< column > make_column_from_scalar(scalar const &s, size_type size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Construct a column with size elements that are all equal to the given scalar.
cudf::device_span
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:285
traits.hpp
CUDF_EXPECTS
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:123