column.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
19 #include <cudf/null_mask.hpp>
20 #include <cudf/types.hpp>
23 
24 #include <rmm/cuda_stream_view.hpp>
25 #include <rmm/device_buffer.hpp>
26 #include <rmm/device_uvector.hpp>
27 
28 #include <memory>
29 #include <type_traits>
30 #include <utility>
31 #include <vector>
32 
38 namespace CUDF_EXPORT cudf {
39 
// A container of nullable device data as a column of elements.
// The column owns its data buffer, its null mask buffer and its child columns (see the
// private members at the bottom of the class).
// NOTE(review): every line of this listing carries its own line-number prefix and that
// numbering has gaps, so a few declarations below appear without their opening or
// closing lines; consult the full header before relying on those signatures.
47 class column {
48  public:
    // Default construction and destruction are compiler-generated.
49  column() = default;
50  ~column() = default;
    // Copy assignment and move assignment are both deleted.
51  column& operator=(column const& other) = delete;
52  column& operator=(column&& other) = delete;
53 
    // Constructs a new column by deep copying the contents of `other`.
    // NOTE(review): the remainder of the parameter list (listing lines 66-67) is not
    // shown here.
65  column(column const& other,
68 
    // Moves the contents from `other` to create a new column. Declared noexcept.
76  column(column&& other) noexcept;
77 
    // Constructs a column that takes over the storage of a device_uvector<T>.
    // The template is enabled only when T is a numeric or chrono type.
    //   other      - vector whose storage becomes the column data (via other.release())
    //   null_mask  - buffer moved into the column's null mask
    //   null_count - stored as the column's null count
85  template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
86  column(rmm::device_uvector<T>&& other, rmm::device_buffer&& null_mask, size_type null_count)
    // The element type is derived from T.
87  : _type{cudf::data_type{cudf::type_to_id<T>()}},
    // The size is computed by an immediately-invoked lambda so that the range check can run
    // inside the member-initializer list. _size is declared before _data, so other.size()
    // is read before other.release() is called.
88  _size{[&]() {
    // NOTE(review): listing line 89 is not shown; lines 90-92 read as the arguments of a
    // check that raises std::overflow_error when other.size() exceeds the maximum value of
    // size_type -- presumably a CUDF_EXPECTS call; confirm against the full header.
90  other.size() <= static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
91  "The device_uvector size exceeds the column size limit",
92  std::overflow_error);
93  return static_cast<size_type>(other.size());
94  }()},
95  _data{other.release()},
96  _null_mask{std::move(null_mask)},
97  _null_count{null_count}
98  {
99  }
100 
    // Constructs a new column from existing device memory.
    // B1 and B2 are the deduced types of the forwarded `data` and `null_mask` arguments;
    // B2 defaults to rmm::device_buffer.
116  template <typename B1, typename B2 = rmm::device_buffer>
    // NOTE(review): listing lines 117 and 121 are not shown; the initializers below use
    // `dtype` and `null_count`, which would be declared on those lines.
118  size_type size,
119  B1&& data,
120  B2&& null_mask,
    // Child columns are taken by rvalue reference and default to an empty vector.
122  std::vector<std::unique_ptr<column>>&& children = {})
123  : _type{dtype},
124  _size{size},
    // data and null_mask are perfect-forwarded into the owning device buffers.
125  _data{std::forward<B1>(data)},
126  _null_mask{std::forward<B2>(null_mask)},
127  _null_count{null_count},
128  _children{std::move(children)}
129  {
    // A negative size is rejected after the members have been initialized.
130  CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");
131  }
132 
    // Constructs a new column by deep copying the contents of a column_view.
    // Marked explicit, so a column_view does not convert to a column implicitly.
    // NOTE(review): the remainder of the parameter list (listing lines 144-145) is not
    // shown here.
143  explicit column(column_view view,
146 
    // Returns the column's logical element type.
152  [[nodiscard]] data_type type() const noexcept { return _type; }
153 
    // Returns the number of elements.
159  [[nodiscard]] size_type size() const noexcept { return _size; }
160 
    // Returns the count of null elements (the stored _null_count).
166  [[nodiscard]] size_type null_count() const { return _null_count; }
167 
    // Sets the column's null value indicator bitmask to new_null_mask, taking the buffer
    // by rvalue reference, together with the matching null count. Defined elsewhere.
179  void set_null_mask(rmm::device_buffer&& new_null_mask, size_type new_null_count);
180 
    // Overload of set_null_mask taking the new mask by const reference. Defined elsewhere.
    // NOTE(review): the last parameter (listing line 195) is not shown here.
193  void set_null_mask(rmm::device_buffer const& new_null_mask,
194  size_type new_null_count,
196 
    // Updates the count of null elements. Defined elsewhere.
204  void set_null_count(size_type new_null_count);
205 
    // Indicates whether the column can contain null values: true when the null mask
    // buffer has a non-zero size.
218  [[nodiscard]] bool nullable() const noexcept { return (_null_mask.size() > 0); }
219 
    // Indicates whether the column contains null elements: true when null_count() > 0.
226  [[nodiscard]] bool has_nulls() const noexcept { return (null_count() > 0); }
227 
    // Returns the number of child columns.
    // NOTE(review): std::vector::size() yields std::size_t, which is implicitly converted
    // to size_type on return.
233  [[nodiscard]] size_type num_children() const noexcept { return _children.size(); }
234 
    // Returns a reference to the child at child_index. No bounds check is performed here.
241  column& child(size_type child_index) noexcept { return *_children[child_index]; };
242 
    // Returns a const reference to the child at child_index. No bounds check is performed
    // here.
249  [[nodiscard]] column const& child(size_type child_index) const noexcept
250  {
251  return *_children[child_index];
252  };
253 
    // Wrapper for the contents of a column: the data buffer, the null mask buffer and the
    // child columns, each held through unique_ptr.
259  struct contents {
260  std::unique_ptr<rmm::device_buffer> data;
261  std::unique_ptr<rmm::device_buffer> null_mask;
262  std::vector<std::unique_ptr<column>> children;
263  };
264 
    // Releases ownership of the column's contents and returns them as a `contents` value.
    // Defined elsewhere.
280  contents release() noexcept;
281 
    // Returns a column_view for this column. Defined elsewhere.
288  [[nodiscard]] column_view view() const;
289 
    // Implicit conversion to column_view; forwards to view().
298  operator column_view() const { return this->view(); };
299 
307 
    // Implicit conversion to mutable_column_view; forwards to mutable_view().
    // NOTE(review): the declaration of mutable_view() is not shown in this listing
    // (listing lines 308-318 are absent).
319  operator mutable_column_view() { return this->mutable_view(); };
320 
321  private:
    // Logical element type; defaults to type_id::EMPTY.
322  cudf::data_type _type{type_id::EMPTY};
    // Number of elements; value-initialized.
323  cudf::size_type _size{};
    // Device buffer holding the column data.
324  rmm::device_buffer _data{};
    // Device buffer holding the null mask; nullable() tests whether its size is non-zero.
326  rmm::device_buffer _null_mask{};
    // Count of null elements; declared mutable, so it can be modified through a const column.
328  mutable cudf::size_type _null_count{};
    // Owned child columns.
329  std::vector<std::unique_ptr<column>> _children{};
331 };
332  // end of group
334 } // namespace CUDF_EXPORT cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A container of nullable device data as a column of elements.
Definition: column.hpp:47
data_type type() const noexcept
Returns the column's logical element type.
Definition: column.hpp:152
bool has_nulls() const noexcept
Indicates whether the column contains null elements.
Definition: column.hpp:226
column(column_view view, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new column by deep copying the contents of a column_view.
size_type null_count() const
Returns the count of null elements.
Definition: column.hpp:166
size_type num_children() const noexcept
Returns the number of child columns.
Definition: column.hpp:233
void set_null_mask(rmm::device_buffer const &new_null_mask, size_type new_null_count, rmm::cuda_stream_view stream=cudf::get_default_stream())
Sets the column's null value indicator bitmask to new_null_mask.
mutable_column_view mutable_view()
Creates a mutable, non-owning view of the column's data, null mask, and children.
column(column &&other) noexcept
Move the contents from other to create a new column.
column const & child(size_type child_index) const noexcept
Returns a const reference to the specified child.
Definition: column.hpp:249
column & child(size_type child_index) noexcept
Returns a reference to the specified child.
Definition: column.hpp:241
column(data_type dtype, size_type size, B1 &&data, B2 &&null_mask, size_type null_count, std::vector< std::unique_ptr< column >> &&children={})
Construct a new column from existing device memory.
Definition: column.hpp:117
void set_null_mask(rmm::device_buffer &&new_null_mask, size_type new_null_count)
Sets the column's null value indicator bitmask to new_null_mask.
column(column const &other, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new column object by deep copying the contents of other.
bool nullable() const noexcept
Indicates whether it is possible for the column to contain null values, i.e., it has an allocated null mask.
Definition: column.hpp:218
contents release() noexcept
Releases ownership of the column's contents.
size_type size() const noexcept
Returns the number of elements.
Definition: column.hpp:159
void set_null_count(size_type new_null_count)
Updates the count of null elements.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
A non-owning, mutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
column view class definitions
cudf::size_type null_count(bitmask_type const *bitmask, size_type start, size_type stop, rmm::cuda_stream_view stream=cudf::get_default_stream())
Given a validity bitmask, counts the number of null elements (unset bits) in the range [start, stop).
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
constexpr CUDF_HOST_DEVICE bool is_chrono()
Indicates whether the type T is a chrono type.
Definition: traits.hpp:493
cuDF interfaces
Definition: host_udf.hpp:39
APIs for managing validity bitmasks.
Wrapper for the contents of a column.
Definition: column.hpp:259
std::unique_ptr< rmm::device_buffer > data
data device memory buffer
Definition: column.hpp:260
std::unique_ptr< rmm::device_buffer > null_mask
null mask device memory buffer
Definition: column.hpp:261
std::vector< std::unique_ptr< column > > children
child columns
Definition: column.hpp:262
Type declarations for libcudf.