column.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
19 #include <cudf/null_mask.hpp>
20 #include <cudf/types.hpp>
22 
23 #include <rmm/cuda_stream_view.hpp>
24 #include <rmm/device_buffer.hpp>
25 #include <rmm/device_uvector.hpp>
26 #include <rmm/mr/device/per_device_resource.hpp>
27 #include <rmm/resource_ref.hpp>
28 
#include <limits>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include <vector>
33 
39 namespace cudf {
40 
48 class column {
49  public:
50  column() = default;
51  ~column() = default;
52  column& operator=(column const& other) = delete;
53  column& operator=(column&& other) = delete;
54 
66  column(column const& other,
67  rmm::cuda_stream_view stream = cudf::get_default_stream(),
68  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
69 
77  column(column&& other) noexcept;
78 
86  template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
87  column(rmm::device_uvector<T>&& other, rmm::device_buffer&& null_mask, size_type null_count)
88  : _type{cudf::data_type{cudf::type_to_id<T>()}},
89  _size{[&]() {
91  other.size() <= static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
92  "The device_uvector size exceeds the column size limit",
93  std::overflow_error);
94  return static_cast<size_type>(other.size());
95  }()},
96  _data{other.release()},
97  _null_mask{std::move(null_mask)},
98  _null_count{null_count}
99  {
100  }
101 
117  template <typename B1, typename B2 = rmm::device_buffer>
119  size_type size,
120  B1&& data,
121  B2&& null_mask,
123  std::vector<std::unique_ptr<column>>&& children = {})
124  : _type{dtype},
125  _size{size},
126  _data{std::forward<B1>(data)},
127  _null_mask{std::forward<B2>(null_mask)},
128  _null_count{null_count},
129  _children{std::move(children)}
130  {
131  CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");
132  }
133 
145  rmm::cuda_stream_view stream = cudf::get_default_stream(),
146  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
147 
153  [[nodiscard]] data_type type() const noexcept { return _type; }
154 
160  [[nodiscard]] size_type size() const noexcept { return _size; }
161 
167  [[nodiscard]] size_type null_count() const { return _null_count; }
168 
180  void set_null_mask(rmm::device_buffer&& new_null_mask, size_type new_null_count);
181 
194  void set_null_mask(rmm::device_buffer const& new_null_mask,
195  size_type new_null_count,
196  rmm::cuda_stream_view stream = cudf::get_default_stream());
197 
205  void set_null_count(size_type new_null_count);
206 
219  [[nodiscard]] bool nullable() const noexcept { return (_null_mask.size() > 0); }
220 
227  [[nodiscard]] bool has_nulls() const noexcept { return (null_count() > 0); }
228 
234  [[nodiscard]] size_type num_children() const noexcept { return _children.size(); }
235 
242  column& child(size_type child_index) noexcept { return *_children[child_index]; };
243 
250  [[nodiscard]] column const& child(size_type child_index) const noexcept
251  {
252  return *_children[child_index];
253  };
254 
260  struct contents {
261  std::unique_ptr<rmm::device_buffer> data;
262  std::unique_ptr<rmm::device_buffer> null_mask;
263  std::vector<std::unique_ptr<column>> children;
264  };
265 
281  contents release() noexcept;
282 
289  [[nodiscard]] column_view view() const;
290 
299  operator column_view() const { return this->view(); };
300 
308 
320  operator mutable_column_view() { return this->mutable_view(); };
321 
322  private:
324  cudf::size_type _size{};
325  rmm::device_buffer _data{};
327  rmm::device_buffer _null_mask{};
329  mutable cudf::size_type _null_count{};
330  std::vector<std::unique_ptr<column>> _children{};
332 };
333  // end of group
335 } // namespace cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A container of nullable device data as a column of elements.
Definition: column.hpp:48
column(column_view view, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct a new column by deep copying the contents of a column_view.
data_type type() const noexcept
Returns the column's logical element type.
Definition: column.hpp:153
bool has_nulls() const noexcept
Indicates whether the column contains null elements.
Definition: column.hpp:227
size_type null_count() const
Returns the count of null elements.
Definition: column.hpp:167
size_type num_children() const noexcept
Returns the number of child columns.
Definition: column.hpp:234
void set_null_mask(rmm::device_buffer const &new_null_mask, size_type new_null_count, rmm::cuda_stream_view stream=cudf::get_default_stream())
Sets the column's null value indicator bitmask to new_null_mask.
mutable_column_view mutable_view()
Creates a mutable, non-owning view of the column's data, null mask, and children.
column(column &&other) noexcept
Move the contents from other to create a new column.
column const & child(size_type child_index) const noexcept
Returns a const reference to the specified child.
Definition: column.hpp:250
column & child(size_type child_index) noexcept
Returns a reference to the specified child.
Definition: column.hpp:242
column(data_type dtype, size_type size, B1 &&data, B2 &&null_mask, size_type null_count, std::vector< std::unique_ptr< column >> &&children={})
Construct a new column from existing device memory.
Definition: column.hpp:118
void set_null_mask(rmm::device_buffer &&new_null_mask, size_type new_null_count)
Sets the column's null value indicator bitmask to new_null_mask.
column(column const &other, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Construct a new column object by deep copying the contents of other.
column_view view() const
Creates an immutable, non-owning view of the column's data and children.
bool nullable() const noexcept
Indicates whether it is possible for the column to contain null values, i.e., it has an allocated null mask.
Definition: column.hpp:219
contents release() noexcept
Releases ownership of the column's contents.
size_type size() const noexcept
Returns the number of elements.
Definition: column.hpp:160
void set_null_count(size_type new_null_count)
Updates the count of null elements.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:241
A non-owning, mutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
column view class definitions
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:177
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
constexpr bool is_chrono()
Indicates whether the type T is a chrono type.
Definition: traits.hpp:447
@ EMPTY
Always null with no underlying data.
cuDF interfaces
Definition: aggregation.hpp:34
APIs for managing validity bitmasks.
Wrapper for the contents of a column.
Definition: column.hpp:260
std::unique_ptr< rmm::device_buffer > data
data device memory buffer
Definition: column.hpp:261
std::unique_ptr< rmm::device_buffer > null_mask
null mask device memory buffer
Definition: column.hpp:262
std::vector< std::unique_ptr< column > > children
child columns
Definition: column.hpp:263
Type declarations for libcudf.