column_view.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <cudf/types.hpp>
10 #include <cudf/utilities/span.hpp>
13 
14 #include <cuda/std/span>
15 
16 #include <limits>
17 #include <type_traits>
18 #include <vector>
19 
24 namespace CUDF_EXPORT cudf {
25 namespace detail {
45  public:
/// @brief Returns a pointer to the base device memory allocation cast to `T`.
///
/// The pointer is whatever get_data() yields; `_offset` is not applied here
/// (data<T>() is the accessor that adds it).
///
/// @tparam T Pointee type, `void` by default. The overload participates only when
///           `T` is `void` or `is_rep_layout_compatible<T>()` holds.
/// @return `get_data()` converted to `T const*`.
62  template <typename T = void,
63  CUDF_ENABLE_IF(std::is_same_v<T, void> or is_rep_layout_compatible<T>())>
64  T const* head() const noexcept
65  {
66  return static_cast<T const*>(get_data());
67  }
68 
/// @brief Returns the underlying data cast to `T`, advanced by the view's offset.
///
/// @tparam T Element type; must satisfy `is_rep_layout_compatible<T>()`.
/// @return `head<T>() + _offset`, i.e. the offset is counted in elements of `T`.
81  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
82  T const* data() const noexcept
83  {
84  return head<T>() + _offset;
85  }
86 
/// @brief Returns a pointer to the first element (accounting for the offset)
///        after the underlying data is cast to `T`.
///
/// @tparam T Element type; must satisfy `is_rep_layout_compatible<T>()`.
/// @return The same pointer as data<T>().
97  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
98  T const* begin() const noexcept
99  {
100  return data<T>();
101  }
102 
/// @brief Returns a pointer one past the last element after the underlying data
///        is cast to `T`.
///
/// @tparam T Element type; must satisfy `is_rep_layout_compatible<T>()`.
/// @return `begin<T>() + size()`.
113  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
114  T const* end() const noexcept
115  {
116  return begin<T>() + size();
117  }
118 
/// @brief Returns the number of elements in the column (the stored `_size`).
124  [[nodiscard]] size_type size() const noexcept { return _size; }
125 
/// @brief Returns true if size() returns zero, or false otherwise.
131  [[nodiscard]] bool is_empty() const noexcept { return size() == 0; }
132 
/// @brief Returns the element data_type (the stored `_type`).
138  [[nodiscard]] data_type type() const noexcept { return _type; }
139 
/// @brief Indicates if the column can contain null elements, i.e. whether a
///        bitmask pointer is set (`_null_mask` is not `nullptr`).
149  [[nodiscard]] bool nullable() const noexcept { return nullptr != _null_mask; }
150 
/// @brief Returns the count of null elements (the stored `_null_count`).
///
/// Unlike the neighbouring accessors this one is not declared `noexcept`.
156  [[nodiscard]] size_type null_count() const { return _null_count; }
157 
173  [[nodiscard]] size_type null_count(
174  size_type begin,
175  size_type end,
177 
/// @brief Indicates if the column contains null elements, i.e. `null_count() > 0`.
185  [[nodiscard]] bool has_nulls() const { return null_count() > 0; }
186 
200  [[nodiscard]] bool has_nulls(size_type begin,
201  size_type end,
203  {
204  return null_count(begin, end, stream) > 0;
205  }
206 
/// @brief Returns the raw pointer to the underlying bitmask allocation.
///
/// The stored pointer is returned unchanged; it is `nullptr` when nullable()
/// is false.
215  [[nodiscard]] bitmask_type const* null_mask() const noexcept { return _null_mask; }
216 
/// @brief Returns the index of the first element relative to the base memory
///        allocation (the stored `_offset`, the value data<T>() adds to head<T>()).
223  [[nodiscard]] size_type offset() const noexcept { return _offset; }
224 
225  protected:
/// @brief Returns the pointer to the base device memory allocation.
///
/// Virtual hook that head<T>() calls; this base version returns `_data`.
/// column_view and mutable_column_view each declare an override in this file.
235  [[nodiscard]] virtual void const* get_data() const noexcept { return _data; }
236 
237  data_type _type{type_id::EMPTY};  ///< Element type returned by type(); defaults to type_id::EMPTY
238  size_type _size{};  ///< Number of elements, returned by size()
239  void const* _data{};  ///< Base data pointer returned by the base get_data()
240  bitmask_type const* _null_mask{};  ///< Bitmask pointer; nullptr means nullable() is false
243  mutable size_type _null_count{};  ///< Null count returned by null_count(); declared mutable
244  size_type _offset{};  ///< Element offset that data<T>() adds to head<T>()
246 
// Default construction leaves every member at its in-class initializer.
247  column_view_base() = default;
// Virtual, so the derived views can declare their destructors with `override`.
248  virtual ~column_view_base() = default;
// Member-wise copy.
249  column_view_base(column_view_base const&) = default;
263 
289  size_type size,
290  void const* data,
291  bitmask_type const* null_mask,
293  size_type offset = 0);
294 };
295 
296 } // namespace detail
297 
319  public:
/// @brief Default constructor; compiler-generated.
320  column_view() = default;
321 
322  // These pragmas work around an nvcc issue: if a column_view is used inside a
323  // __device__ code path, the explicitly defaulted functions below end up being
324  // created as __host__ __device__. If they are then called from a plain
325  // __host__ function (e.g., the std::vector destructor), compilation fails
326  // because a __host__ __device__ function is being called from a __host__
327  // function.
328 #ifdef __CUDACC__
329 #pragma nv_exec_check_disable
330 #endif
331  ~column_view() override = default;
332 #ifdef __CUDACC__
333 #pragma nv_exec_check_disable
334 #endif
335  column_view(column_view const&) = default;
336  column_view(column_view&&) = default;
342  column_view& operator=(column_view const&) = default;
349 
377  size_type size,
378  void const* data,
379  bitmask_type const* null_mask,
381  size_type offset = 0,
382  std::vector<column_view> const& children = {});
383 
/// @brief Returns the specified child.
///
/// @param child_index Position in the child vector. It is passed straight to
///        operator[], so no range check is performed here.
/// @return A copy of the child column_view stored at `child_index`.
390  [[nodiscard]] column_view child(size_type child_index) const noexcept
391  {
392  return _children[child_index];
393  }
394 
/// @brief Returns the number of child columns.
///
/// @return `_children.size()`, converted explicitly from the vector's unsigned
///         size type to `size_type`.
400  [[nodiscard]] size_type num_children() const noexcept { return static_cast<size_type>(_children.size()); }
401 
/// @brief Returns an iterator to the beginning of the ordered sequence of child
///        column-views (`_children.cbegin()`).
407  auto child_begin() const noexcept { return _children.cbegin(); }
408 
/// @brief Returns an iterator to the end of the ordered sequence of child
///        column-views (`_children.cend()`).
414  auto child_end() const noexcept { return _children.cend(); }
415 
/// @brief Constructs a column_view over the elements of a typed device span.
///
/// Delegates to the pointer-based constructor with the type id mapped from `T`,
/// `data.size()` as the size, `data.data()` as the data pointer, no null mask,
/// a null count of 0, an offset of 0 and no children.
///
/// @tparam T Span element type; enabled only for numeric or chrono types.
/// @param data Span whose elements the view refers to.
///
/// Fails the CUDF_EXPECTS check when `data.size()` exceeds the largest
/// `cudf::size_type` value; presumably this surfaces as `std::overflow_error`
/// (the third CUDF_EXPECTS argument) - confirm against the macro definition.
///
/// NOTE(review): the size check runs after the delegated constructor has already
/// received `data.size()` narrowed to `size_type` - confirm that constructor
/// tolerates a wrapped value.
424  template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
425  column_view(device_span<T const> data)
426  : column_view(
427  cudf::data_type{cudf::type_to_id<T>()}, data.size(), data.data(), nullptr, 0, 0, {})
428  {
429  CUDF_EXPECTS(
430  data.size() <= static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
431  "Data exceeds the column size limit",
432  std::overflow_error);
433  }
434 
/// @brief Converts this view to a `device_span<T const>` over its elements.
///
/// Two preconditions are enforced with CUDF_EXPECTS before the span is built:
/// the view's type() must equal the data_type mapped from `T`, and the view
/// must not be nullable().
///
/// @tparam T Span element type; enabled only for numeric or chrono types.
/// @return A span starting at data<T>() and covering size() elements.
446  template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
447  [[nodiscard]] operator device_span<T const>() const
448  {
449  CUDF_EXPECTS(type() == cudf::data_type{cudf::type_to_id<T>()},
450  "Device span type must match column view type.");
451  CUDF_EXPECTS(!nullable(), "A nullable column view cannot be converted to a device span.");
452  return device_span<T const>(data<T>(), size());
453  }
454 
/// @brief Converts this view to a `cuda::std::span<T const>` over its elements.
///
/// Two preconditions are enforced with CUDF_EXPECTS before the span is built:
/// the view's type() must equal the data_type mapped from `T`, and the view
/// must not be nullable().
///
/// @tparam T Span element type; enabled only for numeric or chrono types.
/// @return A span starting at data<T>() and covering size() elements.
466  template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
467  [[nodiscard]] operator cuda::std::span<T const>() const
468  {
469  CUDF_EXPECTS(type() == cudf::data_type{cudf::type_to_id<T>()},
470  "Span type must match column view type");
471  CUDF_EXPECTS(!nullable(), "A nullable column view cannot be converted to a span.");
472  return cuda::std::span<T const>(data<T>(), size());
473  }
474 
475  protected:
/// @brief Returns the pointer to the base device memory allocation.
///
/// Overrides the base-class hook; only declared here, the definition is not
/// part of this listing.
485  void const* get_data() const noexcept override;
486 
487  private:
// Grants the free function bit_cast(column_view const&, data_type) access to
// this class's non-public members.
488  friend column_view bit_cast(column_view const& input, data_type type);
489 
490  std::vector<column_view> _children{};  ///< Child views, exposed through child(), num_children(), child_begin() and child_end()
492 }; // class column_view
493 
515  public:
/// @brief Default constructor; compiler-generated.
516  mutable_column_view() = default;
517 
// Destructor with a deliberately user-provided (empty) body rather than an
// implicit one; the rationale is given inside the body.
518  ~mutable_column_view() override {
519  // Needed so that the first instance of the implicit destructor for any TU isn't 'constructed'
520  // from a host+device function marking the implicit version also as host+device
521  }
522 
537 
564  size_type size,
565  void* data,
566  bitmask_type* null_mask,
568  size_type offset = 0,
569  std::vector<mutable_column_view> const& children = {});
570 
/// @brief Returns a pointer to the base device memory allocation cast to `T`.
///
/// Mutable counterpart of the base-class head<T>(): calls it and removes the
/// const qualifier from the pointer it returns.
///
/// @tparam T Pointee type, `void` by default. The overload participates only when
///           `T` is `void` or `is_rep_layout_compatible<T>()` holds.
/// @return The base head<T>() pointer as `T*`.
586  template <typename T = void,
587  CUDF_ENABLE_IF(std::is_same_v<T, void> or is_rep_layout_compatible<T>())>
588  T* head() const noexcept
589  {
590  return const_cast<T*>(detail::column_view_base::head<T>());
591  }
592 
/// @brief Returns the underlying data cast to `T`, plus the offset.
///
/// Mutable counterpart of the base-class data<T>(): calls it and removes the
/// const qualifier from the result.
///
/// @tparam T Element type; must satisfy `is_rep_layout_compatible<T>()`.
/// @return The base data<T>() pointer as `T*`.
605  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
606  T* data() const noexcept
607  {
608  return const_cast<T*>(detail::column_view_base::data<T>());
609  }
610 
/// @brief Returns a pointer to the first element (accounting for the offset)
///        after the underlying data is cast to `T`.
///
/// Mutable counterpart of the base-class begin<T>(): calls it and removes the
/// const qualifier from the result.
///
/// @tparam T Element type; must satisfy `is_rep_layout_compatible<T>()`.
/// @return The base begin<T>() pointer as `T*`.
621  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
622  T* begin() const noexcept
623  {
624  return const_cast<T*>(detail::column_view_base::begin<T>());
625  }
626 
/// @brief Returns a pointer one past the last element after the underlying data
///        is cast to `T`.
///
/// Mutable counterpart of the base-class end<T>(): calls it and removes the
/// const qualifier from the result.
///
/// @tparam T Element type; must satisfy `is_rep_layout_compatible<T>()`.
/// @return The base end<T>() pointer as `T*`.
637  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
638  T* end() const noexcept
639  {
640  return const_cast<T*>(detail::column_view_base::end<T>());
641  }
642 
/// @brief Returns the raw pointer to the underlying bitmask allocation.
///
/// Calls the base-class null_mask() and removes the const qualifier from the
/// pointer it returns.
652  [[nodiscard]] bitmask_type* null_mask() const noexcept
653  {
654  return const_cast<bitmask_type*>(detail::column_view_base::null_mask());
655  }
656 
/// @brief Set the null count.
///
/// Only declared here; the definition is not part of this listing.
///
/// @param new_null_count The new null count.
664  void set_null_count(size_type new_null_count);
665 
/// @brief Returns the specified child.
///
/// @param child_index Position in the child vector. It is passed straight to
///        operator[], so no range check is performed here.
/// @return A copy of the child mutable_column_view stored at `child_index`.
672  [[nodiscard]] mutable_column_view child(size_type child_index) const noexcept
673  {
674  return mutable_children[child_index];
675  }
676 
/// @brief Returns the number of child columns.
///
/// @return `mutable_children.size()`, converted explicitly from the vector's
///         unsigned size type to `size_type`.
682  [[nodiscard]] size_type num_children() const noexcept { return static_cast<size_type>(mutable_children.size()); }
683 
/// @brief Returns an iterator to the beginning of the ordered sequence of child
///        column-views (`mutable_children.begin()`, called from a const member).
689  auto child_begin() const noexcept { return mutable_children.begin(); }
690 
/// @brief Returns an iterator to the end of the ordered sequence of child
///        column-views (`mutable_children.end()`, called from a const member).
696  auto child_end() const noexcept { return mutable_children.end(); }
697 
/// @brief Conversion operator from mutable_column_view to column_view.
///
/// Not marked `explicit`, so the conversion may be applied implicitly. Only
/// declared here; the definition is not part of this listing.
703  operator column_view() const;
704 
705  protected:
/// @brief Returns the pointer to the base device memory allocation.
///
/// Overrides the base-class hook; only declared here, the definition is not
/// part of this listing.
715  [[nodiscard]] void const* get_data() const noexcept override;
716 
717  private:
719 
720  std::vector<mutable_column_view> mutable_children;  ///< Child views, exposed through child(), num_children(), child_begin() and child_end()
721 };
722 
730 
753 
776 
777 namespace detail {
/// @brief Computes a hash value from the shallow state of the specified column.
///
/// Only declared here; the definition is not part of this listing.
///
/// @param input The column view to hash.
/// @return The hash value.
793 std::size_t shallow_hash(column_view const& input);
794 
/// @brief Uses only shallow state to determine if two column_views view
///        equivalent columns.
///
/// Only declared here; the definition is not part of this listing.
///
/// @param lhs The left-hand column view.
/// @param rhs The right-hand column view.
/// @return Whether the two views are considered equivalent.
816 bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs);
817 
818 } // namespace detail
819 } // namespace CUDF_EXPORT cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as ind...
column_view(column_view &&)=default
Move constructor.
size_type num_children() const noexcept
Returns the number of child columns.
auto child_begin() const noexcept
Returns iterator to the beginning of the ordered sequence of child column-views.
column_view & operator=(column_view &&)=default
Move assignment operator.
column_view & operator=(column_view const &)=default
Copy assignment operator.
void const * get_data() const noexcept override
Returns pointer to the base device memory allocation.
auto child_end() const noexcept
Returns iterator to the end of the ordered sequence of child column-views.
column_view child(size_type child_index) const noexcept
Returns the specified child.
column_view(data_type type, size_type size, void const *data, bitmask_type const *null_mask, size_type null_count, size_type offset=0, std::vector< column_view > const &children={})
Construct a column_view from pointers to device memory for the elements and bitmask of the column.
column_view(column_view const &)=default
Copy constructor.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:238
A non-owning, immutable view of device data as a column of elements, some of which may be null as ind...
Definition: column_view.hpp:44
T const * end() const noexcept
Returns a pointer to one past the last element after the underlying data is cast to the specified type.
data_type type() const noexcept
Returns the element data_type.
bool has_nulls(size_type begin, size_type end, rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Indicates if the column contains null elements in the range [begin, end), i.e., null_count(begin,...
column_view_base(column_view_base &&)=default
Move constructor.
column_view_base(data_type type, size_type size, void const *data, bitmask_type const *null_mask, size_type null_count, size_type offset=0)
Construct a column_view_base from pointers to device memory for the elements and bitmask of the colum...
size_type null_count() const
Returns the count of null elements.
size_type null_count(size_type begin, size_type end, rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Returns the count of null elements in the range [begin, end)
column_view_base & operator=(column_view_base const &)=default
Copy assignment operator.
column_view_base & operator=(column_view_base &&)=default
Move assignment operator.
size_type size() const noexcept
Returns the number of elements in the column.
T const * begin() const noexcept
Returns a pointer to the first element (accounting for offset) after the underlying data is cast to the specified type.
Definition: column_view.hpp:98
T const * data() const noexcept
Returns the underlying data cast to the specified type, plus the offset.
Definition: column_view.hpp:82
size_type offset() const noexcept
Returns the index of the first element relative to the base memory allocation, i.e....
virtual void const * get_data() const noexcept
Returns pointer to the base device memory allocation.
T const * head() const noexcept
Returns a pointer to the base device memory allocation cast to the specified type.
Definition: column_view.hpp:64
bool has_nulls() const
Indicates if the column contains null elements, i.e., null_count() > 0
bitmask_type const * null_mask() const noexcept
Returns raw pointer to the underlying bitmask allocation.
bool nullable() const noexcept
Indicates if the column can contain null elements, i.e., if it has an allocated bitmask.
column_view_base(column_view_base const &)=default
Copy constructor.
bool is_empty() const noexcept
Returns true if size() returns zero, or false otherwise.
A non-owning, mutable view of device data as a column of elements, some of which may be null as indic...
T * head() const noexcept
Returns a pointer to the base device memory allocation cast to the specified type.
size_type num_children() const noexcept
Returns the number of child columns.
T * begin() const noexcept
Returns a pointer to the first element (accounting for offset) after the underlying data is cast to the specified type.
void set_null_count(size_type new_null_count)
Set the null count.
auto child_begin() const noexcept
Returns iterator to the beginning of the ordered sequence of child column-views.
mutable_column_view(data_type type, size_type size, void *data, bitmask_type *null_mask, size_type null_count, size_type offset=0, std::vector< mutable_column_view > const &children={})
Construct a mutable_column_view from pointers to device memory for the elements and bitmask of the co...
T * data() const noexcept
Returns the underlying data cast to the specified type, plus the offset.
mutable_column_view(mutable_column_view const &)=default
Copy constructor.
mutable_column_view & operator=(mutable_column_view &&)=default
Move assignment operator.
mutable_column_view & operator=(mutable_column_view const &)=default
Copy assignment operator.
auto child_end() const noexcept
Returns iterator to the end of the ordered sequence of child column-views.
bitmask_type * null_mask() const noexcept
Returns raw pointer to the underlying bitmask allocation.
void const * get_data() const noexcept override
Returns pointer to the base device memory allocation.
mutable_column_view child(size_type child_index) const noexcept
Returns the specified child.
mutable_column_view(mutable_column_view &&)=default
Move constructor.
T * end() const noexcept
Returns a pointer to one past the last element after the underlying data is cast to the specified type.
std::size_t shallow_hash(column_view const &input)
Computes a hash value from the shallow state of the specified column.
bool is_shallow_equivalent(column_view const &lhs, column_view const &rhs)
Uses only shallow state to determine if two column_views view equivalent columns.
cudf::size_type null_count(bitmask_type const *bitmask, size_type start, size_type stop, rmm::cuda_stream_view stream=cudf::get_default_stream())
Given a validity bitmask, counts the number of null elements (unset bits) in the range [start,...
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:143
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:85
constexpr CUDF_HOST_DEVICE bool is_chrono()
Indicates whether the type T is a chrono type.
Definition: traits.hpp:490
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
Definition: traits.hpp:39
cuDF interfaces
Definition: host_udf.hpp:26
bool nullable(table_view const &view)
Returns True if any of the columns in the table is nullable. (not entire hierarchy)
mutable_column_view bit_cast(mutable_column_view const &input, data_type type)
Zero-copy cast between types with the same size and compatible underlying representations.
size_type count_descendants(column_view parent)
Counts the number of descendants of the specified parent.
APIs for spans.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:323
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
Type declarations for libcudf.