column_view.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/types.hpp>
19 #include <cudf/utilities/error.hpp>
20 #include <cudf/utilities/span.hpp>
23 
24 #include <limits>
25 #include <type_traits>
26 #include <vector>
27 
33 namespace cudf {
34 namespace detail {
54  public:
71  template <typename T = void,
72  CUDF_ENABLE_IF(std::is_same_v<T, void> or is_rep_layout_compatible<T>())>
73  T const* head() const noexcept
74  {
75  return static_cast<T const*>(_data);
76  }
77 
90  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
91  T const* data() const noexcept
92  {
93  return head<T>() + _offset;
94  }
95 
106  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
107  T const* begin() const noexcept
108  {
109  return data<T>();
110  }
111 
122  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
123  T const* end() const noexcept
124  {
125  return begin<T>() + size();
126  }
127 
131  [[nodiscard]] size_type size() const noexcept { return _size; }
132 
136  [[nodiscard]] size_type is_empty() const noexcept { return size() == 0; }
137 
141  [[nodiscard]] data_type type() const noexcept { return _type; }
142 
152  [[nodiscard]] bool nullable() const noexcept { return nullptr != _null_mask; }
153 
162  [[nodiscard]] size_type null_count() const;
163 
178 
186  [[nodiscard]] bool has_nulls() const { return null_count() > 0; }
187 
200  [[nodiscard]] bool has_nulls(size_type begin, size_type end) const
201  {
202  return null_count(begin, end) > 0;
203  }
204 
212  [[nodiscard]] bitmask_type const* null_mask() const noexcept { return _null_mask; }
213 
218  [[nodiscard]] size_type offset() const noexcept { return _offset; }
219 
220  protected:
221  data_type _type{type_id::EMPTY};
223  void const* _data{};
225  mutable size_type _null_count{};
229 
231  column_view_base() = default;
232  ~column_view_base() = default;
233  column_view_base(column_view_base const&) = default;
234  column_view_base(column_view_base&&) = default;
235  column_view_base& operator=(column_view_base const&) = default;
236  column_view_base& operator=(column_view_base&&) = default;
237 
267  size_type size,
268  void const* data,
269  bitmask_type const* null_mask = nullptr,
270  size_type null_count = UNKNOWN_NULL_COUNT,
271  size_type offset = 0);
272 };
273 
275  public:
276  protected:
277 };
278 } // namespace detail
279 
301  public:
302  column_view() = default;
303 
304  // these pragmas work around the nvcc issue where if a column_view is used
305  // inside of a __device__ code path, these functions will end up being created
306  // as __host__ __device__ because they are explicitly defaulted. However, if
307  // they then end up being called by a simple __host__ function
308  // (eg std::vector destructor) you get a compile error because you're trying to
309  // call a __host__ __device__ function from a __host__ function.
310 #pragma nv_exec_check_disable
311  ~column_view() = default;
312 #pragma nv_exec_check_disable
313  column_view(column_view const& c) = default;
314 
315  column_view(column_view&&) = default;
316  column_view& operator=(column_view const&) = default;
317  column_view& operator=(column_view&&) = default;
318 
350  size_type size,
351  void const* data,
352  bitmask_type const* null_mask = nullptr,
353  size_type null_count = UNKNOWN_NULL_COUNT,
354  size_type offset = 0,
355  std::vector<column_view> const& children = {});
356 
363  [[nodiscard]] column_view child(size_type child_index) const noexcept
364  {
365  return _children[child_index];
366  }
367 
371  [[nodiscard]] size_type num_children() const noexcept { return _children.size(); }
372 
376  auto child_begin() const noexcept { return _children.cbegin(); }
377 
381  auto child_end() const noexcept { return _children.cend(); }
382 
391  template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
393  : column_view(
394  cudf::data_type{cudf::type_to_id<T>()}, data.size(), data.data(), nullptr, 0, 0, {})
395  {
396  CUDF_EXPECTS(
397  data.size() < static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
398  "Data exceeds the maximum size of a column view.");
399  }
400 
412  template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
413  [[nodiscard]] operator device_span<T const>() const
414  {
415  CUDF_EXPECTS(type() == cudf::data_type{cudf::type_to_id<T>()},
416  "Device span type must match column view type.");
417  CUDF_EXPECTS(!nullable(), "A nullable column view cannot be converted to a device span.");
418  return device_span<T const>(data<T>(), size());
419  }
420 
421  private:
423 
424  std::vector<column_view> _children{};
425 };  // class column_view
427 
449  public:
450  mutable_column_view() = default;
451 
452  ~mutable_column_view() = default;
453 
454  mutable_column_view(mutable_column_view const&) = default;
455 
457  mutable_column_view& operator=(mutable_column_view const&) = default;
458  mutable_column_view& operator=(mutable_column_view&&) = default;
459 
490  size_type size,
491  void* data,
492  bitmask_type* null_mask = nullptr,
493  size_type null_count = cudf::UNKNOWN_NULL_COUNT,
494  size_type offset = 0,
495  std::vector<mutable_column_view> const& children = {});
496 
512  template <typename T = void,
513  CUDF_ENABLE_IF(std::is_same_v<T, void> or is_rep_layout_compatible<T>())>
514  T* head() const noexcept
515  {
516  return const_cast<T*>(detail::column_view_base::head<T>());
517  }
518 
531  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
532  T* data() const noexcept
533  {
534  return const_cast<T*>(detail::column_view_base::data<T>());
535  }
536 
547  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
548  T* begin() const noexcept
549  {
550  return const_cast<T*>(detail::column_view_base::begin<T>());
551  }
552 
563  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
564  T* end() const noexcept
565  {
566  return const_cast<T*>(detail::column_view_base::end<T>());
567  }
568 
576  [[nodiscard]] bitmask_type* null_mask() const noexcept
577  {
578  return const_cast<bitmask_type*>(detail::column_view_base::null_mask());
579  }
580 
588  void set_null_count(size_type new_null_count);
589 
596  [[nodiscard]] mutable_column_view child(size_type child_index) const noexcept
597  {
598  return mutable_children[child_index];
599  }
600 
604  [[nodiscard]] size_type num_children() const noexcept { return mutable_children.size(); }
605 
609  auto child_begin() const noexcept { return mutable_children.begin(); }
610 
614  auto child_end() const noexcept { return mutable_children.end(); }
615 
621  operator column_view() const;
622 
623  private:
625 
626  std::vector<mutable_column_view> mutable_children;
627 };
628 
636 
659 
682 
683 namespace detail {
699 std::size_t shallow_hash(column_view const& input);
700 
722 bool is_shallow_equivalent(column_view const& lhs, column_view const& rhs);
723 } // namespace detail
724 } // namespace cudf
cudf::mutable_column_view::child_begin
auto child_begin() const noexcept
Returns iterator to the beginning of the ordered sequence of child column-views.
Definition: column_view.hpp:609
cudf::bit_cast
column_view bit_cast(column_view const &input, data_type type)
Zero-copy cast between types with the same size and compatible underlying representations.
cudf::mutable_column_view::num_children
size_type num_children() const noexcept
Returns the number of child columns.
Definition: column_view.hpp:604
cudf::detail::column_view_base::is_empty
size_type is_empty() const noexcept
Returns true if size() returns zero, or false otherwise.
Definition: column_view.hpp:136
cudf::detail::column_view_base::_data
void const * _data
Pointer to device memory containing elements.
Definition: column_view.hpp:223
cudf::detail::shallow_hash
std::size_t shallow_hash(column_view const &input)
Computes a hash value from the shallow state of the specified column.
cudf::detail::column_view_base::_size
size_type _size
Number of elements.
Definition: column_view.hpp:222
cudf::column_view::child_begin
auto child_begin() const noexcept
Returns iterator to the beginning of the ordered sequence of child column-views.
Definition: column_view.hpp:376
cudf::size_type
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
cudf::column_view::num_children
size_type num_children() const noexcept
Returns the number of child columns.
Definition: column_view.hpp:371
cudf::column_view
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Definition: column_view.hpp:300
cudf::detail::column_view_base::offset
size_type offset() const noexcept
Returns the index of the first element relative to the base memory allocation, i.e., relative to the pointer returned by head<T>().
Definition: column_view.hpp:218
types.hpp
Type declarations for libcudf.
cudf::mutable_column_view::begin
T * begin() const noexcept
Returns a pointer to the first element (accounting for offset) when the underlying data is cast to the specified type.
Definition: column_view.hpp:548
cudf::detail::is_shallow_equivalent
bool is_shallow_equivalent(column_view const &lhs, column_view const &rhs)
Uses only shallow state to determine if two column_views view equivalent columns.
cudf::column_view::bit_cast
friend column_view bit_cast(column_view const &input, data_type type)
Zero-copy cast between types with the same size and compatible underlying representations.
cudf::mutable_column_view
A non-owning, mutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Definition: column_view.hpp:448
cudf::detail::column_view_base::_offset
size_type _offset
Definition: column_view.hpp:228
cudf::column_view::child_end
auto child_end() const noexcept
Returns iterator to the end of the ordered sequence of child column-views.
Definition: column_view.hpp:381
cudf::mutable_column_view::data
T * data() const noexcept
Returns the underlying data cast to the specified type, plus the offset.
Definition: column_view.hpp:532
cudf::bitmask_type
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:85
cudf::detail::column_view_base::null_count
size_type null_count(size_type begin, size_type end) const
Returns the count of null elements in the range [begin, end)
CUDF_ENABLE_IF
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
Definition: traits.hpp:48
cudf::detail::column_view_base
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Definition: column_view.hpp:53
cudf::column_view::column_view
column_view(data_type type, size_type size, void const *data, bitmask_type const *null_mask=nullptr, size_type null_count=UNKNOWN_NULL_COUNT, size_type offset=0, std::vector< column_view > const &children={})
Construct a column_view from pointers to device memory for the elements and bitmask of the column.
cudf::mutable_column_view::mutable_column_view
mutable_column_view(data_type type, size_type size, void *data, bitmask_type *null_mask=nullptr, size_type null_count=cudf::UNKNOWN_NULL_COUNT, size_type offset=0, std::vector< mutable_column_view > const &children={})
Construct a mutable_column_view from pointers to device memory for the elements and bitmask of the column.
cudf::mutable_column_view::child_end
auto child_end() const noexcept
Returns iterator to the end of the ordered sequence of child column-views.
Definition: column_view.hpp:614
cudf::count_descendants
size_type count_descendants(column_view parent)
Counts the number of descendants of the specified parent.
cudf::detail::column_view_base::null_count
size_type null_count() const
Returns the count of null elements.
cudf::detail::mutable_column_view_base
Definition: column_view.hpp:274
cudf::mutable_column_view::null_mask
bitmask_type * null_mask() const noexcept
Returns raw pointer to the underlying bitmask allocation.
Definition: column_view.hpp:576
cudf::detail::column_view_base::column_view_base
column_view_base(data_type type, size_type size, void const *data, bitmask_type const *null_mask=nullptr, size_type null_count=UNKNOWN_NULL_COUNT, size_type offset=0)
Construct a column_view_base from pointers to device memory for the elements and bitmask of the column.
cudf::data_type
Indicator for the logical data type of an element in a column.
Definition: types.hpp:240
cudf::mutable_column_view::child
mutable_column_view child(size_type child_index) const noexcept
Returns the specified child (by value).
Definition: column_view.hpp:596
cudf::detail::column_view_base::has_nulls
bool has_nulls() const
Indicates if the column contains null elements, i.e., null_count() > 0
Definition: column_view.hpp:186
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::mutable_column_view::head
T * head() const noexcept
Returns a pointer to the base device memory allocation cast to the specified type.
Definition: column_view.hpp:514
cudf::detail::column_view_base::nullable
bool nullable() const noexcept
Indicates if the column can contain null elements, i.e., if it has an allocated bitmask.
Definition: column_view.hpp:152
cudf::is_chrono
constexpr bool is_chrono()
Indicates whether the type T is a chrono type.
Definition: traits.hpp:499
cudf::detail::column_view_base::data
T const * data() const noexcept
Returns the underlying data cast to the specified type, plus the offset.
Definition: column_view.hpp:91
cudf::mutable_column_view::set_null_count
void set_null_count(size_type new_null_count)
Set the null count.
cudf::detail::column_view_base::type
data_type type() const noexcept
Returns the element data_type
Definition: column_view.hpp:141
CUDF_EXPECTS
#define CUDF_EXPECTS(cond, reason)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:91
cudf::detail::column_view_base::begin
T const * begin() const noexcept
Returns a pointer to the first element (accounting for offset) after the underlying data is cast to the specified type.
Definition: column_view.hpp:107
cudf::detail::column_view_base::end
T const * end() const noexcept
Returns a pointer to one past the last element after the underlying data is cast to the specified type.
Definition: column_view.hpp:123
cudf::detail::column_view_base::_null_mask
bitmask_type const * _null_mask
Definition: column_view.hpp:224
cudf::detail::column_view_base::has_nulls
bool has_nulls(size_type begin, size_type end) const
Indicates if the column contains null elements in the range [begin, end), i.e., null_count(begin, end) > 0.
Definition: column_view.hpp:200
cudf::detail::column_view_base::size
size_type size() const noexcept
Returns the number of elements in the column.
Definition: column_view.hpp:131
cudf::column_view::child
column_view child(size_type child_index) const noexcept
Returns the specified child.
Definition: column_view.hpp:363
cudf::device_span
Definition: span.hpp:194
type_dispatcher.hpp
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
traits.hpp
cudf::mutable_column_view::bit_cast
friend mutable_column_view bit_cast(mutable_column_view const &input, data_type type)
Zero-copy cast between types with the same size and compatible underlying representations.
cudf::detail::column_view_base::head
T const * head() const noexcept
Returns a pointer to the base device memory allocation cast to the specified type.
Definition: column_view.hpp:73
cudf::mutable_column_view::end
T * end() const noexcept
Returns a pointer to one past the last element after the underlying data is cast to the specified type.
Definition: column_view.hpp:564
error.hpp
cudf::detail::column_view_base::null_mask
bitmask_type const * null_mask() const noexcept
Returns raw pointer to the underlying bitmask allocation.
Definition: column_view.hpp:212
cudf::detail::column_view_base::_type
data_type _type
Element type.
Definition: column_view.hpp:221
cudf::detail::column_view_base::_null_count
size_type _null_count
The number of null elements.
Definition: column_view.hpp:227