column_device_view_base.cuh
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/detail/offsets_iterator.cuh>
20 #include <cudf/strings/string_view.cuh>
21 #include <cudf/types.hpp>
22 #include <cudf/utilities/bit.hpp>
24 
25 #include <cuda/std/optional>
26 #include <cuda/std/type_traits>
27 
28 #include <algorithm>
29 #include <type_traits>
30 
36 namespace CUDF_EXPORT cudf {
37 
47 struct nullate {
48  struct YES : cuda::std::true_type {};
49  struct NO : cuda::std::false_type {};
55  struct DYNAMIC {
56  DYNAMIC() = delete;
65  constexpr explicit DYNAMIC(bool b) noexcept : value{b} {}
72  CUDF_HOST_DEVICE constexpr operator bool() const noexcept { return value; }
73  bool value;
74  };
75 };
76 
77 namespace detail {
89 class alignas(16) column_device_view_base {
90  public:
91  // TODO: merge this offsets column index with `strings_column_view::offsets_column_index`
92  static constexpr size_type offsets_column_index{0};
93 
94  column_device_view_base() = delete;
95  ~column_device_view_base() = default;
110 
127  template <typename T = void,
128  CUDF_ENABLE_IF(cuda::std::is_same_v<T, void> or is_rep_layout_compatible<T>())>
129  [[nodiscard]] CUDF_HOST_DEVICE T const* head() const noexcept
130  {
131  return static_cast<T const*>(_data);
132  }
133 
149  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
150  [[nodiscard]] CUDF_HOST_DEVICE T const* data() const noexcept
151  {
152  return head<T>() + _offset;
153  }
154 
160  [[nodiscard]] CUDF_HOST_DEVICE size_type size() const noexcept { return _size; }
161 
167  [[nodiscard]] CUDF_HOST_DEVICE data_type type() const noexcept { return _type; }
168 
178  [[nodiscard]] CUDF_HOST_DEVICE bool nullable() const noexcept { return nullptr != _null_mask; }
179 
189  [[nodiscard]] CUDF_HOST_DEVICE bitmask_type const* null_mask() const noexcept
190  {
191  return _null_mask;
192  }
193 
200  [[nodiscard]] CUDF_HOST_DEVICE size_type offset() const noexcept { return _offset; }
201 
216  [[nodiscard]] __device__ bool is_valid(size_type element_index) const noexcept
217  {
218  return not nullable() or is_valid_nocheck(element_index);
219  }
220 
233  [[nodiscard]] __device__ bool is_valid_nocheck(size_type element_index) const noexcept
234  {
235  return bit_is_set(_null_mask, offset() + element_index);
236  }
237 
251  [[nodiscard]] __device__ bool is_null(size_type element_index) const noexcept
252  {
253  return not is_valid(element_index);
254  }
255 
267  [[nodiscard]] __device__ bool is_null_nocheck(size_type element_index) const noexcept
268  {
269  return not is_valid_nocheck(element_index);
270  }
271 
281  [[nodiscard]] __device__ bitmask_type get_mask_word(size_type word_index) const noexcept
282  {
283  return null_mask()[word_index];
284  }
285 
286  protected:
287  data_type _type{type_id::EMPTY};
288  cudf::size_type _size{};
289  void const* _data{};
290  bitmask_type const* _null_mask{};
292  size_type _offset{};
294 
305  size_type size,
306  void const* data,
307  bitmask_type const* null_mask,
308  size_type offset)
309  : _type{type}, _size{size}, _data{data}, _null_mask{null_mask}, _offset{offset}
310  {
311  }
312 
313  template <typename C, typename T, typename = void>
314  struct has_element_accessor_impl : cuda::std::false_type {};
315 
316  template <typename C, typename T>
317  struct has_element_accessor_impl<
318  C,
319  T,
320  void_t<decltype(cuda::std::declval<C>().template element<T>(cuda::std::declval<size_type>()))>>
321  : cuda::std::true_type {};
322 };
323 // @cond
324 // Forward declaration
325 template <typename T>
326 struct value_accessor;
327 template <typename T, typename Nullate>
328 struct optional_accessor;
329 template <typename T, bool has_nulls>
330 struct pair_accessor;
331 template <typename T, bool has_nulls>
332 struct pair_rep_accessor;
333 template <typename T>
334 struct mutable_value_accessor;
335 // @endcond
336 } // namespace detail
337 
345  public:
346  column_device_view_core() = delete;
347  ~column_device_view_core() = default;
362 
372  column_device_view_core(column_view column, void* h_ptr, void* d_ptr);
373 
391  size_type size) const noexcept
392  {
393  return column_device_view_core{this->type(),
394  size,
395  this->head(),
396  this->null_mask(),
397  this->offset() + offset,
398  d_children,
399  this->num_child_columns()};
400  }
401 
419  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
420  [[nodiscard]] __device__ T element(size_type element_index) const noexcept
421  {
422  return data<T>()[element_index];
423  }
424 
436  template <typename T, CUDF_ENABLE_IF(cuda::std::is_same_v<T, string_view>)>
437  [[nodiscard]] __device__ T element(size_type element_index) const noexcept
438  {
439  size_type index = element_index + offset(); // account for this view's _offset
440  char const* d_strings = static_cast<char const*>(_data);
441  auto const offsets = child(offsets_column_index);
442  auto const itr = cudf::detail::input_offsetalator(offsets.head(), offsets.type());
443  auto const offset = itr[index];
444  return string_view{d_strings + offset, static_cast<cudf::size_type>(itr[index + 1] - offset)};
445  }
446 
447  public:
458  template <typename T, CUDF_ENABLE_IF(cudf::is_fixed_point<T>())>
459  [[nodiscard]] __device__ T element(size_type element_index) const noexcept
460  {
461  using namespace numeric;
462  using rep = typename T::rep;
463  auto const scale = scale_type{_type.scale()};
464  return T{scaled_integer<rep>{data<rep>()[element_index], scale}};
465  }
466 
473  [[nodiscard]] __device__ column_device_view_core child(size_type child_index) const noexcept
474  {
475  return d_children[child_index];
476  }
477 
483  [[nodiscard]] CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
484  {
485  return _num_children;
486  }
487 
488  protected:
502  size_type size,
503  void const* data,
504  bitmask_type const* null_mask,
505  size_type offset,
506  column_device_view_core* children,
507  size_type num_children)
508  : column_device_view_base(type, size, data, null_mask, offset),
509  d_children(children),
510  _num_children(num_children)
511  {
512  }
513 
514  protected:
515  column_device_view_core* d_children{};
519  size_type _num_children{};
520 };
521 
529  public:
531  ~mutable_column_device_view_core() = default;
533  default;
535  default;
548 
565  template <typename T = void,
566  CUDF_ENABLE_IF(cuda::std::is_same_v<T, void> or is_rep_layout_compatible<T>())>
567  CUDF_HOST_DEVICE T* head() const noexcept
568  {
569  return const_cast<T*>(detail::column_device_view_base::head<T>());
570  }
571 
584  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
585  CUDF_HOST_DEVICE T* data() const noexcept
586  {
587  return const_cast<T*>(detail::column_device_view_base::data<T>());
588  }
589 
604  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
605  [[nodiscard]] __device__ T& element(size_type element_index) const noexcept
606  {
607  return data<T>()[element_index];
608  }
609 
617  template <typename T, CUDF_ENABLE_IF(is_rep_layout_compatible<T>())>
618  __device__ void assign(size_type element_index, T value) const noexcept
619  {
620  data<T>()[element_index] = value;
621  }
622 
631  template <typename T, CUDF_ENABLE_IF(is_fixed_point<T>())>
632  __device__ void assign(size_type element_index, T value) const noexcept
633  {
634  // consider asserting that the scale matches
635  using namespace numeric;
636  using rep = typename T::rep;
637  data<rep>()[element_index] = value.value();
638  }
639 
648  [[nodiscard]] CUDF_HOST_DEVICE bitmask_type* null_mask() const noexcept
649  {
650  return const_cast<bitmask_type*>(detail::column_device_view_base::null_mask());
651  }
652 
659  [[nodiscard]] __device__ mutable_column_device_view_core
660  child(size_type child_index) const noexcept
661  {
662  return d_children[child_index];
663  }
664 
665 #ifdef __CUDACC__ // because set_bit in bit.hpp is wrapped with __CUDACC__
680  __device__ void set_valid(size_type element_index) const noexcept
681  {
682  return set_bit(null_mask(), element_index);
683  }
684 
698  __device__ void set_null(size_type element_index) const noexcept
699  {
700  return clear_bit(null_mask(), element_index);
701  }
702 
703 #endif
704 
715  __device__ void set_mask_word(size_type word_index, bitmask_type new_word) const noexcept
716  {
717  null_mask()[word_index] = new_word;
718  }
719 
720  protected:
734  size_type size,
735  void const* data,
736  bitmask_type const* null_mask,
737  size_type offset,
739  size_type num_children)
740  : column_device_view_base(type, size, data, null_mask, offset),
741  d_children(children),
742  _num_children(num_children)
743  {
744  }
745 
750  size_type _num_children{};
751 };
752 
753 namespace detail {
754 
755 #ifdef __CUDACC__ // because set_bit in bit.hpp is wrapped with __CUDACC__
756 
763 __device__ inline bitmask_type get_mask_offset_word(bitmask_type const* __restrict__ source,
764  size_type destination_word_index,
765  size_type source_begin_bit,
766  size_type source_end_bit)
767 {
768  size_type source_word_index = destination_word_index + word_index(source_begin_bit);
769  bitmask_type curr_word = source[source_word_index];
770  bitmask_type next_word = 0;
771  if (word_index(source_end_bit - 1) >
772  word_index(source_begin_bit +
773  destination_word_index * detail::size_in_bits<bitmask_type>())) {
774  next_word = source[source_word_index + 1];
775  }
776  return __funnelshift_r(curr_word, next_word, source_begin_bit);
777 }
778 
779 #endif
780 
781 } // namespace detail
782 } // namespace CUDF_EXPORT cudf
Utilities for bit and bitmask operations.
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
column_device_view_core(column_device_view_core const &)=default
Copy constructor.
CUDF_HOST_DEVICE size_type num_child_columns() const noexcept
Returns the number of child columns.
T element(size_type element_index) const noexcept
Returns a copy of the element at the specified index.
column_device_view_core & operator=(column_device_view_core const &)=default
Copy assignment operator.
column_device_view_core(column_device_view_core &&)=default
Move constructor.
CUDF_HOST_DEVICE column_device_view_core slice(size_type offset, size_type size) const noexcept
Get a new column_device_view_core which is a slice of this column.
CUDF_HOST_DEVICE column_device_view_core(data_type type, size_type size, void const *data, bitmask_type const *null_mask, size_type offset, column_device_view_core *children, size_type num_children)
Creates an instance of this class using pre-existing device memory pointers to data,...
column_device_view_core & operator=(column_device_view_core &&)=default
Move assignment operator.
column_device_view_core(column_view column, void *h_ptr, void *d_ptr)
Creates an instance of this class using the specified host memory pointer (h_ptr) to store child obje...
column_device_view_core child(size_type child_index) const noexcept
Returns the specified child.
A non-owning, immutable view of device data as a column of elements, some of which may be null as ind...
A container of nullable device data as a column of elements.
Definition: column.hpp:47
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
CUDF_HOST_DEVICE T const * head() const noexcept
Returns pointer to the base device memory allocation casted to the specified type.
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
CUDF_HOST_DEVICE column_device_view_base(data_type type, size_type size, void const *data, bitmask_type const *null_mask, size_type offset)
Constructs a column with the specified type, size, data, nullmask and offset.
column_device_view_base & operator=(column_device_view_base &&)=default
Move assignment operator.
CUDF_HOST_DEVICE size_type size() const noexcept
Returns the number of elements in the column.
bitmask_type get_mask_word(size_type word_index) const noexcept
Returns the specified bitmask word from the null_mask().
column_device_view_base(column_device_view_base &&)=default
Move constructor.
column_device_view_base(column_device_view_base const &)=default
Copy constructor.
CUDF_HOST_DEVICE bitmask_type const * null_mask() const noexcept
Returns raw pointer to the underlying bitmask allocation.
bool is_valid_nocheck(size_type element_index) const noexcept
Returns whether the specified element holds a valid value (i.e., not null)
CUDF_HOST_DEVICE T const * data() const noexcept
Returns the underlying data casted to the specified type, plus the offset.
column_device_view_base & operator=(column_device_view_base const &)=default
Copy assignment operator.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE size_type offset() const noexcept
Returns the index of the first element relative to the base memory allocation, i.e....
bool is_null_nocheck(size_type element_index) const noexcept
Returns whether the specified element is null.
CUDF_HOST_DEVICE bool nullable() const noexcept
Indicates whether the column can contain null elements, i.e., if it has an allocated bitmask.
bool is_valid(size_type element_index) const noexcept
Returns whether the specified element holds a valid value (i.e., not null).
A mutable, non-owning view of device data as a column of elements that is trivially copyable and usab...
mutable_column_device_view_core(mutable_column_device_view_core const &)=default
Copy constructor.
mutable_column_device_view_core child(size_type child_index) const noexcept
Returns the specified child.
void assign(size_type element_index, T value) const noexcept
Assigns value to the element at element_index
CUDF_HOST_DEVICE T * data() const noexcept
Returns the underlying data casted to the specified type, plus the offset.
CUDF_HOST_DEVICE mutable_column_device_view_core(data_type type, size_type size, void const *data, bitmask_type const *null_mask, size_type offset, mutable_column_device_view_core *children, size_type num_children)
Creates an instance of this class using pre-existing device memory pointers to data,...
void set_mask_word(size_type word_index, bitmask_type new_word) const noexcept
Updates the specified bitmask word in the null_mask() with a new word.
T & element(size_type element_index) const noexcept
Returns reference to element at the specified index.
CUDF_HOST_DEVICE T * head() const noexcept
Returns pointer to the base device memory allocation casted to the specified type.
mutable_column_device_view_core & operator=(mutable_column_device_view_core const &)=default
Copy assignment operator.
mutable_column_device_view_core & operator=(mutable_column_device_view_core &&)=default
Move assignment operator.
mutable_column_device_view_core(mutable_column_device_view_core &&)=default
Move constructor.
CUDF_HOST_DEVICE bitmask_type * null_mask() const noexcept
Returns raw pointer to the underlying bitmask allocation.
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8...
Definition: string_view.hpp:44
Class definition for fixed point data type.
scale_type
The scale type for fixed_point.
Definition: fixed_point.hpp:43
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
constexpr CUDF_HOST_DEVICE size_type word_index(size_type bit_index)
Returns the index of the word containing the specified bit.
Definition: bit.hpp:74
CUDF_HOST_DEVICE bool bit_is_set(bitmask_type const *bitmask, size_type bit_index)
Indicates whether the specified bit is set to 1
Definition: bit.hpp:128
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:96
void void_t
Utility metafunction that maps a sequence of any types to the type void.
Definition: traits.hpp:37
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
Definition: traits.hpp:50
cuDF interfaces
Definition: host_udf.hpp:37
bool nullable(table_view const &view)
Returns True if any of the columns in the table is nullable. (not entire hierarchy)
fixed_point and supporting types
Definition: fixed_point.hpp:33
nullate::DYNAMIC defers the determination of nullability to run time rather than compile time....
bool value
True if nulls are expected.
constexpr DYNAMIC(bool b) noexcept
Create a runtime nullate object.
Indicates the presence of nulls at compile-time or runtime.
Helper struct for constructing fixed_point when value is already shifted.
Type declarations for libcudf.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32