column_wrapper.hpp
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf_test/column_utilities.hpp>
9 #include <cudf_test/default_stream.hpp>
10 
11 #include <cudf/column/column.hpp>
13 #include <cudf/copying.hpp>
14 #include <cudf/detail/concatenate.hpp>
15 #include <cudf/detail/iterator.cuh>
16 #include <cudf/detail/utilities/vector_factories.hpp>
20 #include <cudf/null_mask.hpp>
21 #include <cudf/types.hpp>
22 #include <cudf/utilities/bit.hpp>
24 #include <cudf/utilities/export.hpp>
28 
29 #include <rmm/device_buffer.hpp>
30 
31 #include <cuda/iterator>
32 #include <cuda/std/functional>
33 #include <thrust/copy.h>
34 #include <thrust/host_vector.h>
35 #include <thrust/iterator/transform_iterator.h>
36 
37 #include <algorithm>
38 #include <iterator>
39 #include <memory>
40 #include <numeric>
41 
42 namespace CUDF_EXPORT cudf {
43 namespace test {
44 namespace detail {
54  public:
62  operator column_view() const { return wrapped->view(); }
63 
71  operator mutable_column_view() { return wrapped->mutable_view(); }
72 
78  std::unique_ptr<cudf::column> release() { return std::move(wrapped); }
79 
80  protected:
81  std::unique_ptr<cudf::column> wrapped{};
82 };
83 
87 template <typename From, typename To>
97  template <typename FromT = From,
98  typename ToT = To,
99  std::enable_if_t<std::is_same_v<FromT, ToT>, void>* = nullptr>
100  constexpr ToT operator()(FromT element) const
101  {
102  return element;
103  }
104 
113  template <
114  typename FromT = From,
115  typename ToT = To,
116  std::enable_if_t<!std::is_same_v<FromT, ToT> && (cudf::is_convertible<FromT, ToT>::value ||
117  std::is_constructible_v<ToT, FromT>),
118  void>* = nullptr>
119  constexpr ToT operator()(FromT element) const
120  {
121  return static_cast<ToT>(element);
122  }
123 
132  template <
133  typename FromT = From,
134  typename ToT = To,
135  std::enable_if_t<std::is_integral_v<FromT> && cudf::is_timestamp<ToT>(), void>* = nullptr>
136  constexpr ToT operator()(FromT element) const
137  {
138  return ToT{typename ToT::duration{element}};
139  }
140 };
141 
152 template <typename ElementTo,
153  typename ElementFrom,
154  typename InputIterator,
155  std::enable_if_t<not cudf::is_fixed_point<ElementTo>()>* = nullptr>
156 rmm::device_buffer make_elements(InputIterator begin, InputIterator end)
157 {
158  static_assert(cudf::is_fixed_width<ElementTo>(), "Unexpected non-fixed width type.");
159  auto transformer = fixed_width_type_converter<ElementFrom, ElementTo>{};
160  auto transform_begin = thrust::make_transform_iterator(begin, transformer);
161  auto const size = cudf::distance(begin, end);
162  auto const elements = thrust::host_vector<ElementTo>(transform_begin, transform_begin + size);
163  return rmm::device_buffer{
164  elements.data(), size * sizeof(ElementTo), cudf::test::get_default_stream()};
165 }
166 
// The two overloads below differ from the overload above only in their SFINAE
// constraints, so Doxygen sees them as duplicates.
170 
181 template <typename ElementTo,
182  typename ElementFrom,
183  typename InputIterator,
184  std::enable_if_t<not cudf::is_fixed_point<ElementFrom>() and
185  cudf::is_fixed_point<ElementTo>()>* = nullptr>
186 rmm::device_buffer make_elements(InputIterator begin, InputIterator end)
187 {
188  using RepType = typename ElementTo::rep;
189  auto transformer = fixed_width_type_converter<ElementFrom, RepType>{};
190  auto transform_begin = thrust::make_transform_iterator(begin, transformer);
191  auto const size = cudf::distance(begin, end);
192  auto const elements = thrust::host_vector<RepType>(transform_begin, transform_begin + size);
193  return rmm::device_buffer{
194  elements.data(), size * sizeof(RepType), cudf::test::get_default_stream()};
195 }
196 
207 template <typename ElementTo,
208  typename ElementFrom,
209  typename InputIterator,
210  std::enable_if_t<cudf::is_fixed_point<ElementFrom>() and
211  cudf::is_fixed_point<ElementTo>()>* = nullptr>
212 rmm::device_buffer make_elements(InputIterator begin, InputIterator end)
213 {
214  using namespace numeric;
215  using RepType = typename ElementTo::rep;
216 
217  CUDF_EXPECTS(std::all_of(begin, end, [](ElementFrom v) { return v.scale() == 0; }),
218  "Only zero-scale fixed-point values are supported");
219 
220  auto to_rep = [](ElementTo fp) { return fp.value(); };
221  auto transformer_begin = thrust::make_transform_iterator(begin, to_rep);
222  auto const size = cudf::distance(begin, end);
223  auto const elements = thrust::host_vector<RepType>(transformer_begin, transformer_begin + size);
224  return rmm::device_buffer{
225  elements.data(), size * sizeof(RepType), cudf::test::get_default_stream()};
226 }
228 
242 template <typename ValidityIterator>
243 std::pair<std::vector<bitmask_type>, cudf::size_type> make_null_mask_vector(ValidityIterator begin,
244  ValidityIterator end)
245 {
246  auto const size = cudf::distance(begin, end);
247  auto const num_words = cudf::bitmask_allocation_size_bytes(size) / sizeof(bitmask_type);
248 
249  auto null_mask = std::vector<bitmask_type>(num_words, 0);
250  auto null_count = cudf::size_type{0};
251  for (auto i = 0; i < size; ++i) {
252  if (*(begin + i)) {
253  set_bit_unsafe(null_mask.data(), i);
254  } else {
255  ++null_count;
256  }
257  }
258 
259  return {std::move(null_mask), null_count};
260 }
261 
275 template <typename ValidityIterator>
276 std::pair<rmm::device_buffer, cudf::size_type> make_null_mask(ValidityIterator begin,
277  ValidityIterator end)
278 {
279  auto [null_mask, null_count] = make_null_mask_vector(begin, end);
280  auto d_mask = rmm::device_buffer{null_mask.data(),
283  return {std::move(d_mask), null_count};
284 }
285 
300 template <typename StringsIterator, typename ValidityIterator>
301 auto make_chars_and_offsets(StringsIterator begin, StringsIterator end, ValidityIterator v)
302 {
303  std::vector<char> chars{};
304  std::vector<cudf::size_type> offsets(1, 0);
305  for (auto str = begin; str < end; ++str) {
306  std::string tmp = (*v++) ? std::string(*str) : std::string{};
307  chars.insert(chars.end(), std::cbegin(tmp), std::cend(tmp));
308  auto const last_offset = static_cast<std::size_t>(offsets.back());
309  auto const next_offset = last_offset + tmp.length();
310  CUDF_EXPECTS(
311  next_offset < static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max()),
312  "Cannot use strings_column_wrapper to build a large strings column");
313  offsets.push_back(static_cast<cudf::size_type>(next_offset));
314  }
315  return std::pair(std::move(chars), std::move(offsets));
316 };
317 } // namespace detail
318 
327 template <typename ElementTo, typename SourceElementT = ElementTo>
329  public:
333  fixed_width_column_wrapper() : column_wrapper{}
334  {
335  std::vector<ElementTo> empty;
336  wrapped.reset(
337  new cudf::column{cudf::data_type{cudf::type_to_id<ElementTo>()},
338  0,
339  detail::make_elements<ElementTo, SourceElementT>(empty.begin(), empty.end()),
341  0});
342  }
343 
362  template <typename InputIterator>
363  fixed_width_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{}
364  {
365  auto const size = cudf::distance(begin, end);
366  wrapped.reset(new cudf::column{cudf::data_type{cudf::type_to_id<ElementTo>()},
367  size,
368  detail::make_elements<ElementTo, SourceElementT>(begin, end),
370  0});
371  }
372 
396  template <typename InputIterator, typename ValidityIterator>
397  fixed_width_column_wrapper(InputIterator begin, InputIterator end, ValidityIterator v)
398  : column_wrapper{}
399  {
400  auto const size = cudf::distance(begin, end);
401  auto [null_mask, null_count] = detail::make_null_mask(v, v + size);
402  wrapped.reset(new cudf::column{cudf::data_type{cudf::type_to_id<ElementTo>()},
403  size,
404  detail::make_elements<ElementTo, SourceElementT>(begin, end),
405  std::move(null_mask),
406  null_count});
407  }
408 
421  template <typename ElementFrom>
422  fixed_width_column_wrapper(std::initializer_list<ElementFrom> elements)
423  : fixed_width_column_wrapper(std::cbegin(elements), std::cend(elements))
424  {
425  }
426 
444  template <typename ElementFrom>
445  fixed_width_column_wrapper(std::initializer_list<ElementFrom> elements,
446  std::initializer_list<bool> validity)
447  : fixed_width_column_wrapper(std::cbegin(elements), std::cend(elements), std::cbegin(validity))
448  {
449  }
450 
468  template <typename ValidityIterator, typename ElementFrom>
469  fixed_width_column_wrapper(std::initializer_list<ElementFrom> element_list, ValidityIterator v)
470  : fixed_width_column_wrapper(std::cbegin(element_list), std::cend(element_list), v)
471  {
472  }
473 
492  template <typename InputIterator>
493  fixed_width_column_wrapper(InputIterator begin,
494  InputIterator end,
495  std::initializer_list<bool> const& validity)
496  : fixed_width_column_wrapper(begin, end, std::cbegin(validity))
497  {
498  }
499 
517  template <typename ElementFrom>
518  fixed_width_column_wrapper(std::initializer_list<std::pair<ElementFrom, bool>> elements)
519  {
520  auto begin =
521  thrust::make_transform_iterator(elements.begin(), [](auto const& e) { return e.first; });
522  auto end = begin + elements.size();
523  auto v =
524  thrust::make_transform_iterator(elements.begin(), [](auto const& e) { return e.second; });
526  }
527 };
528 
534 template <typename Rep>
536  public:
553  template <typename FixedPointRepIterator>
554  fixed_point_column_wrapper(FixedPointRepIterator begin,
555  FixedPointRepIterator end,
556  numeric::scale_type scale)
557  : column_wrapper{}
558  {
559  CUDF_EXPECTS(numeric::is_supported_representation_type<Rep>(), "not valid representation type");
560 
561  auto const size = cudf::distance(begin, end);
562  auto const elements = thrust::host_vector<Rep>(begin, end);
563  auto const id = type_to_id<numeric::fixed_point<Rep, numeric::Radix::BASE_10>>();
564  auto const data_type = cudf::data_type{id, static_cast<int32_t>(scale)};
565 
566  wrapped.reset(new cudf::column{
567  data_type,
568  size,
569  rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::test::get_default_stream()},
571  0});
572  }
573 
586  fixed_point_column_wrapper(std::initializer_list<Rep> values, numeric::scale_type scale)
587  : fixed_point_column_wrapper(std::cbegin(values), std::cend(values), scale)
588  {
589  }
590 
618  template <typename FixedPointRepIterator, typename ValidityIterator>
619  fixed_point_column_wrapper(FixedPointRepIterator begin,
620  FixedPointRepIterator end,
621  ValidityIterator v,
622  numeric::scale_type scale)
623  : column_wrapper{}
624  {
625  CUDF_EXPECTS(numeric::is_supported_representation_type<Rep>(), "not valid representation type");
626 
627  auto const size = cudf::distance(begin, end);
628  auto const elements = thrust::host_vector<Rep>(begin, end);
629  auto const id = type_to_id<numeric::fixed_point<Rep, numeric::Radix::BASE_10>>();
630  auto const data_type = cudf::data_type{id, static_cast<int32_t>(scale)};
631  auto [null_mask, null_count] = detail::make_null_mask(v, v + size);
632  wrapped.reset(new cudf::column{
633  data_type,
634  size,
635  rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::test::get_default_stream()},
636  std::move(null_mask),
637  null_count});
638  }
639 
657  fixed_point_column_wrapper(std::initializer_list<Rep> elements,
658  std::initializer_list<bool> validity,
659  numeric::scale_type scale)
661  std::cbegin(elements), std::cend(elements), std::cbegin(validity), scale)
662  {
663  }
664 
683  template <typename ValidityIterator>
684  fixed_point_column_wrapper(std::initializer_list<Rep> element_list,
685  ValidityIterator v,
686  numeric::scale_type scale)
687  : fixed_point_column_wrapper(std::cbegin(element_list), std::cend(element_list), v, scale)
688  {
689  }
690 
711  template <typename FixedPointRepIterator>
712  fixed_point_column_wrapper(FixedPointRepIterator begin,
713  FixedPointRepIterator end,
714  std::initializer_list<bool> const& validity,
715  numeric::scale_type scale)
716  : fixed_point_column_wrapper(begin, end, std::cbegin(validity), scale)
717  {
718  }
719 };
720 
725  public:
729  strings_column_wrapper() : strings_column_wrapper(std::initializer_list<std::string>{}) {}
730 
751  template <typename StringsIterator>
752  strings_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
753  {
754  size_type num_strings = std::distance(begin, end);
755  if (num_strings == 0) {
757  return;
758  }
759  auto all_valid = cuda::make_constant_iterator(true);
760  auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid);
761  auto d_chars = cudf::detail::make_device_uvector_async(
763  auto d_offsets = std::make_unique<cudf::column>(
764  cudf::detail::make_device_uvector(
767  0);
768  wrapped =
769  cudf::make_strings_column(num_strings, std::move(d_offsets), d_chars.release(), 0, {});
770  }
771 
800  template <typename StringsIterator, typename ValidityIterator>
801  strings_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v)
802  : column_wrapper{}
803  {
804  size_type num_strings = std::distance(begin, end);
805  if (num_strings == 0) {
807  return;
808  }
809  auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v);
810  auto [null_mask, null_count] = detail::make_null_mask_vector(v, v + num_strings);
811  auto d_chars = cudf::detail::make_device_uvector_async(
813  auto d_offsets = std::make_unique<cudf::column>(
814  cudf::detail::make_device_uvector_async(
817  0);
818  auto d_bitmask = cudf::detail::make_device_uvector(
820  wrapped = cudf::make_strings_column(
821  num_strings, std::move(d_offsets), d_chars.release(), null_count, d_bitmask.release());
822  }
823 
836  strings_column_wrapper(std::initializer_list<std::string> strings)
837  : strings_column_wrapper(std::cbegin(strings), std::cend(strings))
838  {
839  }
840 
859  template <typename ValidityIterator>
860  strings_column_wrapper(std::initializer_list<std::string> strings, ValidityIterator v)
861  : strings_column_wrapper(std::cbegin(strings), std::cend(strings), v)
862  {
863  }
864 
880  strings_column_wrapper(std::initializer_list<std::string> strings,
881  std::initializer_list<bool> validity)
882  : strings_column_wrapper(std::cbegin(strings), std::cend(strings), std::cbegin(validity))
883  {
884  }
885 
906  strings_column_wrapper(std::initializer_list<std::pair<std::string, bool>> strings)
907  {
908  auto begin =
909  thrust::make_transform_iterator(strings.begin(), [](auto const& s) { return s.first; });
910  auto end = begin + strings.size();
911  auto v =
912  thrust::make_transform_iterator(strings.begin(), [](auto const& s) { return s.second; });
913  wrapped = strings_column_wrapper(begin, end, v).release();
914  }
915 };
916 
925 template <typename KeyElementTo, typename SourceElementT = KeyElementTo>
927  public:
931  operator dictionary_column_view() const { return cudf::dictionary_column_view{wrapped->view()}; }
932 
936  dictionary_column_wrapper() : column_wrapper{}
937  {
939  }
940 
960  template <typename InputIterator>
961  dictionary_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{}
962  {
963  wrapped =
965  cudf::data_type{type_id::INT32},
968  }
969 
995  template <typename InputIterator, typename ValidityIterator>
996  dictionary_column_wrapper(InputIterator begin, InputIterator end, ValidityIterator v)
997  : column_wrapper{}
998  {
999  wrapped = cudf::dictionary::encode(
1001  cudf::data_type{type_id::INT32},
1003  }
1004 
1018  template <typename ElementFrom>
1019  dictionary_column_wrapper(std::initializer_list<ElementFrom> elements)
1020  : dictionary_column_wrapper(std::cbegin(elements), std::cend(elements))
1021  {
1022  }
1023 
1042  template <typename ElementFrom>
1043  dictionary_column_wrapper(std::initializer_list<ElementFrom> elements,
1044  std::initializer_list<bool> validity)
1045  : dictionary_column_wrapper(std::cbegin(elements), std::cend(elements), std::cbegin(validity))
1046  {
1047  }
1048 
1067  template <typename ValidityIterator, typename ElementFrom>
1068  dictionary_column_wrapper(std::initializer_list<ElementFrom> element_list, ValidityIterator v)
1069  : dictionary_column_wrapper(std::cbegin(element_list), std::cend(element_list), v)
1070  {
1071  }
1072 
1093  template <typename InputIterator>
1094  dictionary_column_wrapper(InputIterator begin,
1095  InputIterator end,
1096  std::initializer_list<bool> const& validity)
1097  : dictionary_column_wrapper(begin, end, std::cbegin(validity))
1098  {
1099  }
1100 };
1101 
1107 template <>
1109  public:
1114  operator dictionary_column_view() const { return cudf::dictionary_column_view{wrapped->view()}; }
1115 
1121  [[nodiscard]] column_view keys() const
1122  {
1123  return cudf::dictionary_column_view{wrapped->view()}.keys();
1124  }
1125 
1131  [[nodiscard]] column_view indices() const
1132  {
1133  return cudf::dictionary_column_view{wrapped->view()}.indices();
1134  }
1135 
1139  dictionary_column_wrapper() : dictionary_column_wrapper(std::initializer_list<std::string>{}) {}
1140 
1161  template <typename StringsIterator>
1162  dictionary_column_wrapper(StringsIterator begin, StringsIterator end) : column_wrapper{}
1163  {
1164  wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end),
1165  cudf::data_type{type_id::INT32},
1168  }
1169 
1198  template <typename StringsIterator, typename ValidityIterator>
1199  dictionary_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v)
1200  : column_wrapper{}
1201  {
1202  wrapped = cudf::dictionary::encode(strings_column_wrapper(begin, end, v),
1203  cudf::data_type{type_id::INT32},
1205  }
1206 
1219  dictionary_column_wrapper(std::initializer_list<std::string> strings)
1220  : dictionary_column_wrapper(std::cbegin(strings), std::cend(strings))
1221  {
1222  }
1223 
1242  template <typename ValidityIterator>
1243  dictionary_column_wrapper(std::initializer_list<std::string> strings, ValidityIterator v)
1244  : dictionary_column_wrapper(std::cbegin(strings), std::cend(strings), v)
1245  {
1246  }
1247 
1263  dictionary_column_wrapper(std::initializer_list<std::string> strings,
1264  std::initializer_list<bool> validity)
1265  : dictionary_column_wrapper(std::cbegin(strings), std::cend(strings), std::cbegin(validity))
1266  {
1267  }
1268 };
1269 
1305 template <typename T, typename SourceElementT = T>
1307  public:
1311  operator lists_column_view() const { return cudf::lists_column_view{wrapped->view()}; }
1312 
1326  template <typename Element = T, std::enable_if_t<cudf::is_fixed_width<Element>()>* = nullptr>
1327  lists_column_wrapper(std::initializer_list<SourceElementT> elements) : column_wrapper{}
1328  {
1329  build_from_non_nested(
1331  }
1332 
1348  template <typename Element = T,
1349  typename InputIterator,
1350  std::enable_if_t<cudf::is_fixed_width<Element>()>* = nullptr>
1351  lists_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{}
1352  {
1353  build_from_non_nested(
1355  }
1356 
1372  template <typename Element = T,
1373  typename ValidityIterator,
1374  std::enable_if_t<cudf::is_fixed_width<Element>()>* = nullptr>
1375  lists_column_wrapper(std::initializer_list<SourceElementT> elements, ValidityIterator v)
1376  : column_wrapper{}
1377  {
1378  build_from_non_nested(
1380  }
1381 
1399  template <typename Element = T,
1400  typename InputIterator,
1401  typename ValidityIterator,
1402  std::enable_if_t<cudf::is_fixed_width<Element>()>* = nullptr>
1403  lists_column_wrapper(InputIterator begin, InputIterator end, ValidityIterator v)
1404  : column_wrapper{}
1405  {
1406  build_from_non_nested(
1408  }
1409 
1423  template <typename Element = T,
1424  std::enable_if_t<std::is_same_v<Element, cudf::string_view>>* = nullptr>
1425  lists_column_wrapper(std::initializer_list<std::string> elements) : column_wrapper{}
1426  {
1427  build_from_non_nested(
1428  cudf::test::strings_column_wrapper(elements.begin(), elements.end()).release());
1429  }
1430 
1446  template <typename Element = T,
1447  typename ValidityIterator,
1448  std::enable_if_t<std::is_same_v<Element, cudf::string_view>>* = nullptr>
1449  lists_column_wrapper(std::initializer_list<std::string> elements, ValidityIterator v)
1450  : column_wrapper{}
1451  {
1452  build_from_non_nested(
1453  cudf::test::strings_column_wrapper(elements.begin(), elements.end(), v).release());
1454  }
1455 
1478  : column_wrapper{}
1479  {
1480  std::vector<bool> valids;
1481  build_from_nested(elements, valids);
1482  }
1483 
1495  lists_column_wrapper() : column_wrapper{}
1496  {
1497  build_from_non_nested(make_empty_column(cudf::type_to_id<T>()));
1498  }
1499 
1525  template <typename ValidityIterator>
1527  ValidityIterator v)
1528  : column_wrapper{}
1529  {
1530  std::vector<bool> validity;
1531  std::transform(elements.begin(),
1532  elements.end(),
1533  v,
1534  std::back_inserter(validity),
1535  [](lists_column_wrapper const& l, bool valid) { return valid; });
1536  build_from_nested(elements, validity);
1537  }
1538 
1546  {
1549  return lists_column_wrapper<T>(
1550  1,
1551  offsets.release(),
1552  values.release(),
1553  valid ? 0 : 1,
1555  }
1556 
1557  private:
1568  std::unique_ptr<cudf::column>&& offsets,
1569  std::unique_ptr<cudf::column>&& values,
1571  rmm::device_buffer&& null_mask)
1572  {
1573  // construct the list column
1574  wrapped = make_lists_column(
1575  num_rows, std::move(offsets), std::move(values), null_count, std::move(null_mask));
1576  }
1577 
1594  void build_from_nested(std::initializer_list<lists_column_wrapper<T, SourceElementT>> elements,
1595  std::vector<bool> const& v)
1596  {
1597  auto valids = cudf::detail::make_counting_transform_iterator(
1598  0, [&v](auto i) { return v.empty() ? true : v[i]; });
1599 
1600  // compute the expected hierarchy and depth
1601  auto const hierarchy_and_depth =
1602  std::accumulate(elements.begin(),
1603  elements.end(),
1604  std::pair<column_view, int32_t>{{}, -1},
1605  [](auto acc, lists_column_wrapper const& lcw) {
1606  return lcw.depth > acc.second ? std::pair(lcw.get_view(), lcw.depth) : acc;
1607  });
1608  column_view expected_hierarchy = hierarchy_and_depth.first;
1609  int32_t const expected_depth = hierarchy_and_depth.second;
1610 
1611  // preprocess columns so that every column_view in 'cols' is an equivalent hierarchy
1612  auto [cols, stubs] = preprocess_columns(elements, expected_hierarchy, expected_depth);
1613 
1614  // generate offsets
1615  size_type count = 0;
1616  std::vector<size_type> offsetv;
1617  std::transform(cols.cbegin(),
1618  cols.cend(),
1619  valids,
1620  std::back_inserter(offsetv),
1621  [&](cudf::column_view const& col, bool valid) {
1622  // nulls are represented as a repeated offset
1623  size_type ret = count;
1624  if (valid) { count += col.size(); }
1625  return ret;
1626  });
1627  // add the final offset
1628  offsetv.push_back(count);
1629  auto offsets =
1630  cudf::test::fixed_width_column_wrapper<size_type>(offsetv.begin(), offsetv.end()).release();
1631 
1632  // concatenate them together, skipping children that are null.
1633  std::vector<column_view> children;
1634  thrust::copy_if(std::cbegin(cols),
1635  std::cend(cols),
1636  valids,
1637  std::back_inserter(children),
1638  cuda::std::identity{});
1639 
1640  auto data = children.empty() ? cudf::empty_like(expected_hierarchy)
1641  : cudf::concatenate(children,
1642  cudf::test::get_default_stream(),
1644 
1645  // increment depth
1646  depth = expected_depth + 1;
1647 
1648  auto [null_mask, null_count] = [&] {
1649  if (v.size() <= 0) return std::make_pair(rmm::device_buffer{}, cudf::size_type{0});
1650  return cudf::test::detail::make_null_mask(v.begin(), v.end());
1651  }();
1652 
1653  // construct the list column
1654  wrapped = make_lists_column(
1655  cols.size(), std::move(offsets), std::move(data), null_count, std::move(null_mask));
1656  }
1657 
1665  void build_from_non_nested(std::unique_ptr<column> c)
1666  {
1667  CUDF_EXPECTS(c->type().id() == type_id::EMPTY || !cudf::is_nested(c->type()),
1668  "Unexpected type");
1669 
1670  std::vector<size_type> offsetv;
1671  if (c->size() > 0) {
1672  offsetv.push_back(0);
1673  offsetv.push_back(c->size());
1674  }
1675  auto offsets =
1676  cudf::test::fixed_width_column_wrapper<size_type>(offsetv.begin(), offsetv.end()).release();
1677 
1678  // construct the list column. mark this as a root
1679  root = true;
1680  depth = 0;
1681 
1682  size_type num_elements = offsets->size() == 0 ? 0 : offsets->size() - 1;
1683  wrapped =
1684  make_lists_column(num_elements, std::move(offsets), std::move(c), 0, rmm::device_buffer{});
1685  }
1686 
1721  std::unique_ptr<column> normalize_column(column_view const& col,
1722  column_view const& expected_hierarchy)
1723  {
1724  // if are at the bottom of the short column, it must be empty
1725  if (col.type().id() != type_id::LIST) {
1726  CUDF_EXPECTS(col.is_empty(), "Encountered mismatched column!");
1727 
1728  auto remainder = empty_like(expected_hierarchy);
1729  return remainder;
1730  }
1731 
1732  lists_column_view lcv(col);
1733  return make_lists_column(
1734  col.size(),
1735  std::make_unique<column>(lcv.offsets()),
1736  normalize_column(lists_column_view(col).child(),
1737  lists_column_view(expected_hierarchy).child()),
1738  col.null_count(),
1741  }
1742 
1743  std::pair<std::vector<column_view>, std::vector<std::unique_ptr<column>>> preprocess_columns(
1744  std::initializer_list<lists_column_wrapper<T, SourceElementT>> const& elements,
1745  column_view& expected_hierarchy,
1746  int expected_depth)
1747  {
1748  std::vector<std::unique_ptr<column>> stubs;
1749  std::vector<column_view> cols;
1750 
1751  // preprocess the incoming lists.
1752  // - unwrap any "root" lists
1753  // - handle incomplete hierarchies
1754  std::transform(elements.begin(),
1755  elements.end(),
1756  std::back_inserter(cols),
1757  [&](lists_column_wrapper const& l) -> column_view {
1758  // depth mismatch. attempt to normalize the short column.
1759  // this function will also catch if this is a legitimately broken
1760  // set of input
1761  if (l.depth < expected_depth) {
1762  if (l.root) {
1763  // this exception distinguishes between the following two cases:
1764  //
1765  // { {{{1, 2, 3}}}, {} }
1766  // In this case, row 0 is a List<List<List<int>>>, whereas row 1 is
1767  // just a List<> which is an apparent mismatch. However, because row 1
1768  // is empty we will allow that to semantically mean
1769  // "a List<List<List<int>>> that's empty at the top level"
1770  //
1771  // { {{{1, 2, 3}}}, {4, 5, 6} }
1772  // In this case, row 1 is a concrete List<int> with actual values.
1773  // There is no way to rectify the differences so we will treat it as a
1774  // true column mismatch.
1775  CUDF_EXPECTS(l.wrapped->size() == 0, "Mismatch in column types!");
1776  stubs.push_back(empty_like(expected_hierarchy));
1777  } else {
1778  stubs.push_back(normalize_column(l.get_view(), expected_hierarchy));
1779  }
1780  return *(stubs.back());
1781  }
1782  // the empty hierarchy case
1783  return l.get_view();
1784  });
1785 
1786  return {std::move(cols), std::move(stubs)};
1787  }
1788 
1789  [[nodiscard]] column_view get_view() const
1790  {
1791  return root ? lists_column_view(*wrapped).child() : *wrapped;
1792  }
1793 
1794  int depth = 0;
1795  bool root = false;
1796 };
1797 
1802  public:
1830  structs_column_wrapper(std::vector<std::unique_ptr<cudf::column>>&& child_columns,
1831  std::vector<bool> const& validity = {})
1832  {
1833  init(std::move(child_columns), validity);
1834  }
1835 
1857  std::initializer_list<std::reference_wrapper<detail::column_wrapper>> child_column_wrappers,
1858  std::vector<bool> const& validity = {})
1859  {
1860  std::vector<std::unique_ptr<cudf::column>> child_columns;
1861  child_columns.reserve(child_column_wrappers.size());
1862  std::transform(child_column_wrappers.begin(),
1863  child_column_wrappers.end(),
1864  std::back_inserter(child_columns),
1865  [&](auto const& column_wrapper) {
1866  return std::make_unique<cudf::column>(column_wrapper.get(),
1867  cudf::test::get_default_stream());
1868  });
1869  init(std::move(child_columns), validity);
1870  }
1871 
1892  template <typename V>
1894  std::initializer_list<std::reference_wrapper<detail::column_wrapper>> child_column_wrappers,
1895  V validity_iter)
1896  {
1897  std::vector<std::unique_ptr<cudf::column>> child_columns;
1898  child_columns.reserve(child_column_wrappers.size());
1899  std::transform(child_column_wrappers.begin(),
1900  child_column_wrappers.end(),
1901  std::back_inserter(child_columns),
1902  [&](auto const& column_wrapper) {
1903  return std::make_unique<cudf::column>(column_wrapper.get(),
1904  cudf::test::get_default_stream());
1905  });
1906  init(std::move(child_columns), validity_iter);
1907  }
1908 
1909  private:
1910  void init(std::vector<std::unique_ptr<cudf::column>>&& child_columns,
1911  std::vector<bool> const& validity)
1912  {
1913  size_type num_rows = child_columns.empty() ? 0 : child_columns[0]->size();
1914 
1915  CUDF_EXPECTS(std::all_of(child_columns.begin(),
1916  child_columns.end(),
1917  [&](auto const& p_column) { return p_column->size() == num_rows; }),
1918  "All struct member columns must have the same row count.");
1919 
1920  CUDF_EXPECTS(validity.size() <= 0 || static_cast<size_type>(validity.size()) == num_rows,
1921  "Validity buffer must have as many elements as rows in the struct column.");
1922 
1923  auto [null_mask, null_count] = [&] {
1924  if (validity.size() <= 0) return std::make_pair(rmm::device_buffer{}, cudf::size_type{0});
1925  return cudf::test::detail::make_null_mask(validity.begin(), validity.end());
1926  }();
1927 
1928  wrapped = cudf::make_structs_column(num_rows,
1929  std::move(child_columns),
1930  null_count,
1931  std::move(null_mask),
1934  }
1935 
1936  template <typename V>
1937  void init(std::vector<std::unique_ptr<cudf::column>>&& child_columns, V validity_iterator)
1938  {
1939  size_type const num_rows = child_columns.empty() ? 0 : child_columns[0]->size();
1940 
1941  CUDF_EXPECTS(std::all_of(child_columns.begin(),
1942  child_columns.end(),
1943  [&](auto const& p_column) { return p_column->size() == num_rows; }),
1944  "All struct member columns must have the same row count.");
1945 
1946  std::vector<bool> validity(num_rows);
1947  std::copy(validity_iterator, validity_iterator + num_rows, validity.begin());
1948 
1949  init(std::move(child_columns), validity);
1950  }
1951 };
1952 
1953 } // namespace test
1954 } // namespace CUDF_EXPORT cudf
Utilities for bit and bitmask operations.
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A container of nullable device data as a column of elements.
Definition: column.hpp:36
Indicator for the logical data type of an element in a column.
Definition: types.hpp:277
A wrapper class for operations on a dictionary column.
column_view indices() const noexcept
Returns the column of indices.
column_view keys() const noexcept
Returns the column of keys.
Given a column-view of lists type, an instance of this class provides a wrapper on this compound column for list operations.
A non-owning, mutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Base class for a wrapper around a cudf::column.
std::unique_ptr< cudf::column > release()
Releases internal unique_ptr to wrapped column.
dictionary_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v)
Construct a nullable dictionary column of strings from the range [begin,end) using the range [v,...
dictionary_column_wrapper(std::initializer_list< std::string > strings, std::initializer_list< bool > validity)
Construct a nullable dictionary column of strings from a list of strings and a list of booleans to in...
column_view indices() const
Access indices column view.
dictionary_column_wrapper(std::initializer_list< std::string > strings)
Construct a non-nullable dictionary column of strings from a list of strings.
dictionary_column_wrapper(StringsIterator begin, StringsIterator end)
Construct a non-nullable dictionary column of strings from the range [begin,end).
dictionary_column_wrapper(std::initializer_list< std::string > strings, ValidityIterator v)
Construct a nullable dictionary column of strings from a list of strings and the range [v,...
column_view keys() const
Access keys column view.
dictionary_column_wrapper()
Default constructor initializes an empty dictionary column of strings.
column_wrapper derived class for wrapping dictionary columns.
dictionary_column_wrapper(std::initializer_list< ElementFrom > elements)
Construct a non-nullable dictionary column of fixed-width elements from an initializer list.
dictionary_column_wrapper(std::initializer_list< ElementFrom > elements, std::initializer_list< bool > validity)
Construct a nullable dictionary column from a list of fixed-width elements using another list to indi...
dictionary_column_wrapper(std::initializer_list< ElementFrom > element_list, ValidityIterator v)
Construct a nullable dictionary column from a list of fixed-width elements and the range [v,...
dictionary_column_wrapper()
Default constructor initializes an empty column with dictionary type.
dictionary_column_wrapper(InputIterator begin, InputIterator end)
Construct a non-nullable dictionary column of the fixed-width elements in the range [begin,...
dictionary_column_wrapper(InputIterator begin, InputIterator end, ValidityIterator v)
Construct a nullable dictionary column of the fixed-width elements in the range [begin,...
dictionary_column_wrapper(InputIterator begin, InputIterator end, std::initializer_list< bool > const &validity)
Construct a nullable dictionary column of the fixed-width elements in the range [begin,...
A wrapper for a column of fixed-width elements.
fixed_point_column_wrapper(FixedPointRepIterator begin, FixedPointRepIterator end, std::initializer_list< bool > const &validity, numeric::scale_type scale)
Construct a nullable column of the decimal elements in the range [begin,end) using a validity initial...
fixed_point_column_wrapper(std::initializer_list< Rep > elements, std::initializer_list< bool > validity, numeric::scale_type scale)
Construct a nullable column from an initializer list of decimal elements using another list to indica...
fixed_point_column_wrapper(FixedPointRepIterator begin, FixedPointRepIterator end, numeric::scale_type scale)
Construct a non-nullable column of the decimal elements in the range [begin,end).
fixed_point_column_wrapper(std::initializer_list< Rep > element_list, ValidityIterator v, numeric::scale_type scale)
Construct a nullable column from an initializer list of decimal elements and the range [v,...
fixed_point_column_wrapper(std::initializer_list< Rep > values, numeric::scale_type scale)
Construct a non-nullable column of decimal elements from an initializer list.
fixed_point_column_wrapper(FixedPointRepIterator begin, FixedPointRepIterator end, ValidityIterator v, numeric::scale_type scale)
Construct a nullable column of the fixed-point elements from a range.
column_wrapper derived class for wrapping columns of fixed-width elements.
fixed_width_column_wrapper(InputIterator begin, InputIterator end, std::initializer_list< bool > const &validity)
Construct a nullable column of the fixed-width elements in the range [begin,end) using a validity ini...
fixed_width_column_wrapper(std::initializer_list< std::pair< ElementFrom, bool >> elements)
Construct a nullable column from a list of pairs of fixed-width elements and validity booleans of eac...
fixed_width_column_wrapper(std::initializer_list< ElementFrom > elements, std::initializer_list< bool > validity)
Construct a nullable column from a list of fixed-width elements using another list to indicate the va...
fixed_width_column_wrapper(std::initializer_list< ElementFrom > elements)
Construct a non-nullable column of fixed-width elements from an initializer list.
fixed_width_column_wrapper(InputIterator begin, InputIterator end, ValidityIterator v)
Construct a nullable column of the fixed-width elements in the range [begin,end) using the range [v,...
fixed_width_column_wrapper(std::initializer_list< ElementFrom > element_list, ValidityIterator v)
Construct a nullable column from a list of fixed-width elements and the range [v, v + element_list.size()) interpreted as booleans to indicate the validity of each element.
fixed_width_column_wrapper(InputIterator begin, InputIterator end)
Construct a non-nullable column of the fixed-width elements in the range [begin,end).
fixed_width_column_wrapper()
Default constructor initializes an empty column with proper dtype.
column_wrapper derived class for wrapping columns of lists.
lists_column_wrapper(InputIterator begin, InputIterator end)
Construct a lists column containing a single list of fixed-width type from an iterator range.
lists_column_wrapper(std::initializer_list< lists_column_wrapper< T, SourceElementT >> elements, ValidityIterator v)
Construct a lists column of nested lists from an initializer list of values and a validity iterator.
static lists_column_wrapper< T > make_one_empty_row_column(bool valid=true)
Construct a list column containing a single empty, optionally null row.
lists_column_wrapper(InputIterator begin, InputIterator end, ValidityIterator v)
Construct a lists column containing a single list of fixed-width type from an iterator range and a va...
lists_column_wrapper()
Construct an empty lists column.
lists_column_wrapper(std::initializer_list< lists_column_wrapper< T, SourceElementT >> elements)
Construct a lists column of nested lists from an initializer list of values.
lists_column_wrapper(std::initializer_list< SourceElementT > elements)
Construct a lists column containing a single list of fixed-width type from an initializer list of val...
lists_column_wrapper(std::initializer_list< std::string > elements, ValidityIterator v)
Construct a lists column containing a single list of strings from an initializer list of values and a...
lists_column_wrapper(std::initializer_list< std::string > elements)
Construct a lists column containing a single list of strings from an initializer list of values.
lists_column_wrapper(std::initializer_list< SourceElementT > elements, ValidityIterator v)
Construct a lists column containing a single list of fixed-width type from an initializer list of val...
column_wrapper derived class for wrapping columns of strings.
strings_column_wrapper(std::initializer_list< std::pair< std::string, bool >> strings)
Construct a nullable column from a list of pairs of strings and validity booleans of each string.
strings_column_wrapper()
Default constructor initializes an empty column of strings.
strings_column_wrapper(std::initializer_list< std::string > strings)
Construct a non-nullable column of strings from a list of strings.
strings_column_wrapper(std::initializer_list< std::string > strings, std::initializer_list< bool > validity)
Construct a nullable column of strings from a list of strings and a list of booleans to indicate the ...
strings_column_wrapper(std::initializer_list< std::string > strings, ValidityIterator v)
Construct a nullable column of strings from a list of strings and the range [v, v + strings.size()) interpreted as booleans to indicate the validity of each string.
strings_column_wrapper(StringsIterator begin, StringsIterator end)
Construct a non-nullable column of strings from the range [begin,end).
strings_column_wrapper(StringsIterator begin, StringsIterator end, ValidityIterator v)
Construct a nullable column of strings from the range [begin,end) using the range [v,...
column_wrapper derived class for wrapping columns of structs.
structs_column_wrapper(std::initializer_list< std::reference_wrapper< detail::column_wrapper >> child_column_wrappers, V validity_iter)
Constructs a struct column from the list of column wrappers for child columns.
structs_column_wrapper(std::initializer_list< std::reference_wrapper< detail::column_wrapper >> child_column_wrappers, std::vector< bool > const &validity={})
Constructs a struct column from the list of column wrappers for child columns.
structs_column_wrapper(std::vector< std::unique_ptr< cudf::column >> &&child_columns, std::vector< bool > const &validity={})
Constructs a struct column from the specified list of pre-constructed child columns.
void const * data() const noexcept
Class definition for cudf::column.
Column factory APIs.
Column APIs for gather, scatter, split, slice, etc.
Dictionary column encode and decode APIs.
Class definition for fixed point data type.
std::unique_ptr< column > empty_like(column_view const &input)
Initializes and returns an empty column of the same type as the input.
std::unique_ptr< cudf::column > make_structs_column(size_type num_rows, std::vector< std::unique_ptr< column >> &&child_columns, size_type null_count, rmm::device_buffer &&null_mask, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a STRUCT column using specified child columns as members.
std::unique_ptr< column > make_empty_column(data_type type)
Creates an empty column of the specified type.
std::unique_ptr< column > make_strings_column(cudf::device_span< cuda::std::pair< char const *, size_type > const > strings, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a STRING type column given a device span of pointer/size pairs.
std::unique_ptr< cudf::column > make_lists_column(size_type num_rows, std::unique_ptr< column > offsets_column, std::unique_ptr< column > child_column, size_type null_count, rmm::device_buffer &&null_mask)
Construct a LIST type column given offsets column, child column, null mask and null count.
std::size_t bitmask_allocation_size_bytes(size_type number_of_bits, std::size_t padding_boundary=64)
Computes the required bytes necessary to represent the specified number of bits with a given padding ...
rmm::device_buffer copy_bitmask(bitmask_type const *mask, size_type begin_bit, size_type end_bit, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a device_buffer from a slice of bitmask defined by a range of indices [begin_bit,...
rmm::device_buffer create_null_mask(size_type size, mask_state state, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a device_buffer for use as a null value indicator bitmask of a column
size_type null_count(bitmask_type const *bitmask, size_type start, size_type stop, rmm::cuda_stream_view stream=cudf::get_default_stream())
Given a validity bitmask, counts the number of null elements (unset bits) in the range [start,...
std::unique_ptr< column > concatenate(host_span< column_view const > columns_to_concat, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Concatenates multiple columns into a single column.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< column > encode(column_view const &column, data_type indices_type=data_type{type_id::INT32}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a dictionary column by dictionary encoding an existing column.
scale_type
The scale type for fixed_point.
Definition: fixed_point.hpp:33
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
device_async_resource_ref get_current_device_resource_ref()
std::unique_ptr< column > is_fixed_point(strings_column_view const &input, data_type decimal_type=data_type{type_id::DECIMAL64}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a boolean column identifying strings in which all characters are valid for conversion to fixe...
std::unique_ptr< column > count(strings_column_view const &input, string_scalar const &target, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns the number of times the given target string matches in each string.
std::unique_ptr< column > transform(std::vector< column_view > const &inputs, std::string const &transform_udf, data_type output_type, bool is_ptx, std::optional< void * > user_data=std::nullopt, null_aware is_null_aware=null_aware::NO, output_nullability null_policy=output_nullability::PRESERVE, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a new column by applying a transform function against every element of the input columns.
CUDF_HOST_DEVICE void set_bit_unsafe(bitmask_type *bitmask, size_type bit_index)
Sets the specified bit to 1
Definition: bit.hpp:73
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:145
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
uint32_t bitmask_type
Bitmask type stored as 32-bit unsigned integer.
Definition: types.hpp:85
size_type distance(T f, T l)
Similar to std::distance but returns cudf::size_type and performs static_cast
Definition: types.hpp:99
constexpr CUDF_HOST_DEVICE bool is_nested()
Indicates whether T is a nested type.
Definition: traits.hpp:646
@ ALL_NULL
Null mask allocated, initialized to all elements NULL.
@ STRING
String elements.
@ DICTIONARY32
Dictionary type using int32 indices.
Class definition for cudf::lists_column_view.
cuDF interfaces
Definition: host_udf.hpp:26
fixed_point and supporting types
Definition: fixed_point.hpp:23
APIs for managing validity bitmasks.
Convert between source and target types when they differ and where possible.
constexpr ToT operator()(FromT element) const
No conversion necessary: Same type, simply copy element to output.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
Type declarations for libcudf.