contiguous_split.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/packed_types.hpp>
9 #include <cudf/types.hpp>
10 #include <cudf/utilities/export.hpp>
12 
13 #include <cstdint>
14 #include <memory>
15 #include <span>
16 #include <vector>
17 
18 namespace CUDF_EXPORT cudf {
19 
/**
 * @brief Performs a deep-copy split of `input` into a vector of `packed_table`.
 *
 * NOTE(review): this listing drops the end of the declaration. The full signature shown in the
 * reference text at the end of this page also takes `rmm::cuda_stream_view stream` and
 * `rmm::device_async_resource_ref mr`, both defaulted.
 *
 * @param input  View of the table to split.
 * @param splits Split points; presumably row indices into `input` — TODO confirm.
 * @return One `packed_table` per resulting partition (count presumably `splits.size() + 1` —
 *         TODO confirm).
 */
67 std::vector<packed_table> contiguous_split(
68  cudf::table_view const& input,
69  std::vector<size_type> const& splits,
72 
73 namespace detail {
74 
// Forward declaration only: the definition is not part of this header. `chunked_pack` stores
// this type behind a `std::unique_ptr`, which is why an incomplete type is sufficient here.
80 struct contiguous_split_state;
81 } // namespace detail
82 
141  public:
/**
 * @brief Construct a chunked_pack for `input`.
 *
 * NOTE(review): this listing drops the end of the declaration. The full signature shown in the
 * reference text at the end of this page also takes `rmm::cuda_stream_view stream` and
 * `rmm::device_async_resource_ref temp_mr`, both defaulted.
 *
 * @param input            View of the table to pack.
 * @param user_buffer_size Size of the user-provided buffer that chunks are packed into
 *                         (presumably in bytes — TODO confirm).
 */
152  explicit chunked_pack(
153  cudf::table_view const& input,
154  std::size_t user_buffer_size,
157 
163 
/**
 * @brief Obtain the total size of the contiguously packed table_view.
 *
 * @return Total packed size (presumably in bytes — TODO confirm).
 */
169  [[nodiscard]] std::size_t get_total_contiguous_size() const;
170 
/**
 * @brief Check whether there are chunks left to be copied.
 *
 * @return `true` while at least one more call to `next` is expected.
 */
176  [[nodiscard]] bool has_next() const;
177 
/**
 * @brief Packs the next chunk into `user_buffer`.
 *
 * Should be called for as long as `has_next` returns true.
 *
 * @param user_buffer Device span that receives the chunk.
 * @return Presumably the number of bytes written into `user_buffer` — TODO confirm.
 */
191  [[nodiscard]] std::size_t next(cudf::device_span<uint8_t> const& user_buffer);
192 
/**
 * @brief Build the opaque metadata for all added columns.
 *
 * @return Owning pointer to a byte vector holding the metadata.
 */
198  [[nodiscard]] std::unique_ptr<std::vector<uint8_t>> build_metadata() const;
199 
/**
 * @brief Factory: creates a chunked_pack instance to perform a "pack" of `input`.
 *
 * NOTE(review): this listing drops the end of the declaration. The full signature shown in the
 * reference text at the end of this page also takes `rmm::cuda_stream_view stream` and
 * `rmm::device_async_resource_ref temp_mr`, both defaulted.
 *
 * @param input            View of the table to pack.
 * @param user_buffer_size Size of the user-provided buffer (presumably in bytes — TODO confirm).
 * @return Owning pointer to the new chunked_pack.
 */
219  [[nodiscard]] static std::unique_ptr<chunked_pack> create(
220  cudf::table_view const& input,
221  std::size_t user_buffer_size,
224 
225  private:
226  // Internal contiguous-split state; the type is only forward-declared in this header and is solely owned here via unique_ptr.
227  std::unique_ptr<detail::contiguous_split_state> state;
228 };
229 
246 
/**
 * @brief Compute the size in bytes of the contiguous memory buffer needed to pack `input`.
 *
 * NOTE(review): this listing drops the end of the declaration. The full signature shown in the
 * reference text at the end of this page also takes `rmm::cuda_stream_view stream` and
 * `rmm::device_async_resource_ref temp_mr`, both defaulted.
 *
 * @param input View of the table whose packed size is wanted.
 * @return Required buffer size in bytes.
 */
259 std::size_t packed_size(
260  cudf::table_view const& input,
263 
/**
 * @brief Produce the metadata used for packing a table stored in a contiguous buffer.
 *
 * @param table             View of the table to describe.
 * @param contiguous_buffer Start of the contiguous buffer holding the table's data
 *                          (presumably device memory — TODO confirm).
 * @param buffer_size       Size of `contiguous_buffer` (presumably in bytes — TODO confirm).
 * @return Byte vector containing the metadata.
 */
277 std::vector<uint8_t> pack_metadata(table_view const& table,
278  uint8_t const* contiguous_buffer,
279  size_t buffer_size);
280 
296 
/**
 * @brief Deserialize the result of `cudf::pack`.
 *
 * @param metadata Pointer to the metadata bytes (presumably host memory — TODO confirm).
 * @param gpu_data Pointer to the packed column data (device memory, going by the name —
 *                 TODO confirm).
 * @return A `table_view`. Since a view is non-owning, the input buffers presumably must
 *         outlive it — TODO confirm.
 */
314 table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data);
315 
339  public:
/**
 * @brief Non-owning view of a single column's metadata within packed column data.
 *
 * Only declarations are visible in this listing, so the mapping from each accessor to a private
 * field noted below is inferred from the names — TODO confirm against the implementation.
 */
351  class column_view {
352  public:
// Logical data type of the column (presumably `_type`).
356  [[nodiscard]] data_type type() const;
357 
// Number of rows in the column (presumably `_size`).
361  [[nodiscard]] size_type num_rows() const;
362 
// Number of null elements in the column (presumably `_null_count`).
366  [[nodiscard]] size_type null_count() const;
367 
// Number of child columns (presumably `_num_children`).
371  [[nodiscard]] size_type num_children() const;
372 
// View of the i-th child column's metadata. Behavior for an out-of-range `i` is not visible here.
380  [[nodiscard]] column_view child(size_type i) const;
381 
382  private:
// The constructor below is private, so the friend declaration is what lets
// packed_metadata_view create instances.
383  friend class packed_metadata_view;
// Fields default to an EMPTY type and zero counts until the constructor fills them in.
384  data_type _type{type_id::EMPTY};
385  size_type _size{};
386  size_type _null_count{};
387  size_type _num_children{};
388  // Span from this entry to the end of the metadata buffer (needed for child traversal).
389  std::span<std::uint8_t const> _buffer;
// Builds a view from the bytes starting at this column's entry; not callable by outside code.
390  explicit column_view(std::span<std::uint8_t const> buffer);
391  };
392 
/**
 * @brief Construct a view from a metadata byte buffer.
 *
 * @param buffer Metadata bytes to view. A span does not own its data, so the bytes presumably
 *               must outlive this object — TODO confirm.
 */
400  explicit packed_metadata_view(std::span<std::uint8_t const> buffer);
401 
// Number of columns described by the metadata (presumably the top-level count held in
// `_num_columns` — TODO confirm).
405  [[nodiscard]] size_type num_columns() const;
406 
// The number of rows in the table. How this is derived (e.g. for a table with no columns) is
// not visible in this listing.
415  [[nodiscard]] size_type num_rows() const;
416 
// View of the i-th top-level column's metadata. Behavior for an out-of-range `i` is not
// visible in this listing.
424  [[nodiscard]] column_view column(size_type i) const;
425 
426  private:
427  // Span from the first top-level column entry to the end of the metadata buffer.
428  std::span<std::uint8_t const> _entries;
429  size_type _num_columns{};  // Column count; zero-initialized here, presumably set by the constructor — TODO confirm.
430 };
431 
433 } // namespace CUDF_EXPORT cudf
Perform a chunked "pack" operation of the input table_view using a user provided buffer of size user_buffer_size.
std::size_t get_total_contiguous_size() const
Obtain the total size of the contiguously packed table_view.
std::size_t next(cudf::device_span< uint8_t > const &user_buffer)
Packs the next chunk into user_buffer. This should be called as long as has_next returns true....
~chunked_pack()
Destructor that will be implemented as default. Declared with definition here because contiguous_split_state is an incomplete type in this header.
chunked_pack(cudf::table_view const &input, std::size_t user_buffer_size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref temp_mr=cudf::get_current_device_resource_ref())
Construct a chunked_pack class.
std::unique_ptr< std::vector< uint8_t > > build_metadata() const
Build the opaque metadata for all added columns.
static std::unique_ptr< chunked_pack > create(cudf::table_view const &input, std::size_t user_buffer_size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref temp_mr=cudf::get_current_device_resource_ref())
Creates a chunked_pack instance to perform a "pack" of the table_view "input", where a buffer of user_buffer_size is filled one chunk at a time.
bool has_next() const
Function to check if there are chunks left to be copied.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:277
A non-owning view of a single column's metadata within packed column data.
column_view child(size_type i) const
A view of the i-th child column's metadata.
A non-owning view over the host metadata produced by cudf::pack.
size_type num_rows() const
The number of rows in the table.
column_view column(size_type i) const
A view of the i-th top-level column's metadata.
size_type num_columns() const
packed_metadata_view(std::span< std::uint8_t const > buffer)
Construct a view from a metadata byte buffer.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
A set of cudf::column's of the same size.
Definition: table.hpp:29
std::vector< packed_table > contiguous_split(cudf::table_view const &input, std::vector< size_type > const &splits, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Performs a deep-copy split of a table_view into a vector of packed_table where each packed_table is using a single contiguous block of memory for all of the split's column data.
packed_columns pack(cudf::table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Deep-copy a table_view into a serialized contiguous memory format.
std::size_t packed_size(cudf::table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref temp_mr=cudf::get_current_device_resource_ref())
Compute the size in bytes of the contiguous memory buffer needed to pack the input table.
table_view unpack(uint8_t const *metadata, uint8_t const *gpu_data)
Deserialize the result of cudf::pack.
std::vector< uint8_t > pack_metadata(table_view const &table, uint8_t const *contiguous_buffer, size_t buffer_size)
Produce the metadata used for packing a table stored in a contiguous buffer.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
cuda::std::span< T, Extent > device_span
Device span is an alias of cuda::std::span.
Definition: span.hpp:320
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
cuDF interfaces
Definition: host_udf.hpp:26
Packed table and column types for serialization.
Column data in a serialized format.
Type declarations for libcudf.