contiguous_split.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/table/table.hpp>
9 #include <cudf/types.hpp>
10 #include <cudf/utilities/export.hpp>
12 
13 #include <memory>
14 #include <vector>
15 
16 namespace CUDF_EXPORT cudf {
17 
33  : metadata(std::make_unique<std::vector<uint8_t>>()),
34  gpu_data(std::make_unique<rmm::device_buffer>())
35  {
36  }
37 
// Construct a new packed columns object.
//
// Both arguments are rvalue references to owning pointers; each is moved into
// the member of the same role, so this object takes over ownership of the
// two buffers.
//
// md: host-side metadata buffer, moved into `metadata`.
// gd: device-side data buffer, moved into `gpu_data`.
44  packed_columns(std::unique_ptr<std::vector<uint8_t>>&& md,
45  std::unique_ptr<rmm::device_buffer>&& gd)
46  : metadata(std::move(md)), gpu_data(std::move(gd))
47  {
48  }
49 
// Host-side metadata buffer (owned).
50  std::unique_ptr<std::vector<uint8_t>> metadata;
// Device-side data buffer (owned).
51  std::unique_ptr<rmm::device_buffer> gpu_data;
52 };
53 
// The result(s) of a cudf::contiguous_split.
//
// NOTE(review): this listing omits the member declarations (listing lines
// 67-68). The cross-reference index for this file lists two members:
//   cudf::table_view table  - result table_view of a cudf::contiguous_split
//   packed_columns data     - column data owned
// Confirm their declaration order against the real header.
66 struct packed_table {
69 };
70 
111 std::vector<packed_table> contiguous_split(
112  cudf::table_view const& input,
113  std::vector<size_type> const& splits,
116 
117 namespace detail {
118 
// Forward declaration only: the type is left incomplete in this header.
// It is used below as the pointee of the private `state` member
// (std::unique_ptr<detail::contiguous_split_state>); its definition is not
// visible from this file.
124 struct contiguous_split_state;
125 } // namespace detail
126 
185  public:
196  explicit chunked_pack(
197  cudf::table_view const& input,
198  std::size_t user_buffer_size,
201 
207 
// Obtain the total size of the contiguously packed table_view.
//
// Const member; the result is marked [[nodiscard]].
// NOTE(review): the unit is presumably bytes - confirm against the implementation.
213  [[nodiscard]] std::size_t get_total_contiguous_size() const;
214 
// Check whether there are chunks left to be copied.
//
// Intended to be polled before each call to next(): next() should be called
// for as long as this returns true.
220  [[nodiscard]] bool has_next() const;
221 
// Pack the next chunk into `user_buffer`. This should be called as long as
// has_next() returns true.
//
// user_buffer: device span of bytes that receives the chunk.
// Returns a std::size_t that must not be discarded.
// NOTE(review): the return value is presumably the number of bytes written
// into user_buffer - confirm against the implementation.
235  [[nodiscard]] std::size_t next(cudf::device_span<uint8_t> const& user_buffer);
236 
// Build the opaque metadata for all added columns.
//
// Returns an owning pointer to a byte vector holding the metadata; the caller
// receives ownership. Const member, so it does not modify this object's
// observable state.
242  [[nodiscard]] std::unique_ptr<std::vector<uint8_t>> build_metadata() const;
243 
263  [[nodiscard]] static std::unique_ptr<chunked_pack> create(
264  cudf::table_view const& input,
265  std::size_t user_buffer_size,
268 
269  private:
270  // Internal state of the contiguous split.
// Held through an owning pointer to detail::contiguous_split_state, which this
// header only forward-declares, so the state's layout stays out of the public
// interface.
271  std::unique_ptr<detail::contiguous_split_state> state;
272 };
273 
290 
// Produce the metadata used for packing a table stored in a contiguous buffer.
//
// table:             view of the table whose metadata is produced.
// contiguous_buffer: pointer to the contiguous buffer that stores the table.
//                    NOTE(review): whether this is host or device memory is
//                    not visible here - confirm.
// buffer_size:       presumably the size of contiguous_buffer in bytes - confirm.
//
// Returns the metadata as a vector of bytes, by value.
304 std::vector<uint8_t> pack_metadata(table_view const& table,
305  uint8_t const* contiguous_buffer,
306  size_t buffer_size);
307 
323 
// Deserialize the result of cudf::pack.
//
// metadata: pointer to the metadata bytes (presumably the host-side buffer of
//           a packed_columns - confirm).
// gpu_data: pointer to the packed column data (presumably the device-side
//           buffer of a packed_columns - confirm).
//
// Returns a table_view.
// NOTE(review): both inputs are raw, non-owning pointers, so the returned view
// presumably refers into caller-owned memory that must outlive it - confirm.
341 table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data);
342 
344 } // namespace CUDF_EXPORT cudf
Perform a chunked "pack" operation of the input table_view using a user provided buffer of size user_...
std::size_t get_total_contiguous_size() const
Obtain the total size of the contiguously packed table_view.
std::size_t next(cudf::device_span< uint8_t > const &user_buffer)
Packs the next chunk into user_buffer. This should be called as long as has_next returns true....
~chunked_pack()
Destructor that will be implemented as default. Declared with definition here because contiguous_spli...
chunked_pack(cudf::table_view const &input, std::size_t user_buffer_size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref temp_mr=cudf::get_current_device_resource_ref())
Construct a chunked_pack class.
std::unique_ptr< std::vector< uint8_t > > build_metadata() const
Build the opaque metadata for all added columns.
static std::unique_ptr< chunked_pack > create(cudf::table_view const &input, std::size_t user_buffer_size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref temp_mr=cudf::get_current_device_resource_ref())
Creates a chunked_pack instance to perform a "pack" of the table_view "input", where a buffer of user...
bool has_next() const
Function to check if there are chunks left to be copied.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
A set of cudf::column's of the same size.
Definition: table.hpp:29
std::vector< packed_table > contiguous_split(cudf::table_view const &input, std::vector< size_type > const &splits, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Performs a deep-copy split of a table_view into a vector of packed_table where each packed_table is u...
packed_columns pack(cudf::table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Deep-copy a table_view into a serialized contiguous memory format.
table_view unpack(uint8_t const *metadata, uint8_t const *gpu_data)
Deserialize the result of cudf::pack.
std::vector< uint8_t > pack_metadata(table_view const &table, uint8_t const *contiguous_buffer, size_t buffer_size)
Produce the metadata used for packing a table stored in a contiguous buffer.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
cuDF interfaces
Definition: host_udf.hpp:26
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:323
Column data in a serialized format.
packed_columns(std::unique_ptr< std::vector< uint8_t >> &&md, std::unique_ptr< rmm::device_buffer > &&gd)
Construct a new packed columns object.
std::unique_ptr< std::vector< uint8_t > > metadata
Host-side metadata buffer.
std::unique_ptr< rmm::device_buffer > gpu_data
Device-side data buffer.
The result(s) of a cudf::contiguous_split.
packed_columns data
Column data owned.
cudf::table_view table
Result table_view of a cudf::contiguous_split.
Class definition for cudf::table.
Type declarations for libcudf.