contiguous_split.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/table/table.hpp>
20 #include <cudf/types.hpp>
21 #include <cudf/utilities/export.hpp>
23 
24 #include <memory>
25 #include <vector>
26 
27 namespace CUDF_EXPORT cudf {
28 
44  : metadata(std::make_unique<std::vector<uint8_t>>()),
45  gpu_data(std::make_unique<rmm::device_buffer>())
46  {
47  }
48 
/**
 * @brief Construct a new packed columns object.
 *
 * Ownership of both buffers is transferred: each argument is moved into the
 * member of the matching role.
 *
 * @param md Host-side metadata buffer; moved into `metadata`
 * @param gd Device-side data buffer; moved into `gpu_data`
 */
55  packed_columns(std::unique_ptr<std::vector<uint8_t>>&& md,
56  std::unique_ptr<rmm::device_buffer>&& gd)
57  : metadata(std::move(md)), gpu_data(std::move(gd))
58  {
59  }
60 
61  std::unique_ptr<std::vector<uint8_t>> metadata;
62  std::unique_ptr<rmm::device_buffer> gpu_data;
63 };
64 
/**
 * @brief The result(s) of a cudf::contiguous_split.
 */
77 struct packed_table {
80 };
81 
122 std::vector<packed_table> contiguous_split(
123  cudf::table_view const& input,
124  std::vector<size_type> const& splits,
127 
128 namespace detail {
129 
135 struct contiguous_split_state;
136 } // namespace detail
137 
196  public:
207  explicit chunked_pack(
208  cudf::table_view const& input,
209  std::size_t user_buffer_size,
212 
218 
/**
 * @brief Obtain the total size of the contiguously packed table_view.
 *
 * @return The total size, as a std::size_t
 */
224  [[nodiscard]] std::size_t get_total_contiguous_size() const;
225 
/**
 * @brief Function to check if there are chunks left to be copied.
 *
 * @return true if there are chunks left to be copied, false otherwise
 */
231  [[nodiscard]] bool has_next() const;
232 
/**
 * @brief Packs the next chunk into `user_buffer`.
 *
 * This should be called as long as `has_next` returns true.
 *
 * @param user_buffer Device span that receives the next chunk
 * @return NOTE(review): presumably the number of bytes written into
 *         `user_buffer` by this call; confirm against the implementation
 */
246  [[nodiscard]] std::size_t next(cudf::device_span<uint8_t> const& user_buffer);
247 
/**
 * @brief Build the opaque metadata for all added columns.
 *
 * @return An owning pointer to a byte vector holding the metadata
 */
253  [[nodiscard]] std::unique_ptr<std::vector<uint8_t>> build_metadata() const;
254 
274  [[nodiscard]] static std::unique_ptr<chunked_pack> create(
275  cudf::table_view const& input,
276  std::size_t user_buffer_size,
279 
280  private:
281  // internal state of contiguous split
282  std::unique_ptr<detail::contiguous_split_state> state;
283 };
284 
301 
/**
 * @brief Produce the metadata used for packing a table stored in a contiguous buffer.
 *
 * @param table The table to produce metadata for
 * @param contiguous_buffer Pointer to the contiguous buffer in which the table is stored
 * @param buffer_size Size of `contiguous_buffer` (NOTE(review): presumably in bytes; confirm)
 * @return A byte vector containing the metadata
 */
315 std::vector<uint8_t> pack_metadata(table_view const& table,
316  uint8_t const* contiguous_buffer,
317  size_t buffer_size);
318 
334 
/**
 * @brief Deserialize the result of cudf::pack.
 *
 * @param metadata Pointer to the metadata bytes
 * @param gpu_data Pointer to the data bytes (NOTE(review): presumably device
 *        memory, given the name; confirm)
 * @return The deserialized table_view. A table_view is a non-owning view, so
 *         NOTE(review): the input buffers presumably must outlive it; confirm
 */
352 table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data);
353 
355 } // namespace CUDF_EXPORT cudf
Perform a chunked "pack" operation of the input table_view using a user provided buffer of size user_buffer_size.
std::size_t get_total_contiguous_size() const
Obtain the total size of the contiguously packed table_view.
std::size_t next(cudf::device_span< uint8_t > const &user_buffer)
Packs the next chunk into user_buffer. This should be called as long as has_next returns true....
~chunked_pack()
Destructor that will be implemented as default. Declared with definition here because contiguous_split_state is incomplete.
chunked_pack(cudf::table_view const &input, std::size_t user_buffer_size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref temp_mr=cudf::get_current_device_resource_ref())
Construct a chunked_pack class.
std::unique_ptr< std::vector< uint8_t > > build_metadata() const
Build the opaque metadata for all added columns.
static std::unique_ptr< chunked_pack > create(cudf::table_view const &input, std::size_t user_buffer_size, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref temp_mr=cudf::get_current_device_resource_ref())
Creates a chunked_pack instance to perform a "pack" of the table_view "input", where a buffer of user_buffer_size is filled with chunks of the overall operation.
bool has_next() const
Function to check if there are chunks left to be copied.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
std::vector< packed_table > contiguous_split(cudf::table_view const &input, std::vector< size_type > const &splits, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Performs a deep-copy split of a table_view into a vector of packed_table where each packed_table is using a single contiguous block of memory for all of the split's column data.
packed_columns pack(cudf::table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Deep-copy a table_view into a serialized contiguous memory format.
table_view unpack(uint8_t const *metadata, uint8_t const *gpu_data)
Deserialize the result of cudf::pack.
std::vector< uint8_t > pack_metadata(table_view const &table, uint8_t const *contiguous_buffer, size_t buffer_size)
Produce the metadata used for packing a table stored in a contiguous buffer.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
cuDF interfaces
Definition: host_udf.hpp:37
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:355
Column data in a serialized format.
packed_columns(std::unique_ptr< std::vector< uint8_t >> &&md, std::unique_ptr< rmm::device_buffer > &&gd)
Construct a new packed columns object.
std::unique_ptr< std::vector< uint8_t > > metadata
Host-side metadata buffer.
std::unique_ptr< rmm::device_buffer > gpu_data
Device-side data buffer.
The result(s) of a cudf::contiguous_split.
packed_columns data
Column data owned.
cudf::table_view table
Result table_view of a cudf::contiguous_split.
Class definition for cudf::table.
Type declarations for libcudf.