parquet_io_utils.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/io/datasource.hpp>
10 
11 #include <rmm/cuda_stream_view.hpp>
12 #include <rmm/device_buffer.hpp>
13 #include <rmm/resource_ref.hpp>
14 
15 #include <functional>
16 #include <future>
17 #include <tuple>
18 #include <vector>
19 
25 namespace CUDF_EXPORT cudf {
26 namespace io::parquet {
27 
36 
45 [[nodiscard]] std::unique_ptr<cudf::io::datasource::buffer> fetch_footer_to_host(
46  cudf::io::datasource& datasource);
47 
/**
 * @brief Fetches host buffers of Parquet footer bytes from multiple input data sources.
 *
 * @param datasources Span of references to the input data sources
 * @return Vector of owning pointers to host buffers holding the footer bytes
 *         (presumably one buffer per entry in `datasources`, in the same order;
 *         confirm against the implementation)
 */
59 [[nodiscard]] std::vector<std::unique_ptr<cudf::io::datasource::buffer>> fetch_footers_to_host(
60  cudf::host_span<std::reference_wrapper<cudf::io::datasource> const> datasources);
61 
/**
 * @brief Fetches a host buffer of Parquet page index from the input data source.
 *
 * @param datasource Input data source to read from
 * @param page_index_bytes Byte range (offset and size) of the page index; presumably
 *        relative to the start of `datasource` (confirm against the implementation)
 * @return Owning pointer to a host buffer holding the page index bytes
 */
71 [[nodiscard]] std::unique_ptr<cudf::io::datasource::buffer> fetch_page_index_to_host(
72  cudf::io::datasource& datasource, byte_range_info const page_index_bytes);
73 
/**
 * @brief Fetches host buffers of Parquet page index bytes from multiple input data sources.
 *
 * @param datasources Span of references to the input data sources
 * @param page_index_bytes_per_source Span of byte ranges (offset and size) of the page
 *        indexes; presumably the i-th range applies to the i-th data source, so both
 *        spans are expected to have the same length (confirm against the implementation)
 * @return Vector of owning pointers to host buffers holding the page index bytes
 */
87 [[nodiscard]] std::vector<std::unique_ptr<cudf::io::datasource::buffer>> fetch_page_indexes_to_host(
88  cudf::host_span<std::reference_wrapper<cudf::io::datasource> const> datasources,
89  cudf::host_span<byte_range_info const> page_index_bytes_per_source);
90 
104 std::tuple<std::vector<rmm::device_buffer>,
105  std::vector<cudf::device_span<uint8_t const>>,
106  std::future<void>>
109  rmm::cuda_stream_view stream,
111 
125 std::tuple<std::vector<rmm::device_buffer>,
126  std::vector<std::vector<cudf::device_span<uint8_t const>>>,
127  std::future<void>>
128 fetch_byte_ranges_to_device_async(
129  cudf::host_span<std::reference_wrapper<cudf::io::datasource> const> datasources,
130  cudf::host_span<std::vector<byte_range_info> const> byte_ranges_per_source,
131  rmm::cuda_stream_view stream,
132  rmm::device_async_resource_ref mr);
133  // end of group
135 } // namespace io::parquet
136 } // namespace CUDF_EXPORT cudf
Interface class for providing input data to the readers.
Definition: datasource.hpp:31
stores offset and size used to indicate a byte range
std::unique_ptr< cudf::io::datasource::buffer > fetch_page_index_to_host(cudf::io::datasource &datasource, byte_range_info const page_index_bytes)
Fetches a host buffer of Parquet page index from the input data source.
std::tuple< std::vector< rmm::device_buffer >, std::vector< std::vector< cudf::device_span< uint8_t const > > >, std::future< void > > fetch_byte_ranges_to_device_async(cudf::host_span< std::reference_wrapper< cudf::io::datasource > const > datasources, cudf::host_span< std::vector< byte_range_info > const > byte_ranges_per_source, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
Fetches lists of byte ranges from multiple datasources into device buffers.
std::unique_ptr< cudf::io::datasource::buffer > fetch_footer_to_host(cudf::io::datasource &datasource)
Fetches a host buffer of Parquet footer bytes from the input data source.
std::vector< std::unique_ptr< cudf::io::datasource::buffer > > fetch_page_indexes_to_host(cudf::host_span< std::reference_wrapper< cudf::io::datasource > const > datasources, cudf::host_span< byte_range_info const > page_index_bytes_per_source)
Fetches host buffers of Parquet page index bytes from multiple input data sources.
std::vector< std::unique_ptr< cudf::io::datasource::buffer > > fetch_footers_to_host(cudf::host_span< std::reference_wrapper< cudf::io::datasource > const > datasources)
Fetches host buffers of Parquet footer bytes from multiple input data sources.
cuda::mr::resource_ref< cuda::mr::device_accessible > device_async_resource_ref
cuDF interfaces
Definition: host_udf.hpp:26
C++20 std::span with reduced feature set.
Definition: span.hpp:184