Files | Functions
IO Utilities

Files

file  parquet_io_utils.hpp
 IO utilities for the Parquet and Hybrid scan readers.
 

Functions

std::unique_ptr< cudf::io::datasource::buffer > cudf::io::parquet::fetch_footer_to_host (cudf::io::datasource &datasource)
 Fetches a host buffer of Parquet footer bytes from the input data source. More...
 
std::vector< std::unique_ptr< cudf::io::datasource::buffer > > cudf::io::parquet::fetch_footers_to_host (cudf::host_span< std::reference_wrapper< cudf::io::datasource > const > datasources)
 Fetches host buffers of Parquet footer bytes from multiple input data sources. More...
 
std::unique_ptr< cudf::io::datasource::buffer > cudf::io::parquet::fetch_page_index_to_host (cudf::io::datasource &datasource, byte_range_info const page_index_bytes)
 Fetches a host buffer of Parquet page index from the input data source. More...
 
std::vector< std::unique_ptr< cudf::io::datasource::buffer > > cudf::io::parquet::fetch_page_indexes_to_host (cudf::host_span< std::reference_wrapper< cudf::io::datasource > const > datasources, cudf::host_span< byte_range_info const > page_index_bytes_per_source)
 Fetches host buffers of Parquet page index bytes from multiple input data sources. More...
 
std::tuple< std::vector< rmm::device_buffer >, std::vector< cudf::device_span< uint8_t const > >, std::future< void > > cudf::io::parquet::fetch_byte_ranges_to_device_async (cudf::io::datasource &datasource, cudf::host_span< byte_range_info const > byte_ranges, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
 Fetches a list of byte ranges from a datasource into device buffers. More...
 
std::tuple< std::vector< rmm::device_buffer >, std::vector< std::vector< cudf::device_span< uint8_t const > > >, std::future< void > > cudf::io::parquet::fetch_byte_ranges_to_device_async (cudf::host_span< std::reference_wrapper< cudf::io::datasource > const > datasources, cudf::host_span< std::vector< byte_range_info > const > byte_ranges_per_source, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)
 Fetches lists of byte ranges from multiple datasources into device buffers. More...
 

Detailed Description

Function Documentation

◆ fetch_byte_ranges_to_device_async() [1/2]

std::tuple<std::vector<rmm::device_buffer>, std::vector<std::vector<cudf::device_span<uint8_t const> > >, std::future<void> > cudf::io::parquet::fetch_byte_ranges_to_device_async ( cudf::host_span< std::reference_wrapper< cudf::io::datasource > const >  datasources,
cudf::host_span< std::vector< byte_range_info > const >  byte_ranges_per_source,
rmm::cuda_stream_view  stream,
rmm::device_async_resource_ref  mr 
)

Fetches lists of byte ranges from multiple datasources into device buffers.

Parameters
datasources - Input datasources
byte_ranges_per_source - Vector of byte ranges to fetch, one per datasource
stream - CUDA stream
mr - Device memory resource
Returns
A tuple containing a vector of device buffers, a vector of vectors of device spans (one per byte range per datasource), and a future to wait on the read tasks

◆ fetch_byte_ranges_to_device_async() [2/2]

std::tuple<std::vector<rmm::device_buffer>, std::vector<cudf::device_span<uint8_t const> >, std::future<void> > cudf::io::parquet::fetch_byte_ranges_to_device_async ( cudf::io::datasource &  datasource,
cudf::host_span< byte_range_info const >  byte_ranges,
rmm::cuda_stream_view  stream,
rmm::device_async_resource_ref  mr 
)

Fetches a list of byte ranges from a datasource into device buffers.

Parameters
datasource - Input datasource
byte_ranges - Byte ranges to fetch
stream - CUDA stream
mr - Device memory resource
Returns
A tuple containing the device buffers, the device spans of the fetched data, and a future to wait on the read tasks

◆ fetch_footer_to_host()

std::unique_ptr<cudf::io::datasource::buffer> cudf::io::parquet::fetch_footer_to_host ( cudf::io::datasource &  datasource)

Fetches a host buffer of Parquet footer bytes from the input data source.

Parameters
datasource - Input data source
Returns
Host buffer containing footer bytes

◆ fetch_footers_to_host()

std::vector<std::unique_ptr<cudf::io::datasource::buffer> > cudf::io::parquet::fetch_footers_to_host ( cudf::host_span< std::reference_wrapper< cudf::io::datasource > const >  datasources)

Fetches host buffers of Parquet footer bytes from multiple input data sources.

Parameters
datasources - Input data sources
Returns
Vector of host buffers containing footer bytes, one per datasource
Exceptions
cudf::logic_error - if any datasource contains a corrupted Parquet magic number, header or footer, or has an invalid footer length.

◆ fetch_page_index_to_host()

std::unique_ptr<cudf::io::datasource::buffer> cudf::io::parquet::fetch_page_index_to_host ( cudf::io::datasource &  datasource,
byte_range_info const  page_index_bytes 
)

Fetches a host buffer of Parquet page index from the input data source.

Parameters
datasource - Input datasource
page_index_bytes - Byte range of page index
Returns
Host buffer containing page index bytes

◆ fetch_page_indexes_to_host()

std::vector<std::unique_ptr<cudf::io::datasource::buffer> > cudf::io::parquet::fetch_page_indexes_to_host ( cudf::host_span< std::reference_wrapper< cudf::io::datasource > const >  datasources,
cudf::host_span< byte_range_info const >  page_index_bytes_per_source 
)

Fetches host buffers of Parquet page index bytes from multiple input data sources.

Parameters
datasources - Input datasources
page_index_bytes_per_source - Byte ranges of page index, one per datasource
Returns
Vector of host buffers containing page index bytes, one per datasource
Exceptions
cudf::logic_error - if the number of datasources does not match the number of page index byte ranges
std::out_of_range - if any page index byte range is out of range for its datasource