14 #include <cudf/utilities/export.hpp>
22 namespace cudf::io::parquet::experimental::detail {
27 class hybrid_scan_reader_impl;
33 namespace CUDF_EXPORT cudf {
34 namespace io::parquet::experimental {
115 cudf::host_span<std::vector<size_type> const> row_group_indices) const;
166 [[nodiscard]] std::pair<std::vector<byte_range_info>, std::vector<byte_range_info>>
171 std::unique_ptr<detail::hybrid_scan_reader_impl> _impl;
Multi-file variant of the experimental Hybrid Scan Parquet reader.
hybrid_scan_multifile(cudf::host_span< FileMetaData const > parquet_metadata, parquet_reader_options const &options)
Constructor for the multi-file experimental Parquet reader.
std::vector< FileMetaData > parquet_metadatas() const
Get the Parquet file metadata for all sources.
std::pair< std::vector< byte_range_info >, std::vector< byte_range_info > > secondary_filters_byte_ranges(cudf::host_span< std::vector< size_type > const > row_group_indices, parquet_reader_options const &options) const
Get byte ranges of bloom filters and dictionary pages (secondary filters) for row group pruning.
size_type total_rows_in_row_groups(cudf::host_span< std::vector< size_type > const > row_group_indices) const
Get the total number of top-level rows in the per-source row groups.
void reset_column_selection() const
Resets the current column selection.
std::vector< std::vector< size_type > > filter_row_groups_with_stats(cudf::host_span< std::vector< size_type > const > row_group_indices, parquet_reader_options const &options, rmm::cuda_stream_view stream) const
Filter the input row groups using column chunk statistics.
std::vector< std::vector< size_type > > all_row_groups(parquet_reader_options const &options) const
Get all available per-source row group indices from the parquet files.
std::vector< std::vector< size_type > > filter_row_groups_with_byte_range(cudf::host_span< std::vector< size_type > const > row_group_indices, parquet_reader_options const &options) const
Filter the row groups using the byte range specified by [bytes_to_skip, bytes_to_skip + bytes_to_read).
std::vector< byte_range_info > page_index_byte_ranges() const
Get byte ranges of the page index for all sources.
void setup_page_indexes(cudf::host_span< cudf::host_span< uint8_t const > const > page_index_bytes) const
Setup the per-source page index within each Parquet file metadata.
~hybrid_scan_multifile()
Destructor for the multi-file experimental Parquet reader.
hybrid_scan_multifile(cudf::host_span< cudf::host_span< uint8_t const > const > footer_bytes, parquet_reader_options const &options)
Constructor for the multi-file experimental Parquet reader.
Settings for read_parquet().
Stores offset and size used to indicate a byte range.
int32_t size_type
Row index type for columns and tables.
cuDF-IO API type definitions
Parquet footer schema structs.
C++20 std::span with reduced feature set.
Type declarations for libcudf.