11 #include <cudf/utilities/export.hpp>
15 namespace CUDF_EXPORT
cudf {
16 namespace io::parquet::experimental {
64 struct roaring_bitmap_impl;
79 std::size_t chunk_read_limit,
106 std::size_t chunk_read_limit,
107 std::size_t pass_read_limit,
141 std::unique_ptr<cudf::io::chunked_parquet_reader> _reader;
142 std::queue<size_t> _row_group_row_offsets;
143 std::queue<size_type> _row_group_row_counts;
144 std::queue<roaring_bitmap_impl> _deletion_vectors;
145 std::queue<size_type> _deletion_vector_row_counts;
147 bool _is_unspecified_row_group_data;
The chunked parquet reader class to read a Parquet source iteratively in a series of tables,...
table_with_metadata read_chunk()
Read a chunk of table from the Parquet source, prepend an index column to it, and filter the resulta...
chunked_parquet_reader(std::size_t chunk_read_limit, std::size_t pass_read_limit, parquet_reader_options const &options, deletion_vector_info const &deletion_vector_info, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Constructor for the chunked reader.
chunked_parquet_reader(std::size_t chunk_read_limit, parquet_reader_options const &options, deletion_vector_info const &deletion_vector_info, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Constructor for the chunked reader.
~chunked_parquet_reader()
Destructor, destroying the internal reader instance and the roaring bitmap deletion vector.
bool has_next() const
Check if there is any data in the given source that has not yet been read.
Settings for read_parquet().
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
table_with_metadata read_parquet(parquet_reader_options const &options, deletion_vector_info const &deletion_vector_info, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource_ref())
Reads a table from parquet source, prepends an index column to it, deserializes the specified 64-bit ...
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
device_async_resource_ref get_current_device_resource_ref()
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
cuDF-IO API type definitions
Struct used to specify information about deletion vectors and the index column to the experimental pa...
std::vector< cudf::host_span< cuda::std::byte const > > serialized_roaring_bitmaps
Host spans of 64-bit roaring bitmaps serialized in portable format.
std::vector< size_type > deletion_vector_row_counts
Number of rows spanned by each deletion vector.
std::vector< size_t > row_group_offsets
Row index offset for each row group to be read from the Parquet source(s)
std::vector< size_type > row_group_num_rows
Number of rows in each row group to be read from the Parquet source(s)
Type declarations for libcudf.