11 #include <cudf/utilities/export.hpp>
16 namespace CUDF_EXPORT
cudf {
17 namespace io::parquet::experimental {
77 std::size_t chunk_read_limit,
104 std::size_t chunk_read_limit,
105 std::size_t pass_read_limit,
139 std::unique_ptr<cudf::io::chunked_parquet_reader> _reader;
140 std::queue<size_t> _row_group_row_offsets;
141 std::queue<size_type> _row_group_row_counts;
142 std::queue<cudf::roaring_bitmap> _deletion_vectors;
143 std::queue<size_type> _deletion_vector_row_counts;
145 bool _is_unspecified_row_group_data;
190 cudf::size_type max_chunk_rows = std::numeric_limits<size_type>::max(),
The chunked parquet reader class to read a Parquet source iteratively in a series of tables,...
table_with_metadata read_chunk()
Reads a chunk of the table from the Parquet source, prepends an index column to it, and filters the resulta...
chunked_parquet_reader(std::size_t chunk_read_limit, std::size_t pass_read_limit, parquet_reader_options const &options, deletion_vector_info const &deletion_vector_info, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Constructor for the chunked reader.
chunked_parquet_reader(std::size_t chunk_read_limit, parquet_reader_options const &options, deletion_vector_info const &deletion_vector_info, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Constructor for the chunked reader.
~chunked_parquet_reader()
Destructor, destroying the internal reader instance and the roaring bitmap deletion vector.
bool has_next() const
Check if there is any data in the given source that has not yet been read.
Settings for read_parquet().
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
size_t compute_num_deleted_rows(deletion_vector_info const &deletion_vector_info, cudf::size_type max_chunk_rows=std::numeric_limits< size_type >::max(), rmm::cuda_stream_view stream=cudf::get_default_stream())
Computes the number of rows deleted by the serialized 64-bit roaring bitmap deletion vectors.
table_with_metadata read_parquet(parquet_reader_options const &options, deletion_vector_info const &deletion_vector_info, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Reads a table from a Parquet source, prepends an index column to it, deserializes the specified 64-bit ...
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::resource_ref< cuda::mr::device_accessible > device_async_resource_ref
int32_t size_type
Row index type for columns and tables.
cuDF-IO API type definitions
Struct used to specify information about deletion vectors and the index column to the experimental pa...
std::vector< cudf::host_span< cuda::std::byte const > > serialized_roaring_bitmaps
Host spans of 64-bit roaring bitmaps serialized in portable format.
std::vector< size_type > deletion_vector_row_counts
Number of rows spanned by each deletion vector.
std::vector< size_t > row_group_offsets
Row index offset for each row group to be read from the Parquet source(s)
std::vector< size_type > row_group_num_rows
Number of rows in each row group to be read from the Parquet source(s)
Type declarations for libcudf.