11 #include <cudf/utilities/export.hpp>
15 namespace CUDF_EXPORT cudf {
16 namespace io::parquet::experimental {
40 struct roaring_bitmap_impl;
57 std::size_t chunk_read_limit,
88 std::size_t chunk_read_limit,
89 std::size_t pass_read_limit,
125 std::unique_ptr<cudf::io::chunked_parquet_reader> _reader;
126 std::queue<size_t> _row_group_row_offsets;
127 std::queue<size_type> _row_group_row_counts;
128 std::unique_ptr<roaring_bitmap_impl> _deletion_vector;
130 bool _is_unspecified_row_group_data;
The chunked parquet reader class to read a Parquet source iteratively in a series of tables,...
chunked_parquet_reader(std::size_t chunk_read_limit, parquet_reader_options const &options, cudf::host_span< cuda::std::byte const > serialized_roaring64, cudf::host_span< size_t const > row_group_offsets, cudf::host_span< size_type const > row_group_num_rows, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Constructor for the chunked reader.
table_with_metadata read_chunk()
Reads a chunk of the table from the Parquet source, prepends an index column to it, and filters the resulta...
chunked_parquet_reader(std::size_t chunk_read_limit, std::size_t pass_read_limit, parquet_reader_options const &options, cudf::host_span< cuda::std::byte const > serialized_roaring64, cudf::host_span< size_t const > row_group_offsets, cudf::host_span< size_type const > row_group_num_rows, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Constructor for the chunked reader.
~chunked_parquet_reader()
Destructor, destroying the internal reader instance and the roaring bitmap deletion vector.
bool has_next() const
Check if there is any data in the given source that has not yet been read.
Settings for read_parquet().
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
table_with_metadata read_parquet(parquet_reader_options const &options, cudf::host_span< cuda::std::byte const > serialized_roaring64, cudf::host_span< size_t const > row_group_offsets, cudf::host_span< size_type const > row_group_num_rows, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource_ref())
Reads a table from parquet source, prepends an index column to it, deserializes the roaring64 deletio...
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
device_async_resource_ref get_current_device_resource_ref()
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
cuDF-IO API type definitions
C++20 std::span with reduced feature set.
Type declarations for libcudf.