deletion_vectors.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/io/parquet.hpp>
9 #include <cudf/io/types.hpp>
10 #include <cudf/types.hpp>
11 #include <cudf/utilities/export.hpp>
12 
13 #include <queue>
14 
15 namespace CUDF_EXPORT cudf {
16 namespace io::parquet::experimental {
17 
38  public:
40  struct roaring_bitmap_impl;
41 
57  std::size_t chunk_read_limit,
58  parquet_reader_options const& options,
59  cudf::host_span<cuda::std::byte const> serialized_roaring64,
60  cudf::host_span<size_t const> row_group_offsets,
61  cudf::host_span<size_type const> row_group_num_rows,
64 
88  std::size_t chunk_read_limit,
89  std::size_t pass_read_limit,
90  parquet_reader_options const& options,
91  cudf::host_span<cuda::std::byte const> serialized_roaring64,
92  cudf::host_span<size_t const> row_group_offsets,
93  cudf::host_span<size_type const> row_group_num_rows,
96 
102 
108  [[nodiscard]] bool has_next() const;
109 
123 
124  private:
125  std::unique_ptr<cudf::io::chunked_parquet_reader> _reader;
126  std::queue<size_t> _row_group_row_offsets;
127  std::queue<size_type> _row_group_row_counts;
128  std::unique_ptr<roaring_bitmap_impl> _deletion_vector;
129  size_t _start_row;
130  bool _is_unspecified_row_group_data;
131  rmm::cuda_stream_view _stream;
134 };
135 
160  parquet_reader_options const& options,
161  cudf::host_span<cuda::std::byte const> serialized_roaring64,
162  cudf::host_span<size_t const> row_group_offsets,
163  cudf::host_span<size_type const> row_group_num_rows,
166  // end of group
168 
169 } // namespace io::parquet::experimental
170 } // namespace CUDF_EXPORT cudf
The chunked parquet reader class to read a Parquet source iteratively in a series of tables,...
chunked_parquet_reader(std::size_t chunk_read_limit, parquet_reader_options const &options, cudf::host_span< cuda::std::byte const > serialized_roaring64, cudf::host_span< size_t const > row_group_offsets, cudf::host_span< size_type const > row_group_num_rows, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Constructor for the chunked reader.
table_with_metadata read_chunk()
Read a chunk of the table from the Parquet source, prepend an index column to it, and filter the resulta...
chunked_parquet_reader(std::size_t chunk_read_limit, std::size_t pass_read_limit, parquet_reader_options const &options, cudf::host_span< cuda::std::byte const > serialized_roaring64, cudf::host_span< size_t const > row_group_offsets, cudf::host_span< size_type const > row_group_num_rows, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Constructor for the chunked reader.
~chunked_parquet_reader()
Destructor, destroying the internal reader instance and the roaring bitmap deletion vector.
bool has_next() const
Check if there is any data in the given source that has not yet been read.
Settings for read_parquet().
Definition: parquet.hpp:67
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
table_with_metadata read_parquet(parquet_reader_options const &options, cudf::host_span< cuda::std::byte const > serialized_roaring64, cudf::host_span< size_t const > row_group_offsets, cudf::host_span< size_type const > row_group_num_rows, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource_ref())
Reads a table from parquet source, prepends an index column to it, deserializes the roaring64 deletio...
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
device_async_resource_ref get_current_device_resource_ref()
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
cuDF-IO API type definitions
cuDF interfaces
Definition: host_udf.hpp:26
C++20 std::span with reduced feature set.
Definition: span.hpp:182
Table with table metadata used by io readers to return the metadata by value.
Definition: io/types.hpp:292
Type declarations for libcudf.