parquet.hpp
1 
6 #pragma once
7 #include <cstddef>
8 #include <memory>
9 
10 #include <cudf/ast/expressions.hpp>
11 #include <cudf/io/parquet.hpp>
12 #include <cudf/types.hpp>
13 
14 #include <rapidsmpf/owning_wrapper.hpp>
15 #include <rapidsmpf/streaming/core/channel.hpp>
16 #include <rapidsmpf/streaming/core/context.hpp>
17 #include <rapidsmpf/streaming/core/node.hpp>
18 
19 namespace rapidsmpf::streaming {
20 
// Filter AST expression bundled with lifetime/stream management.
//
// NOTE(review): this listing skips original source lines 25 and 27. The symbol
// index at the end of the listing reports two further members on those lines:
//   - `rmm::cuda_stream_view stream` -- stream the filter's scalars are valid on.
//   - `OwningWrapper owner`          -- owner of all objects in the filter.
// Only the `filter` member is visible below.
24 struct Filter {
// Filter expression, held by reference (this member does not own it).
// NOTE(review): presumably the referenced expression is kept alive by the
// `owner` member mentioned above -- confirm against the full header.
26  cudf::ast::expression& filter;
28 };
29 
30 namespace node {
// Parameter list of `read_parquet`, which asynchronously reads parquet files
// into an output channel and returns a `Node` (an alias of `coro::task<void>`
// for a node in a streaming pipeline).
//
// NOTE(review): the declaration head (`Node read_parquet(`) and its original
// doc comment sit on source lines 31-51, which this listing does not show.
//
// Parameters:
//   ctx                - shared `Context` handle.
//                        NOTE(review): presumably the streaming context the
//                        node runs in -- confirm.
//   ch_out             - output channel that receives the data read.
//   num_producers      - NOTE(review): presumably the number of concurrent
//                        producers feeding `ch_out` -- confirm against the
//                        implementation.
//   options            - cudf parquet reader options, taken by value.
//   num_rows_per_chunk - chunk size expressed as a row count (see the TODO
//                        below about switching to a byte count).
//   filter             - optional `Filter`; ownership is transferred to the
//                        callee. Defaults to `nullptr`.
//                        NOTE(review): presumably `nullptr` means "no
//                        filtering" -- confirm.
52  std::shared_ptr<Context> ctx,
53  std::shared_ptr<Channel> ch_out,
54  std::size_t num_producers,
55  cudf::io::parquet_reader_options options,
56  // TODO: use byte count, not row count?
57  cudf::size_type num_rows_per_chunk,
58  std::unique_ptr<Filter> filter = nullptr
59 );
60 } // namespace node
61 } // namespace rapidsmpf::streaming
Utility class to store an arbitrary type-erased object while another object is alive.
Node read_parquet(std::shared_ptr< Context > ctx, std::shared_ptr< Channel > ch_out, std::size_t num_producers, cudf::io::parquet_reader_options options, cudf::size_type num_rows_per_chunk, std::unique_ptr< Filter > filter=nullptr)
Asynchronously read parquet files into an output channel.
coro::task< void > Node
Alias for a node in a streaming pipeline.
Definition: node.hpp:18
Filter AST expression with lifetime/stream management.
Definition: parquet.hpp:24
cudf::ast::expression & filter
Filter expression.
Definition: parquet.hpp:26
OwningWrapper owner
Owner of all objects in the filter.
Definition: parquet.hpp:27
rmm::cuda_stream_view stream
Stream the filter's scalars are valid on.
Definition: parquet.hpp:25