parquet.hpp
1 
6 #pragma once
7 #include <cstddef>
8 #include <memory>
9 
10 #include <cudf/ast/expressions.hpp>
11 #include <cudf/io/parquet.hpp>
12 #include <cudf/types.hpp>
13 
14 #include <rapidsmpf/communicator/communicator.hpp>
15 #include <rapidsmpf/owning_wrapper.hpp>
16 #include <rapidsmpf/streaming/core/actor.hpp>
17 #include <rapidsmpf/streaming/core/channel.hpp>
18 #include <rapidsmpf/streaming/core/context.hpp>
19 
20 namespace rapidsmpf::streaming {
21 
/// Filter AST expression with lifetime/stream management.
///
/// NOTE(review): the cross-reference entries for this struct also list
/// `rmm::cuda_stream_view stream` (parquet.hpp:26, "stream the filter's
/// scalars are valid on") and `OwningWrapper owner` (parquet.hpp:28, "owner
/// of all objects in the filter"). Those members do not appear in this
/// listing -- confirm against the real header before relying on the layout.
25 struct Filter {
/// Filter expression, held by reference.
27  cudf::ast::expression& filter;
29 };
30 
31 namespace actor {
/// Parameter list of `read_parquet`, which asynchronously reads parquet files
/// into an output channel and returns an `Actor`.
///
/// NOTE(review): the opening `Actor read_parquet(` line of this declaration
/// is missing from this listing; the full signature is only visible in the
/// cross-reference text further down -- confirm against the real header.
///
/// Parameters, as far as this listing shows:
///  - ctx, comm: shared context and communicator handles; their exact role is
///    not described here (TODO confirm).
///  - ch_out: the output channel the read data is sent into.
///  - num_producers: not described here; presumably the number of concurrent
///    producers feeding `ch_out` (TODO confirm).
///  - options: cudf parquet reader options, taken by value.
///  - num_rows_per_chunk: chunk size expressed as a row count (see the TODO
///    below about switching to a byte count).
///  - filter: optional filter; defaults to nullptr.
54  std::shared_ptr<Context> ctx,
55  std::shared_ptr<Communicator> comm,
56  std::shared_ptr<Channel> ch_out,
57  std::size_t num_producers,
58  cudf::io::parquet_reader_options options,
59  // TODO: use byte count, not row count?
60  cudf::size_type num_rows_per_chunk,
61  std::unique_ptr<Filter> filter = nullptr
62 );
63 } // namespace actor
64 } // namespace rapidsmpf::streaming
Utility class to store an arbitrary type-erased object while another object is alive.
Actor read_parquet(std::shared_ptr< Context > ctx, std::shared_ptr< Communicator > comm, std::shared_ptr< Channel > ch_out, std::size_t num_producers, cudf::io::parquet_reader_options options, cudf::size_type num_rows_per_chunk, std::unique_ptr< Filter > filter=nullptr)
Asynchronously read parquet files into an output channel.
coro::task< void > Actor
Alias for an actor in a streaming graph.
Definition: actor.hpp:18
Filter AST expression with lifetime/stream management.
Definition: parquet.hpp:25
cudf::ast::expression & filter
Filter expression.
Definition: parquet.hpp:27
OwningWrapper owner
Owner of all objects in the filter.
Definition: parquet.hpp:28
rmm::cuda_stream_view stream
Stream the filter's scalars are valid on.
Definition: parquet.hpp:26