avro.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include "types.hpp"
9 
11 #include <cudf/types.hpp>
13 
14 #include <memory>
15 #include <string>
16 #include <vector>
17 
18 namespace CUDF_EXPORT cudf {
19 namespace io {
27 
32  source_info _source;
33 
34  // Names of columns to read; empty means all
35  std::vector<std::string> _columns;
36 
37  // Rows to skip from the start
38  size_type _skip_rows = 0;
39  // Rows to read; -1 is all
40  size_type _num_rows = -1;
41 
47  explicit avro_reader_options(source_info src) : _source{std::move(src)} {}  // Constructs options whose _source is move-initialized from src.
48 
50 
51  public:
57  avro_reader_options() = default;  // Default constructor; _skip_rows and _num_rows take their in-class initializers (0 and -1).
58 
64  [[nodiscard]] source_info const& get_source() const { return _source; }  // Returns the stored source info by const reference.
65 
71  [[nodiscard]] std::vector<std::string> get_columns() const { return _columns; }  // Returns a copy (by value) of the column names to read; empty means all.
72 
78  [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; }  // Returns the number of rows to skip from the start.
79 
85  [[nodiscard]] size_type get_num_rows() const { return _num_rows; }  // Returns the number of rows to read; -1 means all.
86 
92  void set_source(source_info src) { _source = std::move(src); }  // Replaces the stored source info, moving from src.
93 
99  void set_columns(std::vector<std::string> col_names) { _columns = std::move(col_names); }  // Replaces the column names to read, moving from col_names.
100 
106  void set_skip_rows(size_type val) { _skip_rows = val; }  // Sets the number of rows to skip from the start.
107 
113  void set_num_rows(size_type val) { _num_rows = val; }  // Sets the number of rows to read; the member's default of -1 means all.
114 
122 };
123 
128  avro_reader_options options;
129 
130  public:
137 
143  explicit avro_reader_options_builder(source_info src) : options{std::move(src)} {}  // Initializes the wrapped options object from src (moved in).
144 
151  avro_reader_options_builder& columns(std::vector<std::string> col_names)  // Sets the column names on the wrapped options.
152  {
153  options._columns = std::move(col_names);
154  return *this;  // Returning the builder itself lets setter calls be chained.
155  }
156 
164  {
165  options._skip_rows = val;
166  return *this;
167  }
168 
176  {
177  options._num_rows = val;
178  return *this;
179  }
180 
184  operator avro_reader_options&&() { return std::move(options); }  // Implicit conversion: yields the wrapped options as an rvalue reference so the caller can move from it.
185 
193  avro_reader_options&& build() { return std::move(options); }  // Returns the wrapped options as an rvalue reference; the caller may move from it.
194 };
195 
214  avro_reader_options const& options,
217  // end of group
219 } // namespace io
220 } // namespace CUDF_EXPORT cudf
Builder to build options for read_avro().
Definition: avro.hpp:127
avro_reader_options_builder()=default
Default constructor.
avro_reader_options_builder(source_info src)
Constructor from source info.
Definition: avro.hpp:143
avro_reader_options_builder & columns(std::vector< std::string > col_names)
Sets the names of the columns to be read.
Definition: avro.hpp:151
avro_reader_options_builder & num_rows(size_type val)
Sets number of rows to read.
Definition: avro.hpp:175
avro_reader_options_builder & skip_rows(size_type val)
Sets number of rows to skip.
Definition: avro.hpp:163
avro_reader_options && build()
Moves the avro_reader_options member once it's built.
Definition: avro.hpp:193
Settings to use for read_avro().
Definition: avro.hpp:31
void set_source(source_info src)
Sets source info.
Definition: avro.hpp:92
avro_reader_options()=default
Default constructor.
std::vector< std::string > get_columns() const
Returns names of the columns to be read.
Definition: avro.hpp:71
static avro_reader_options_builder builder(source_info src)
Creates an avro_reader_options_builder, which will build avro_reader_options.
void set_skip_rows(size_type val)
Sets number of rows to skip.
Definition: avro.hpp:106
size_type get_num_rows() const
Returns number of rows to read.
Definition: avro.hpp:85
source_info const & get_source() const
Returns source info.
Definition: avro.hpp:64
void set_columns(std::vector< std::string > col_names)
Sets the names of the columns to be read.
Definition: avro.hpp:99
void set_num_rows(size_type val)
Sets number of rows to read.
Definition: avro.hpp:113
size_type get_skip_rows() const
Returns number of rows to skip from the start.
Definition: avro.hpp:78
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
table_with_metadata read_avro(avro_reader_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Reads an Avro dataset into a set of columns.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
cuDF interfaces
Definition: host_udf.hpp:26
Source information for read interfaces.
Definition: io/types.hpp:316
Table with table metadata used by io readers to return the metadata by value.
Definition: io/types.hpp:292
Class definitions for (mutable)_table_view
Type declarations for libcudf.