avro.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "types.hpp"
20 
22 #include <cudf/types.hpp>
23 
25 #include <rmm/resource_ref.hpp>
26 
27 #include <memory>
28 #include <string>
29 #include <vector>
30 
31 namespace CUDF_EXPORT cudf {
32 namespace io {
39 class avro_reader_options_builder;
40 
45  source_info _source;
46 
47  // Names of the columns to read; empty means all columns
48  std::vector<std::string> _columns;
49 
50  // Number of rows to skip from the start
51  size_type _skip_rows = 0;
52  // Number of rows to read; -1 means all rows
53  size_type _num_rows = -1;
54 
/**
 * @brief Constructor from source info.
 *
 * Moves `src` into the `_source` member; all other members keep their
 * in-class default initializers.
 * NOTE(review): this appears before the `public:` label, so it is presumably
 * not part of the public interface — confirm against the class declaration.
 *
 * @param src Source information used to read the Avro data
 */
60  explicit avro_reader_options(source_info src) : _source{std::move(src)} {}
61 
63 
64  public:
/**
 * @brief Default constructor.
 *
 * Members take their in-class initializers (`_skip_rows = 0`, `_num_rows = -1`).
 */
70  avro_reader_options() = default;
71 
/**
 * @brief Returns source info.
 *
 * @return Const reference to the stored source info
 */
77  [[nodiscard]] source_info const& get_source() const { return _source; }
78 
/**
 * @brief Returns the names of the columns to be read.
 *
 * The vector is returned by value, i.e. as a copy of the stored names.
 *
 * @return Names of the columns to be read; empty means all columns
 */
84  [[nodiscard]] std::vector<std::string> get_columns() const { return _columns; }
85 
/**
 * @brief Returns the number of rows to skip from the start.
 *
 * @return Number of rows to skip from the start
 */
91  [[nodiscard]] size_type get_skip_rows() const { return _skip_rows; }
92 
/**
 * @brief Returns the number of rows to read.
 *
 * @return Number of rows to read; -1 means all rows
 */
98  [[nodiscard]] size_type get_num_rows() const { return _num_rows; }
99 
/**
 * @brief Sets the names of the columns to be read.
 *
 * The argument is taken by value and moved into the stored list, replacing
 * any previously set names.
 *
 * @param col_names Vector of column names
 */
105  void set_columns(std::vector<std::string> col_names) { _columns = std::move(col_names); }
106 
/**
 * @brief Sets the number of rows to skip.
 *
 * The value is stored as given; no validation is performed here.
 *
 * @param val Number of rows to skip from the start
 */
112  void set_skip_rows(size_type val) { _skip_rows = val; }
113 
/**
 * @brief Sets the number of rows to read.
 *
 * The value is stored as given; no validation is performed here.
 *
 * @param val Number of rows to read after the skipped rows
 */
119  void set_num_rows(size_type val) { _num_rows = val; }
120 
128 };
129 
134  avro_reader_options options;
135 
136  public:
143 
/**
 * @brief Constructor from source info.
 *
 * Initializes the wrapped `options` object by moving `src` into it.
 *
 * @param src The source information used to read the Avro data
 */
149  explicit avro_reader_options_builder(source_info src) : options{std::move(src)} {}
150 
/**
 * @brief Sets the names of the columns to be read.
 *
 * @param col_names Vector of column names; moved into the options being built
 * @return This builder, to allow call chaining
 */
157  avro_reader_options_builder& columns(std::vector<std::string> col_names)
158  {
159  options._columns = std::move(col_names);
160  return *this;
161  }
162 
170  {
171  options._skip_rows = val;
172  return *this;
173  }
174 
182  {
183  options._num_rows = val;
184  return *this;
185  }
186 
/**
 * @brief Conversion to an rvalue reference to the options being built.
 *
 * Not marked `explicit`, so the builder converts implicitly. The result refers
 * to the builder's own `options` member, which lets the caller move the
 * options out of the builder.
 */
190  operator avro_reader_options&&() { return std::move(options); }
191 
/**
 * @brief Moves the avro_reader_options member once it is built.
 *
 * @return Rvalue reference to the builder's `options` member, so the caller
 *         can move the built options out of the builder
 */
199  avro_reader_options&& build() { return std::move(options); }
200 };
201 
219  avro_reader_options const& options,
221  // end of group
223 } // namespace io
224 } // namespace CUDF_EXPORT cudf
Builder to build options for read_avro().
Definition: avro.hpp:133
avro_reader_options_builder()=default
Default constructor.
avro_reader_options_builder(source_info src)
Constructor from source info.
Definition: avro.hpp:149
avro_reader_options_builder & columns(std::vector< std::string > col_names)
Sets the names of the columns to be read.
Definition: avro.hpp:157
avro_reader_options_builder & num_rows(size_type val)
Sets number of rows to read.
Definition: avro.hpp:181
avro_reader_options_builder & skip_rows(size_type val)
Sets number of rows to skip.
Definition: avro.hpp:169
avro_reader_options && build()
Moves the avro_reader_options member once it is built.
Definition: avro.hpp:199
Settings to use for read_avro().
Definition: avro.hpp:44
avro_reader_options()=default
Default constructor.
std::vector< std::string > get_columns() const
Returns names of the columns to be read.
Definition: avro.hpp:84
static avro_reader_options_builder builder(source_info src)
Creates an avro_reader_options_builder which will build avro_reader_options.
void set_skip_rows(size_type val)
Sets number of rows to skip.
Definition: avro.hpp:112
size_type get_num_rows() const
Returns number of rows to read.
Definition: avro.hpp:98
source_info const & get_source() const
Returns source info.
Definition: avro.hpp:77
void set_columns(std::vector< std::string > col_names)
Sets the names of the columns to be read.
Definition: avro.hpp:105
void set_num_rows(size_type val)
Sets number of rows to read.
Definition: avro.hpp:119
size_type get_skip_rows() const
Returns number of rows to skip from the start.
Definition: avro.hpp:91
table_with_metadata read_avro(avro_reader_options const &options, rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Reads an Avro dataset into a set of columns.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
device_memory_resource * get_current_device_resource()
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: aggregation.hpp:35
Source information for read interfaces.
Definition: io/types.hpp:337
Table with table metadata used by io readers to return the metadata by value.
Definition: io/types.hpp:292
Class definitions for (mutable)_table_view
Type declarations for libcudf.