16 #include <cudf/utilities/export.hpp>
18 #include <string_view>
21 namespace CUDF_EXPORT
cudf {
53 std::vector<parquet_column_schema> children)
54 : _name{name}, _type{type}, _children{std::move(children)}
63 [[nodiscard]]
auto name()
const {
return _name; }
70 [[nodiscard]]
auto type()
const {
return _type; }
77 [[nodiscard]]
auto const&
children() const& {
return _children; }
83 [[nodiscard]]
auto children() && {
return std::move(_children); }
92 [[nodiscard]]
auto const&
child(
int idx)
const& {
return children().at(idx); }
98 [[nodiscard]]
auto child(
int idx) && {
return std::move(children().at(idx)); }
111 std::vector<parquet_column_schema> _children;
138 [[nodiscard]]
auto const&
root() const& {
return _root; }
144 [[nodiscard]]
auto root() && {
return std::move(_root); }
184 std::vector<size_type> num_rowgroups_per_file,
186 std::vector<row_group_metadata> rg_metadata,
188 : _schema{std::move(schema)},
190 _num_rowgroups{num_rowgroups},
191 _num_rowgroups_per_file{std::move(num_rowgroups_per_file)},
192 _file_metadata{std::move(file_metadata)},
193 _rowgroup_metadata{std::move(rg_metadata)},
203 [[nodiscard]]
auto const&
schema()
const {
return _schema; }
212 [[nodiscard]]
auto num_rows()
const {
return _num_rows; }
233 [[nodiscard]]
auto const&
metadata()
const {
return _file_metadata; }
255 std::vector<size_type> _num_rowgroups_per_file;
256 key_value_metadata _file_metadata;
257 std::vector<row_group_metadata> _rowgroup_metadata;
258 column_chunk_metadata _column_chunk_metadata;
std::vector< parquet::FileMetaData > read_parquet_footers(cudf::host_span< std::unique_ptr< cudf::io::datasource > const > sources)
Constructs FileMetaData objects from a parquet dataset.
parquet_metadata read_parquet_metadata(source_info const &src_info)
Reads metadata of a parquet dataset.
Type
Basic data types in Parquet, determines how data is physically stored.
int32_t size_type
Row index type for columns and tables.
cuDF-IO API type definitions
Parquet footer schema structs.
C++20 std::span with reduced feature set.
Schema of a parquet column, including the nested columns.
auto const & child(int idx) const &
Returns schema of the child with the given index.
auto name() const
Returns parquet column name; can be empty.
auto const & children() const &
Returns schemas of all child columns.
auto type() const
Returns parquet physical type of the column.
auto children() &&
Returns schemas of all child columns.
auto num_children() const
Returns the number of child columns.
parquet_column_schema()=default
Default constructor.
parquet_column_schema(std::string_view name, Type type, std::vector< parquet_column_schema > children)
Constructs a column schema from its name, physical type, and child schemas.
auto child(int idx) &&
Returns schema of the child with the given index.
Schema of a parquet file.
parquet_schema()=default
Default constructor.
auto root() &&
Returns the schema of the struct column that contains all columns as fields.
auto const & root() const &
Returns the schema of the struct column that contains all columns as fields.
parquet_schema(parquet_column_schema root_column_schema)
Constructs a parquet schema from its root column schema.
Source information for read interfaces.