26 #include <cudf/utilities/export.hpp>
32 namespace CUDF_EXPORT
cudf {
240 std::vector<orc_column_schema> children)
241 : _name{name}, _type_kind{type}, _children{std::move(children)}
250 [[nodiscard]]
auto name()
const {
return _name; }
257 [[nodiscard]]
auto type_kind()
const {
return _type_kind; }
264 [[nodiscard]]
auto const&
children() const& {
return _children; }
270 [[nodiscard]]
auto children() && {
return std::move(_children); }
279 [[nodiscard]]
auto const&
child(
int idx)
const& {
return children().at(idx); }
285 [[nodiscard]]
auto child(
int idx) && {
return std::move(children().at(idx)); }
297 std::vector<orc_column_schema> _children;
317 [[nodiscard]]
auto const&
root() const& {
return _root; }
323 [[nodiscard]]
auto root() && {
return std::move(_root); }
342 : _schema{std::move(schema)}, _num_rows{num_rows}, _num_stripes{num_stripes}
351 [[nodiscard]]
auto const&
schema()
const {
return _schema; }
361 [[nodiscard]]
auto num_rows()
const {
return _num_rows; }
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
raw_orc_statistics read_raw_orc_statistics(source_info const &src_info, rmm::cuda_stream_view stream=cudf::get_default_stream())
Reads file-level and stripe-level statistics of ORC dataset.
parsed_orc_statistics read_parsed_orc_statistics(source_info const &src_info, rmm::cuda_stream_view stream=cudf::get_default_stream())
Reads and parses file-level and stripe-level statistics of ORC dataset.
orc_metadata read_orc_metadata(source_info const &src_info, rmm::cuda_stream_view stream=cudf::get_default_stream())
Reads metadata of ORC dataset.
sum_statistics< int64_t > binary_statistics
Statistics for binary columns.
minmax_statistics< int32_t > date_statistics
Statistics for date(time) columns.
std::variant< no_statistics, integer_statistics, double_statistics, string_statistics, bucket_statistics, decimal_statistics, date_statistics, binary_statistics, timestamp_statistics > statistics_type
Variant type for ORC type-specific column statistics.
std::monostate no_statistics
Monostate type alias for the statistics variant.
TypeKind
Identifies a data type in an orc file.
int32_t size_type
Row index type for columns and tables.
cuDF-IO API type definitions
Statistics for boolean columns.
std::vector< uint64_t > count
count of true values
Contains per-column ORC statistics.
statistics_type type_specific_stats
type-specific statistics
std::optional< uint64_t > number_of_values
number of values in the column
std::optional< bool > has_null
column has any nulls
column_statistics(orc::column_statistics &&detail_statistics)
Construct a new column statistics object.
Statistics for decimal columns.
Statistics for floating point columns.
Statistics for integral columns.
Base class for column statistics that include optional minimum and maximum.
std::optional< T > minimum
Minimum value.
std::optional< T > maximum
Maximum value.
Schema of an ORC column, including the nested columns.
auto const & children() const &
Returns schemas of all child columns.
orc_column_schema(std::string_view name, orc::TypeKind type, std::vector< orc_column_schema > children)
Constructs a column schema from its name, ORC type kind, and child column schemas.
auto child(int idx) &&
Returns schema of the child with the given index.
auto type_kind() const
Returns ORC type of the column.
auto const & child(int idx) const &
Returns schema of the child with the given index.
auto name() const
Returns ORC column name; can be empty.
auto num_children() const
Returns the number of child columns.
auto children() &&
Returns schemas of all child columns.
auto root() &&
Returns the schema of the struct column that contains all columns as fields.
auto const & root() const &
Returns the schema of the struct column that contains all columns as fields.
orc_schema(orc_column_schema root_column_schema)
Constructs an ORC schema from the schema of its root column.
Holds column names and parsed file-level and stripe-level statistics.
std::vector< std::vector< column_statistics > > stripes_stats
stripe-level statistics
std::vector< std::string > column_names
column names
std::vector< column_statistics > file_stats
file-level statistics
Holds column names and buffers containing raw file-level and stripe-level statistics.
std::vector< std::vector< std::string > > stripes_stats
Stripe-level statistics for each column.
std::vector< std::string > column_names
Column names.
std::vector< std::string > file_stats
File-level statistics for each column.
Source information for read interfaces.
Statistics for string columns.
Base class for column statistics that include an optional sum.
std::optional< T > sum
Sum of values in column.
Statistics for timestamp columns.
std::optional< uint32_t > minimum_nanos
nanoseconds part of the minimum
std::optional< uint32_t > maximum_nanos
nanoseconds part of the maximum
std::optional< int64_t > minimum_utc
minimum in milliseconds
std::optional< int64_t > maximum_utc
maximum in milliseconds