Files
file	orc_metadata.hpp
	cuDF-IO freeform API

file	orc_types.hpp

file	parquet_metadata.hpp
	cuDF-IO freeform API

file	io/types.hpp
	cuDF-IO API type definitions

Classes
struct	cudf::io::raw_orc_statistics
	Holds column names and buffers containing raw file-level and stripe-level statistics. More...

struct	cudf::io::minmax_statistics< T >
	Base class for column statistics that include optional minimum and maximum. More...

struct	cudf::io::sum_statistics< T >
	Base class for column statistics that include an optional sum. More...

struct	cudf::io::integer_statistics
	Statistics for integral columns. More...

struct	cudf::io::double_statistics
	Statistics for floating point columns. More...

struct	cudf::io::string_statistics
	Statistics for string columns. More...

struct	cudf::io::bucket_statistics
	Statistics for boolean columns. More...

struct	cudf::io::decimal_statistics
	Statistics for decimal columns. More...

struct	cudf::io::timestamp_statistics
	Statistics for timestamp columns. More...

struct	cudf::io::column_statistics
	Contains per-column ORC statistics. More...

struct	cudf::io::parsed_orc_statistics
	Holds column names and parsed file-level and stripe-level statistics. More...

struct	cudf::io::orc_column_schema
	Schema of an ORC column, including the nested columns. More...

struct	cudf::io::orc_schema
	Schema of an ORC file. More...

class	cudf::io::orc_metadata
	Information about content of an ORC file. More...

struct	cudf::io::parquet_column_schema
	Schema of a parquet column, including the nested columns. More...

struct	cudf::io::parquet_schema
	Schema of a parquet file. More...

class	cudf::io::parquet_metadata
	Information about content of a parquet file. More...

class	cudf::io::writer_compression_statistics
	Statistics about compression performed by a writer. More...

struct	cudf::io::column_name_info
	Detailed name (and optionally nullability) information for output columns. More...

struct	cudf::io::table_metadata
	Table metadata returned by IO readers. More...

struct	cudf::io::table_with_metadata
	Table with table metadata used by IO readers to return the metadata by value. More...

struct	cudf::io::host_buffer
	Non-owning view of a host memory buffer. More...

struct	cudf::io::source_info
	Source information for read interfaces. More...

struct	cudf::io::sink_info
	Destination information for write interfaces. More...

class	cudf::io::column_in_metadata
	Metadata for a column. More...

class	cudf::io::table_input_metadata
	Metadata for a table. More...

struct	cudf::io::partition_info
	Information used while writing partitioned datasets. More...

class	cudf::io::reader_column_schema
	Schema element for reader. More...

Typedefs
using	cudf::io::no_statistics = std::monostate
	Monostate type alias for the statistics variant.

using	cudf::io::date_statistics = minmax_statistics< int32_t >
	Statistics for date(time) columns.

using	cudf::io::binary_statistics = sum_statistics< int64_t >
	Statistics for binary columns. More...

using	cudf::io::statistics_type = std::variant< no_statistics, integer_statistics, double_statistics, string_statistics, bucket_statistics, decimal_statistics, date_statistics, binary_statistics, timestamp_statistics >
	Variant type for ORC type-specific column statistics. More...

Enumerations
enum	cudf::io::orc::CompressionKind : uint8_t { NONE = 0 , ZLIB = 1 , SNAPPY = 2 , LZO = 3 , LZ4 = 4 , ZSTD = 5 }
	Identifies a compression algorithm.

enum	cudf::io::orc::TypeKind : int8_t { INVALID_TYPE_KIND = -1 , BOOLEAN = 0 , BYTE = 1 , SHORT = 2 , INT = 3 , LONG = 4 , FLOAT = 5 , DOUBLE = 6 , STRING = 7 , BINARY = 8 , TIMESTAMP = 9 , LIST = 10 , MAP = 11 , STRUCT = 12 , UNION = 13 , DECIMAL = 14 , DATE = 15 , VARCHAR = 16 , CHAR = 17 }
	Identifies a data type in an ORC file.

enum	cudf::io::orc::StreamKind : int8_t { INVALID_STREAM_KIND = -1 , PRESENT = 0 , DATA = 1 , LENGTH = 2 , DICTIONARY_DATA = 3 , DICTIONARY_COUNT = 4 , SECONDARY = 5 , ROW_INDEX = 6 , BLOOM_FILTER = 7 , BLOOM_FILTER_UTF8 = 8 }
	Identifies the type of data stream.

enum	cudf::io::orc::ColumnEncodingKind : int8_t { INVALID_ENCODING_KIND = -1 , DIRECT = 0 , DICTIONARY = 1 , DIRECT_V2 = 2 , DICTIONARY_V2 = 3 }
	Identifies the encoding of columns.

enum	cudf::io::orc::ProtofType : uint8_t { VARINT = 0 , FIXED64 = 1 , FIXEDLEN = 2 , START_GROUP = 3 , END_GROUP = 4 , FIXED32 = 5 , INVALID_6 = 6 , INVALID_7 = 7 }
	Identifies the type of encoding in a protocol buffer.

enum class	cudf::io::compression_type : int32_t { cudf::io::NONE , cudf::io::AUTO , cudf::io::SNAPPY , cudf::io::GZIP , cudf::io::BZIP2 , cudf::io::BROTLI , cudf::io::ZIP , cudf::io::XZ , cudf::io::ZLIB , cudf::io::LZ4 , cudf::io::LZO , cudf::io::ZSTD }
	Compression algorithms. More...

enum class	cudf::io::io_type : int32_t { cudf::io::FILEPATH , cudf::io::HOST_BUFFER , cudf::io::DEVICE_BUFFER , cudf::io::VOID , cudf::io::USER_IMPLEMENTED }
	Data source or destination types. More...

enum class	cudf::io::quote_style : int32_t { cudf::io::MINIMAL , cudf::io::ALL , cudf::io::NONNUMERIC , cudf::io::NONE }
	Behavior when handling quotations in field data. More...

enum	cudf::io::statistics_freq : int32_t { cudf::io::STATISTICS_NONE = 0 , cudf::io::STATISTICS_ROWGROUP = 1 , cudf::io::STATISTICS_PAGE = 2 , cudf::io::STATISTICS_COLUMN = 3 }
	Column statistics granularity type for parquet/orc writers. More...

enum class	cudf::io::column_encoding : int32_t { cudf::io::USE_DEFAULT = -1 , cudf::io::DICTIONARY , cudf::io::PLAIN , cudf::io::DELTA_BINARY_PACKED , cudf::io::DELTA_LENGTH_BYTE_ARRAY , cudf::io::DELTA_BYTE_ARRAY , cudf::io::BYTE_STREAM_SPLIT , cudf::io::DIRECT , cudf::io::DIRECT_V2 , cudf::io::DICTIONARY_V2 }
	Valid encodings for use with `column_in_metadata::set_encoding()` More...

enum	cudf::io::dictionary_policy : int32_t { cudf::io::NEVER = 0 , cudf::io::ADAPTIVE = 1 , cudf::io::ALWAYS = 2 }
	Control use of dictionary encoding for parquet writer. More...

Functions
template<typename T >
constexpr auto	cudf::io::is_byte_like_type ()
	Returns `true` if the type is byte-like, meaning it is reasonable to pass as a pointer to bytes. More...

Detailed Description

Typedef Documentation

◆ binary_statistics

using cudf::io::binary_statistics = sum_statistics<int64_t>

Statistics for binary columns.

The sum is the total number of bytes across all elements.

Definition at line 143 of file orc_metadata.hpp.

◆ statistics_type

using cudf::io::statistics_type = std::variant<no_statistics, integer_statistics, double_statistics, string_statistics, bucket_statistics, decimal_statistics, date_statistics, binary_statistics, timestamp_statistics>

Variant type for ORC type-specific column statistics.

The variant can hold any of the supported column statistics types.

Definition at line 163 of file orc_metadata.hpp.

Enumeration Type Documentation

◆ column_encoding

enum cudf::io::column_encoding : int32_t

strong

Valid encodings for use with column_in_metadata::set_encoding()

Enumerator
USE_DEFAULT	No encoding has been requested, use default encoding.
DICTIONARY	Use dictionary encoding.
PLAIN	Use plain encoding.
DELTA_BINARY_PACKED	Use DELTA_BINARY_PACKED encoding (only valid for integer columns)
DELTA_LENGTH_BYTE_ARRAY	Use DELTA_LENGTH_BYTE_ARRAY encoding (only valid for BYTE_ARRAY columns)
DELTA_BYTE_ARRAY	Use DELTA_BYTE_ARRAY encoding (only valid for BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY columns)
BYTE_STREAM_SPLIT	Use BYTE_STREAM_SPLIT encoding (valid for all fixed width types)
DIRECT	Use DIRECT encoding.
DIRECT_V2	Use DIRECT_V2 encoding.
DICTIONARY_V2	Use DICTIONARY_V2 encoding.

Definition at line 106 of file io/types.hpp.

◆ compression_type

enum cudf::io::compression_type : int32_t

strong

Compression algorithms.

Enumerator
NONE	No compression.
AUTO	Automatically detect or select compression format.
SNAPPY	Snappy format, using byte-oriented LZ77.
GZIP	GZIP format, using DEFLATE algorithm.
BZIP2	BZIP2 format, using Burrows-Wheeler transform.
BROTLI	BROTLI format, using LZ77 + Huffman + 2nd order context modeling.
ZIP	ZIP format, using DEFLATE algorithm.
XZ	XZ format, using LZMA(2) algorithm.
ZLIB	ZLIB format, using DEFLATE algorithm.
LZ4	LZ4 format, using LZ77.
LZO	Lempel–Ziv–Oberhumer format.
ZSTD	Zstandard format.

Definition at line 57 of file io/types.hpp.

◆ dictionary_policy

enum cudf::io::dictionary_policy : int32_t

Control use of dictionary encoding for parquet writer.

Enumerator
NEVER	Never use dictionary encoding.
ADAPTIVE	Use dictionary when it will not impact compression.
ALWAYS	Use dictionary regardless of impact on compression.

Definition at line 225 of file io/types.hpp.

◆ io_type

enum cudf::io::io_type : int32_t

strong

Data source or destination types.

Enumerator
FILEPATH	Input/output is a file path.
HOST_BUFFER	Input/output is a buffer in host memory.
DEVICE_BUFFER	Input/output is a buffer in device memory.
VOID	Input/output is nothing. No work is done. Useful for benchmarking.
USER_IMPLEMENTED	Input/output is handled by a custom user class.

Definition at line 75 of file io/types.hpp.

◆ quote_style

enum cudf::io::quote_style : int32_t

strong

Behavior when handling quotations in field data.

Enumerator
MINIMAL	Quote only fields which contain special characters.
ALL	Quote all fields.
NONNUMERIC	Quote all non-numeric fields.
NONE	Never quote fields; disable quotation parsing.

Definition at line 86 of file io/types.hpp.

◆ statistics_freq

enum cudf::io::statistics_freq : int32_t

Column statistics granularity type for parquet/orc writers.

Enumerator
STATISTICS_NONE	No column statistics.
STATISTICS_ROWGROUP	Per-rowgroup column statistics.
STATISTICS_PAGE	Per-page column statistics.
STATISTICS_COLUMN	Full column and offset indices. Implies STATISTICS_ROWGROUP.

Definition at line 96 of file io/types.hpp.

Function Documentation

◆ is_byte_like_type()

template<typename T >

constexpr auto cudf::io::is_byte_like_type ( )

inline constexpr

Returns true if the type is byte-like, meaning it is reasonable to pass as a pointer to bytes.

Template Parameters

T	The representation type

Returns: true if the type is considered a byte-like type

Definition at line 337 of file io/types.hpp.

Files

Classes

Typedefs

Enumerations

Functions

Detailed Description

Typedef Documentation

◆ binary_statistics

◆ statistics_type

Enumeration Type Documentation

◆ column_encoding

◆ compression_type

◆ dictionary_policy

◆ io_type

◆ quote_style

◆ statistics_freq

Function Documentation

◆ is_byte_like_type()