Base settings for write_parquet() and parquet_chunked_writer.
More...
#include <parquet.hpp>
Public Member Functions | |
parquet_writer_options_base ()=default | |
Default constructor. More... | |
sink_info const & | get_sink () const |
Returns sink info. More... | |
compression_type | get_compression () const |
Returns compression format used. More... | |
statistics_freq | get_stats_level () const |
Returns level of statistics requested in output file. More... | |
auto const & | get_metadata () const |
Returns associated metadata. More... | |
std::vector< std::map< std::string, std::string > > const & | get_key_value_metadata () const |
Returns Key-Value footer metadata information. More... | |
bool | is_enabled_int96_timestamps () const |
Returns true if timestamps will be written as INT96. More... | |
auto | is_enabled_utc_timestamps () const |
Returns true if timestamps will be written as UTC. More... | |
auto | is_enabled_write_arrow_schema () const |
Returns true if arrow schema will be written. More... | |
auto | get_row_group_size_bytes () const |
Returns maximum row group size, in bytes. More... | |
auto | get_row_group_size_rows () const |
Returns maximum row group size, in rows. More... | |
auto | get_max_page_size_bytes () const |
Returns the maximum uncompressed page size, in bytes. More... | |
auto | get_max_page_size_rows () const |
Returns maximum page size, in rows. More... | |
auto | get_column_index_truncate_length () const |
Returns maximum length of min or max values in column index, in bytes. More... | |
dictionary_policy | get_dictionary_policy () const |
Returns policy for dictionary use. More... | |
auto | get_max_dictionary_size () const |
Returns maximum dictionary size, in bytes. More... | |
auto | get_max_page_fragment_size () const |
Returns maximum page fragment size, in rows. More... | |
std::shared_ptr< writer_compression_statistics > | get_compression_statistics () const |
Returns a shared pointer to the user-provided compression statistics. More... | |
auto | is_enabled_write_v2_headers () const |
Returns true if V2 page headers should be written. More... | |
auto const & | get_sorting_columns () const |
Returns the sorting_columns. More... | |
void | set_metadata (table_input_metadata metadata) |
Sets metadata. More... | |
void | set_key_value_metadata (std::vector< std::map< std::string, std::string >> metadata) |
Sets Key-Value footer metadata. More... | |
void | set_stats_level (statistics_freq sf) |
Sets the level of statistics. More... | |
void | set_compression (compression_type compression) |
Sets compression type. More... | |
void | enable_int96_timestamps (bool req) |
Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false. More... | |
void | enable_utc_timestamps (bool val) |
Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true. More... | |
void | enable_write_arrow_schema (bool val) |
Sets preference for writing arrow schema. Write arrow schema if set to true. More... | |
void | set_row_group_size_bytes (size_t size_bytes) |
Sets the maximum row group size, in bytes. More... | |
void | set_row_group_size_rows (size_type size_rows) |
Sets the maximum row group size, in rows. More... | |
void | set_max_page_size_bytes (size_t size_bytes) |
Sets the maximum uncompressed page size, in bytes. More... | |
void | set_max_page_size_rows (size_type size_rows) |
Sets the maximum page size, in rows. More... | |
void | set_column_index_truncate_length (int32_t size_bytes) |
Sets the maximum length of min or max values in column index, in bytes. More... | |
void | set_dictionary_policy (dictionary_policy policy) |
Sets the policy for dictionary use. More... | |
void | set_max_dictionary_size (size_t size_bytes) |
Sets the maximum dictionary size, in bytes. More... | |
void | set_max_page_fragment_size (size_type size_rows) |
Sets the maximum page fragment size, in rows. More... | |
void | set_compression_statistics (std::shared_ptr< writer_compression_statistics > comp_stats) |
Sets the pointer to the output compression statistics. More... | |
void | enable_write_v2_headers (bool val) |
Sets preference for V2 page headers. Write V2 page headers if set to true. More... | |
void | set_sorting_columns (std::vector< sorting_column > sorting_columns) |
Sets sorting columns. More... | |
Protected Member Functions | |
parquet_writer_options_base (sink_info sink) | |
Constructor from sink. More... | |
Base settings for write_parquet() and parquet_chunked_writer.
Definition at line 623 of file parquet.hpp.
|
inline explicit protected |
Constructor from sink.
sink | The sink used for writer output |
Definition at line 671 of file parquet.hpp.
|
default |
Default constructor.
This has been added since Cython requires a default constructor to create objects on the stack.
void cudf::io::parquet_writer_options_base::enable_int96_timestamps | ( | bool | req | ) |
Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false.
req | Boolean value to enable/disable writing of INT96 timestamps |
void cudf::io::parquet_writer_options_base::enable_utc_timestamps | ( | bool | val | ) |
Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true.
val | Boolean value to enable/disable writing of timestamps as UTC. |
void cudf::io::parquet_writer_options_base::enable_write_arrow_schema | ( | bool | val | ) |
Sets preference for writing arrow schema. Write arrow schema if set to true.
val | Boolean value to enable/disable writing of arrow schema. |
void cudf::io::parquet_writer_options_base::enable_write_v2_headers | ( | bool | val | ) |
Sets preference for V2 page headers. Write V2 page headers if set to true.
val | Boolean value to enable/disable writing of V2 page headers. |
|
inline |
Returns maximum length of min or max values in column index, in bytes.
Definition at line 784 of file parquet.hpp.
|
inline |
Returns compression format used.
Definition at line 693 of file parquet.hpp.
|
inline |
Returns a shared pointer to the user-provided compression statistics.
Definition at line 815 of file parquet.hpp.
|
inline |
Returns policy for dictionary use.
Definition at line 794 of file parquet.hpp.
|
inline |
Returns Key-Value footer metadata information.
Definition at line 714 of file parquet.hpp.
|
inline |
Returns maximum dictionary size, in bytes.
Definition at line 801 of file parquet.hpp.
|
inline |
Returns maximum page fragment size, in rows.
Definition at line 808 of file parquet.hpp.
|
inline |
Returns the maximum uncompressed page size, in bytes.
If set larger than the row group size, then this will return the row group size.
Definition at line 762 of file parquet.hpp.
|
inline |
Returns maximum page size, in rows.
If set larger than the row group size, then this will return the row group size.
Definition at line 774 of file parquet.hpp.
|
inline |
Returns associated metadata.
|
inline |
Returns maximum row group size, in bytes.
Definition at line 746 of file parquet.hpp.
|
inline |
Returns maximum row group size, in rows.
Definition at line 753 of file parquet.hpp.
|
inline |
Returns sink info.
|
inline |
Returns the sorting_columns.
Definition at line 832 of file parquet.hpp.
|
inline |
Returns level of statistics requested in output file.
Definition at line 700 of file parquet.hpp.
|
inline |
Returns true if timestamps will be written as INT96.
Returns: true if timestamps will be written as INT96.
Definition at line 725 of file parquet.hpp.
|
inline |
Returns true if timestamps will be written as UTC.
Returns: true if timestamps will be written as UTC.
Definition at line 732 of file parquet.hpp.
|
inline |
Returns true if arrow schema will be written.
Returns: true if arrow schema will be written.
Definition at line 739 of file parquet.hpp.
|
inline |
Returns true if V2 page headers should be written.
Returns: true if V2 page headers should be written.
Definition at line 825 of file parquet.hpp.
void cudf::io::parquet_writer_options_base::set_column_index_truncate_length | ( | int32_t | size_bytes | ) |
Sets the maximum length of min or max values in column index, in bytes.
size_bytes | length min/max will be truncated to |
void cudf::io::parquet_writer_options_base::set_compression | ( | compression_type | compression | ) |
Sets compression type.
compression | The compression type to use |
void cudf::io::parquet_writer_options_base::set_compression_statistics | ( | std::shared_ptr< writer_compression_statistics > | comp_stats | ) |
Sets the pointer to the output compression statistics.
comp_stats | Pointer to compression statistics to be updated after writing |
void cudf::io::parquet_writer_options_base::set_dictionary_policy | ( | dictionary_policy | policy | ) |
Sets the policy for dictionary use.
policy | Policy for dictionary use |
void cudf::io::parquet_writer_options_base::set_key_value_metadata | ( | std::vector< std::map< std::string, std::string >> | metadata | ) |
Sets Key-Value footer metadata.
metadata | Key-Value footer metadata |
void cudf::io::parquet_writer_options_base::set_max_dictionary_size | ( | size_t | size_bytes | ) |
Sets the maximum dictionary size, in bytes.
size_bytes | Maximum dictionary size, in bytes |
void cudf::io::parquet_writer_options_base::set_max_page_fragment_size | ( | size_type | size_rows | ) |
Sets the maximum page fragment size, in rows.
size_rows | Maximum page fragment size, in rows. |
void cudf::io::parquet_writer_options_base::set_max_page_size_bytes | ( | size_t | size_bytes | ) |
Sets the maximum uncompressed page size, in bytes.
size_bytes | Maximum uncompressed page size, in bytes to set |
void cudf::io::parquet_writer_options_base::set_max_page_size_rows | ( | size_type | size_rows | ) |
Sets the maximum page size, in rows.
size_rows | Maximum page size, in rows to set |
void cudf::io::parquet_writer_options_base::set_metadata | ( | table_input_metadata | metadata | ) |
Sets metadata.
metadata | Associated metadata |
void cudf::io::parquet_writer_options_base::set_row_group_size_bytes | ( | size_t | size_bytes | ) |
Sets the maximum row group size, in bytes.
size_bytes | Maximum row group size, in bytes to set |
void cudf::io::parquet_writer_options_base::set_row_group_size_rows | ( | size_type | size_rows | ) |
Sets the maximum row group size, in rows.
size_rows | Maximum row group size, in rows to set |
void cudf::io::parquet_writer_options_base::set_sorting_columns | ( | std::vector< sorting_column > | sorting_columns | ) |
Sets sorting columns.
sorting_columns | Column sort order metadata |
void cudf::io::parquet_writer_options_base::set_stats_level | ( | statistics_freq | sf | ) |
Sets the level of statistics.
sf | Level of statistics requested in the output file |