Base settings for write_parquet() and parquet_chunked_writer.
More...
#include <parquet.hpp>
Public Member Functions | |
parquet_writer_options_base ()=default | |
Default constructor. More... | |
sink_info const & | get_sink () const |
Returns sink info. More... | |
compression_type | get_compression () const |
Returns compression format used. More... | |
statistics_freq | get_stats_level () const |
Returns level of statistics requested in output file. More... | |
auto const & | get_metadata () const |
Returns associated metadata. More... | |
std::vector< std::map< std::string, std::string > > const & | get_key_value_metadata () const |
Returns Key-Value footer metadata information. More... | |
bool | is_enabled_int96_timestamps () const |
Returns true if timestamps will be written as INT96. More... | |
auto | is_enabled_utc_timestamps () const |
Returns true if timestamps will be written as UTC. More... | |
auto | is_enabled_write_arrow_schema () const |
Returns true if arrow schema will be written. More... | |
auto | get_row_group_size_bytes () const |
Returns maximum row group size, in bytes. More... | |
auto | get_row_group_size_rows () const |
Returns maximum row group size, in rows. More... | |
auto | get_max_page_size_bytes () const |
Returns the maximum uncompressed page size, in bytes. More... | |
auto | get_max_page_size_rows () const |
Returns maximum page size, in rows. More... | |
auto | get_column_index_truncate_length () const |
Returns maximum length of min or max values in column index, in bytes. More... | |
dictionary_policy | get_dictionary_policy () const |
Returns policy for dictionary use. More... | |
auto | get_max_dictionary_size () const |
Returns maximum dictionary size, in bytes. More... | |
auto | get_max_page_fragment_size () const |
Returns maximum page fragment size, in rows. More... | |
std::shared_ptr< writer_compression_statistics > | get_compression_statistics () const |
Returns a shared pointer to the user-provided compression statistics. More... | |
auto | is_enabled_write_v2_headers () const |
Returns true if V2 page headers should be written. More... | |
auto const & | get_sorting_columns () const |
Returns the sorting_columns. More... | |
void | set_metadata (table_input_metadata metadata) |
Sets metadata. More... | |
void | set_key_value_metadata (std::vector< std::map< std::string, std::string >> metadata) |
Sets Key-Value footer metadata. More... | |
void | set_stats_level (statistics_freq sf) |
Sets the level of statistics. More... | |
void | set_compression (compression_type compression) |
Sets compression type. More... | |
void | enable_int96_timestamps (bool req) |
Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false. More... | |
void | enable_utc_timestamps (bool val) |
Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true. More... | |
void | enable_write_arrow_schema (bool val) |
Sets preference for writing arrow schema. Write arrow schema if set to true. More... | |
void | set_row_group_size_bytes (size_t size_bytes) |
Sets the maximum row group size, in bytes. More... | |
void | set_row_group_size_rows (size_type size_rows) |
Sets the maximum row group size, in rows. More... | |
void | set_max_page_size_bytes (size_t size_bytes) |
Sets the maximum uncompressed page size, in bytes. More... | |
void | set_max_page_size_rows (size_type size_rows) |
Sets the maximum page size, in rows. More... | |
void | set_column_index_truncate_length (int32_t size_bytes) |
Sets the maximum length of min or max values in column index, in bytes. More... | |
void | set_dictionary_policy (dictionary_policy policy) |
Sets the policy for dictionary use. More... | |
void | set_max_dictionary_size (size_t size_bytes) |
Sets the maximum dictionary size, in bytes. More... | |
void | set_max_page_fragment_size (size_type size_rows) |
Sets the maximum page fragment size, in rows. More... | |
void | set_compression_statistics (std::shared_ptr< writer_compression_statistics > comp_stats) |
Sets the pointer to the output compression statistics. More... | |
void | enable_write_v2_headers (bool val) |
Sets preference for V2 page headers. Write V2 page headers if set to true. More... | |
void | set_sorting_columns (std::vector< sorting_column > sorting_columns) |
Sets sorting columns. More... | |
Protected Member Functions | |
parquet_writer_options_base (sink_info sink) | |
Constructor from sink. More... | |
Base settings for write_parquet() and parquet_chunked_writer.
Definition at line 622 of file parquet.hpp.
|
inline explicit protected |
Constructor from sink.
sink | The sink used for writer output |
Definition at line 670 of file parquet.hpp.
|
default |
Default constructor.
This has been added because Cython requires a default constructor to create objects on the stack.
void cudf::io::parquet_writer_options_base::enable_int96_timestamps | ( | bool | req | ) |
Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false.
req | Boolean value to enable/disable writing of INT96 timestamps |
void cudf::io::parquet_writer_options_base::enable_utc_timestamps | ( | bool | val | ) |
Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true.
val | Boolean value to enable/disable writing of timestamps as UTC. |
void cudf::io::parquet_writer_options_base::enable_write_arrow_schema | ( | bool | val | ) |
Sets preference for writing arrow schema. Write arrow schema if set to true.
val | Boolean value to enable/disable writing of arrow schema. |
void cudf::io::parquet_writer_options_base::enable_write_v2_headers | ( | bool | val | ) |
Sets preference for V2 page headers. Write V2 page headers if set to true.
val | Boolean value to enable/disable writing of V2 page headers. |
|
inline |
Returns maximum length of min or max values in column index, in bytes.
Definition at line 783 of file parquet.hpp.
|
inline |
Returns compression format used.
Definition at line 692 of file parquet.hpp.
|
inline |
Returns a shared pointer to the user-provided compression statistics.
Definition at line 814 of file parquet.hpp.
|
inline |
Returns policy for dictionary use.
Definition at line 793 of file parquet.hpp.
|
inline |
Returns Key-Value footer metadata information.
Definition at line 713 of file parquet.hpp.
|
inline |
Returns maximum dictionary size, in bytes.
Definition at line 800 of file parquet.hpp.
|
inline |
Returns maximum page fragment size, in rows.
Definition at line 807 of file parquet.hpp.
|
inline |
Returns the maximum uncompressed page size, in bytes.
If set larger than the row group size, then this will return the row group size.
Definition at line 761 of file parquet.hpp.
|
inline |
Returns maximum page size, in rows.
If set larger than the row group size, then this will return the row group size.
Definition at line 773 of file parquet.hpp.
|
inline |
Returns associated metadata.
|
inline |
Returns maximum row group size, in bytes.
Definition at line 745 of file parquet.hpp.
|
inline |
Returns maximum row group size, in rows.
Definition at line 752 of file parquet.hpp.
|
inline |
Returns sink info.
|
inline |
Returns the sorting_columns.
Definition at line 831 of file parquet.hpp.
|
inline |
Returns level of statistics requested in output file.
Definition at line 699 of file parquet.hpp.
|
inline |
Returns true if timestamps will be written as INT96.
Returns: true if timestamps will be written as INT96.
Definition at line 724 of file parquet.hpp.
|
inline |
Returns true if timestamps will be written as UTC.
Returns: true if timestamps will be written as UTC.
Definition at line 731 of file parquet.hpp.
|
inline |
Returns true if arrow schema will be written.
Returns: true if arrow schema will be written.
Definition at line 738 of file parquet.hpp.
|
inline |
Returns true if V2 page headers should be written.
Returns: true if V2 page headers should be written.
Definition at line 824 of file parquet.hpp.
void cudf::io::parquet_writer_options_base::set_column_index_truncate_length | ( | int32_t | size_bytes | ) |
Sets the maximum length of min or max values in column index, in bytes.
size_bytes | Length, in bytes, that min/max values will be truncated to |
void cudf::io::parquet_writer_options_base::set_compression | ( | compression_type | compression | ) |
Sets compression type.
compression | The compression type to use |
void cudf::io::parquet_writer_options_base::set_compression_statistics | ( | std::shared_ptr< writer_compression_statistics > | comp_stats | ) |
Sets the pointer to the output compression statistics.
comp_stats | Pointer to compression statistics to be updated after writing |
void cudf::io::parquet_writer_options_base::set_dictionary_policy | ( | dictionary_policy | policy | ) |
Sets the policy for dictionary use.
policy | Policy for dictionary use |
void cudf::io::parquet_writer_options_base::set_key_value_metadata | ( | std::vector< std::map< std::string, std::string >> | metadata | ) |
Sets Key-Value footer metadata.
metadata | Key-Value footer metadata |
void cudf::io::parquet_writer_options_base::set_max_dictionary_size | ( | size_t | size_bytes | ) |
Sets the maximum dictionary size, in bytes.
size_bytes | Maximum dictionary size, in bytes |
void cudf::io::parquet_writer_options_base::set_max_page_fragment_size | ( | size_type | size_rows | ) |
Sets the maximum page fragment size, in rows.
size_rows | Maximum page fragment size, in rows. |
void cudf::io::parquet_writer_options_base::set_max_page_size_bytes | ( | size_t | size_bytes | ) |
Sets the maximum uncompressed page size, in bytes.
size_bytes | Maximum uncompressed page size, in bytes to set |
void cudf::io::parquet_writer_options_base::set_max_page_size_rows | ( | size_type | size_rows | ) |
Sets the maximum page size, in rows.
size_rows | Maximum page size, in rows to set |
void cudf::io::parquet_writer_options_base::set_metadata | ( | table_input_metadata | metadata | ) |
Sets metadata.
metadata | Associated metadata |
void cudf::io::parquet_writer_options_base::set_row_group_size_bytes | ( | size_t | size_bytes | ) |
Sets the maximum row group size, in bytes.
size_bytes | Maximum row group size, in bytes to set |
void cudf::io::parquet_writer_options_base::set_row_group_size_rows | ( | size_type | size_rows | ) |
Sets the maximum row group size, in rows.
size_rows | Maximum row group size, in rows to set |
void cudf::io::parquet_writer_options_base::set_sorting_columns | ( | std::vector< sorting_column > | sorting_columns | ) |
Sets sorting columns.
sorting_columns | Column sort order metadata |
void cudf::io::parquet_writer_options_base::set_stats_level | ( | statistics_freq | sf | ) |
Sets the level of statistics.
sf | Level of statistics requested in the output file |