Settings for write_parquet().
More...
#include <parquet.hpp>
Public Member Functions | |
parquet_writer_options ()=default | |
Default constructor. More... | |
sink_info const & | get_sink () const |
Returns sink info. More... | |
compression_type | get_compression () const |
Returns compression format used. More... | |
statistics_freq | get_stats_level () const |
Returns level of statistics requested in output file. More... | |
table_view | get_table () const |
Returns table_view. More... | |
std::vector< partition_info > const & | get_partitions () const |
Returns partitions. More... | |
auto const & | get_metadata () const |
Returns associated metadata. More... | |
std::vector< std::map< std::string, std::string > > const & | get_key_value_metadata () const |
Returns Key-Value footer metadata information. More... | |
bool | is_enabled_int96_timestamps () const |
Returns true if timestamps will be written as INT96. More... | |
auto | is_enabled_utc_timestamps () const |
Returns true if timestamps will be written as UTC. More... | |
std::vector< std::string > const & | get_column_chunks_file_paths () const |
Returns Column chunks file paths to be set in the raw output metadata. More... | |
auto | get_row_group_size_bytes () const |
Returns maximum row group size, in bytes. More... | |
auto | get_row_group_size_rows () const |
Returns maximum row group size, in rows. More... | |
auto | get_max_page_size_bytes () const |
Returns the maximum uncompressed page size, in bytes. More... | |
auto | get_max_page_size_rows () const |
Returns maximum page size, in rows. More... | |
auto | get_column_index_truncate_length () const |
Returns maximum length of min or max values in column index, in bytes. More... | |
dictionary_policy | get_dictionary_policy () const |
Returns policy for dictionary use. More... | |
auto | get_max_dictionary_size () const |
Returns maximum dictionary size, in bytes. More... | |
auto | get_max_page_fragment_size () const |
Returns maximum page fragment size, in rows. More... | |
std::shared_ptr< writer_compression_statistics > | get_compression_statistics () const |
Returns a shared pointer to the user-provided compression statistics. More... | |
auto | is_enabled_write_v2_headers () const |
Returns true if V2 page headers should be written. More... | |
auto const & | get_sorting_columns () const |
Returns the sorting_columns. More... | |
void | set_partitions (std::vector< partition_info > partitions) |
Sets partitions. More... | |
void | set_metadata (table_input_metadata metadata) |
Sets metadata. More... | |
void | set_key_value_metadata (std::vector< std::map< std::string, std::string >> metadata) |
Sets Key-Value footer metadata. More... | |
void | set_stats_level (statistics_freq sf) |
Sets the level of statistics. More... | |
void | set_compression (compression_type compression) |
Sets compression type. More... | |
void | enable_int96_timestamps (bool req) |
Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false . More... | |
void | enable_utc_timestamps (bool val) |
Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true . More... | |
void | set_column_chunks_file_paths (std::vector< std::string > file_paths) |
Sets column chunks file path to be set in the raw output metadata. More... | |
void | set_row_group_size_bytes (size_t size_bytes) |
Sets the maximum row group size, in bytes. More... | |
void | set_row_group_size_rows (size_type size_rows) |
Sets the maximum row group size, in rows. More... | |
void | set_max_page_size_bytes (size_t size_bytes) |
Sets the maximum uncompressed page size, in bytes. More... | |
void | set_max_page_size_rows (size_type size_rows) |
Sets the maximum page size, in rows. More... | |
void | set_column_index_truncate_length (int32_t size_bytes) |
Sets the maximum length of min or max values in column index, in bytes. More... | |
void | set_dictionary_policy (dictionary_policy policy) |
Sets the policy for dictionary use. More... | |
void | set_max_dictionary_size (size_t size_bytes) |
Sets the maximum dictionary size, in bytes. More... | |
void | set_max_page_fragment_size (size_type size_rows) |
Sets the maximum page fragment size, in rows. More... | |
void | set_compression_statistics (std::shared_ptr< writer_compression_statistics > comp_stats) |
Sets the pointer to the output compression statistics. More... | |
void | enable_write_v2_headers (bool val) |
Sets preference for V2 page headers. Write V2 page headers if set to true . More... | |
void | set_sorting_columns (std::vector< sorting_column > sorting_columns) |
Sets sorting columns. More... | |
Static Public Member Functions | |
static parquet_writer_options_builder | builder (sink_info const &sink, table_view const &table) |
Create builder to create parquet_writer_options . More... | |
static parquet_writer_options_builder | builder () |
Create builder to create parquet_writer_options . More... | |
Settings for write_parquet().
Definition at line 533 of file parquet.hpp.
|
default |
Default constructor.
This has been added since Cython requires a default constructor to create objects on the stack.
|
static |
Create builder to create parquet_writer_options.
|
static |
Create builder to create parquet_writer_options.
sink | The sink used for writer output |
table | Table to be written to output |
|
inline |
Sets timestamp writing preferences. INT96 timestamps will be written if true and TIMESTAMP_MICROS will be written if false.
req | Boolean value to enable/disable writing of INT96 timestamps |
Definition at line 825 of file parquet.hpp.
|
inline |
Sets preference for writing timestamps as UTC. Write timestamps as UTC if set to true.
val | Boolean value to enable/disable writing of timestamps as UTC. |
Definition at line 832 of file parquet.hpp.
|
inline |
Sets preference for V2 page headers. Write V2 page headers if set to true.
val | Boolean value to enable/disable writing of V2 page headers. |
Definition at line 913 of file parquet.hpp.
|
inline |
Returns Column chunks file paths to be set in the raw output metadata.
Definition at line 688 of file parquet.hpp.
|
inline |
Returns maximum length of min or max values in column index, in bytes.
Definition at line 736 of file parquet.hpp.
|
inline |
Returns compression format used.
Definition at line 629 of file parquet.hpp.
|
inline |
Returns a shared pointer to the user-provided compression statistics.
Definition at line 764 of file parquet.hpp.
|
inline |
Returns policy for dictionary use.
Definition at line 743 of file parquet.hpp.
|
inline |
Returns Key-Value footer metadata information.
Definition at line 664 of file parquet.hpp.
|
inline |
Returns maximum dictionary size, in bytes.
Definition at line 750 of file parquet.hpp.
|
inline |
Returns maximum page fragment size, in rows.
Definition at line 757 of file parquet.hpp.
|
inline |
Returns the maximum uncompressed page size, in bytes.
If set larger than the row group size, then this will return the row group size.
Definition at line 714 of file parquet.hpp.
|
inline |
Returns maximum page size, in rows.
If set larger than the row group size, then this will return the row group size.
Definition at line 726 of file parquet.hpp.
|
inline |
|
inline |
|
inline |
Returns maximum row group size, in bytes.
Definition at line 698 of file parquet.hpp.
|
inline |
Returns maximum row group size, in rows.
Definition at line 705 of file parquet.hpp.
|
inline |
|
inline |
Returns the sorting_columns.
Definition at line 781 of file parquet.hpp.
|
inline |
Returns level of statistics requested in output file.
Definition at line 636 of file parquet.hpp.
|
inline |
|
inline |
Returns true if timestamps will be written as INT96.
Returns: true if timestamps will be written as INT96.
Definition at line 674 of file parquet.hpp.
|
inline |
Returns true if timestamps will be written as UTC.
Returns: true if timestamps will be written as UTC.
Definition at line 681 of file parquet.hpp.
|
inline |
Returns true if V2 page headers should be written.
Returns: true if V2 page headers should be written.
Definition at line 774 of file parquet.hpp.
void cudf::io::parquet_writer_options::set_column_chunks_file_paths | ( | std::vector< std::string > | file_paths | ) |
Sets column chunks file path to be set in the raw output metadata.
file_paths | Vector of strings indicating the file paths. Must be the same size as the number of data sinks in sink info |
void cudf::io::parquet_writer_options::set_column_index_truncate_length | ( | int32_t | size_bytes | ) |
Sets the maximum length of min or max values in column index, in bytes.
size_bytes | Length, in bytes, that min/max values will be truncated to |
|
inline |
Sets compression type.
compression | The compression type to use |
Definition at line 817 of file parquet.hpp.
|
inline |
Sets the pointer to the output compression statistics.
comp_stats | Pointer to compression statistics to be updated after writing |
Definition at line 903 of file parquet.hpp.
void cudf::io::parquet_writer_options::set_dictionary_policy | ( | dictionary_policy | policy | ) |
Sets the policy for dictionary use.
policy | Policy for dictionary use |
void cudf::io::parquet_writer_options::set_key_value_metadata | ( | std::vector< std::map< std::string, std::string >> | metadata | ) |
Sets Key-Value footer metadata.
metadata | Key-Value footer metadata |
void cudf::io::parquet_writer_options::set_max_dictionary_size | ( | size_t | size_bytes | ) |
Sets the maximum dictionary size, in bytes.
size_bytes | Maximum dictionary size, in bytes |
void cudf::io::parquet_writer_options::set_max_page_fragment_size | ( | size_type | size_rows | ) |
Sets the maximum page fragment size, in rows.
size_rows | Maximum page fragment size, in rows. |
void cudf::io::parquet_writer_options::set_max_page_size_bytes | ( | size_t | size_bytes | ) |
Sets the maximum uncompressed page size, in bytes.
size_bytes | Maximum uncompressed page size, in bytes to set |
void cudf::io::parquet_writer_options::set_max_page_size_rows | ( | size_type | size_rows | ) |
Sets the maximum page size, in rows.
size_rows | Maximum page size, in rows to set |
|
inline |
void cudf::io::parquet_writer_options::set_partitions | ( | std::vector< partition_info > | partitions | ) |
Sets partitions.
partitions | Partitions of the input table in {start_row, num_rows} pairs. If specified, must be the same size as the number of sinks in sink_info |
void cudf::io::parquet_writer_options::set_row_group_size_bytes | ( | size_t | size_bytes | ) |
Sets the maximum row group size, in bytes.
size_bytes | Maximum row group size, in bytes to set |
void cudf::io::parquet_writer_options::set_row_group_size_rows | ( | size_type | size_rows | ) |
Sets the maximum row group size, in rows.
size_rows | Maximum row group size, in rows to set |
|
inline |
Sets sorting columns.
sorting_columns | Column sort order metadata |
Definition at line 920 of file parquet.hpp.
|
inline |
Sets the level of statistics.
sf | Level of statistics requested in the output file |
Definition at line 810 of file parquet.hpp.