io/types.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
22 #pragma once
23 
24 #include <cudf/table/table.hpp>
25 #include <cudf/types.hpp>
26 #include <cudf/utilities/span.hpp>
27 
28 #include <map>
29 #include <memory>
30 #include <optional>
31 #include <string>
32 #include <unordered_map>
33 #include <vector>
34 
35 // Forward declarations
36 namespace arrow {
37 namespace io {
38 class RandomAccessFile;
39 }
40 } // namespace arrow
41 
42 namespace cudf {
44 namespace io {
45 class data_sink;
46 class datasource;
47 } // namespace io
48 } // namespace cudf
49 
51 namespace cudf {
53 namespace io {
/**
 * @brief Compression algorithms.
 */
enum class compression_type {
  NONE,    ///< No compression
  AUTO,    ///< NOTE(review): presumably lets the library choose/detect the format; confirm
  SNAPPY,  ///< Snappy
  GZIP,    ///< GZIP
  BZIP2,   ///< BZIP2
  BROTLI,  ///< Brotli
  ZIP,     ///< ZIP
  XZ,      ///< XZ
  ZLIB,    ///< ZLIB
  LZ4,     ///< LZ4
  LZO,     ///< LZO
  ZSTD     ///< Zstandard
};
71 
/**
 * @brief Data source or destination types.
 */
enum class io_type {
  FILEPATH,          ///< Input/output is a file path
  HOST_BUFFER,       ///< Selected by the source_info/sink_info constructors taking host buffers
  DEVICE_BUFFER,     ///< Selected by the source_info constructors taking device spans
  VOID,              ///< Default type of a sink_info that was given no destination
  USER_IMPLEMENTED,  ///< Selected by the constructors taking datasource / data_sink pointers
};
82 
/**
 * @brief Behavior when handling quotations in field data.
 */
enum class quote_style {
  MINIMAL,     ///< Quote only fields which contain special characters
  ALL,         ///< Quote all fields
  NONNUMERIC,  ///< NOTE(review): presumably quotes every non-numeric field; confirm
  NONE         ///< NOTE(review): presumably never quotes fields; confirm
};
92 
/**
 * @brief Column statistics granularity type for parquet/orc writers.
 */
enum statistics_freq {
  STATISTICS_NONE,      ///< No column statistics
  STATISTICS_ROWGROUP,  ///< Per-Rowgroup column statistics
  STATISTICS_PAGE,      ///< Per-page column statistics
  STATISTICS_COLUMN,    ///< Full column and offset indices. Implies STATISTICS_ROWGROUP
};
102 
/**
 * @brief Statistics about compression performed by a writer.
 */
class writer_compression_statistics {
 public:
  /**
   * @brief Default constructor.
   *
   * All counters start at zero.
   */
  writer_compression_statistics() = default;

  /**
   * @brief Constructor with initial values.
   *
   * @param num_compressed_bytes Number of bytes in blocks that were successfully compressed
   * @param num_failed_bytes Number of bytes in blocks that failed to compress
   * @param num_skipped_bytes Number of bytes in blocks that were skipped during compression
   * @param num_compressed_output_bytes Number of bytes in the compressed output; used as the
   * denominator of `compression_ratio()`
   */
  writer_compression_statistics(size_t num_compressed_bytes,
                                size_t num_failed_bytes,
                                size_t num_skipped_bytes,
                                size_t num_compressed_output_bytes)
    : _num_compressed_bytes(num_compressed_bytes),
      _num_failed_bytes(num_failed_bytes),
      _num_skipped_bytes(num_skipped_bytes),
      _num_compressed_output_bytes(num_compressed_output_bytes)
  {
  }

  /**
   * @brief Adds the values from another `writer_compression_statistics` object.
   *
   * @param other The other writer_compression_statistics object
   * @return Reference to this object, with every counter increased by the matching counter of
   * `other`
   */
  writer_compression_statistics& operator+=(const writer_compression_statistics& other) noexcept
  {
    _num_compressed_bytes += other._num_compressed_bytes;
    _num_failed_bytes += other._num_failed_bytes;
    _num_skipped_bytes += other._num_skipped_bytes;
    _num_compressed_output_bytes += other._num_compressed_output_bytes;
    return *this;
  }

  /**
   * @brief Returns the number of bytes in blocks that were successfully compressed.
   *
   * @return Size of the input that was successfully compressed
   */
  [[nodiscard]] auto num_compressed_bytes() const noexcept { return _num_compressed_bytes; }

  /**
   * @brief Returns the number of bytes in blocks that failed to compress.
   *
   * @return Size of the input that failed to compress
   */
  [[nodiscard]] auto num_failed_bytes() const noexcept { return _num_failed_bytes; }

  /**
   * @brief Returns the number of bytes in blocks that were skipped during compression.
   *
   * @return Size of the input that was skipped
   */
  [[nodiscard]] auto num_skipped_bytes() const noexcept { return _num_skipped_bytes; }

  /**
   * @brief Returns the total size of compression inputs.
   *
   * @return Sum of the compressed, failed and skipped input byte counts
   */
  [[nodiscard]] auto num_total_input_bytes() const noexcept
  {
    return num_compressed_bytes() + num_failed_bytes() + num_skipped_bytes();
  }

  /**
   * @brief Returns the compression ratio for the successfully compressed blocks.
   *
   * The division is performed in floating point, so no exception is raised when the compressed
   * output size is zero: the result is NaN when both counters are zero and infinity otherwise.
   *
   * @return Successfully compressed input bytes divided by compressed output bytes
   */
  [[nodiscard]] auto compression_ratio() const noexcept
  {
    return static_cast<double>(num_compressed_bytes()) /
           static_cast<double>(_num_compressed_output_bytes);
  }

 private:
  std::size_t _num_compressed_bytes        = 0;  ///< Bytes successfully compressed
  std::size_t _num_failed_bytes            = 0;  ///< Bytes that failed to compress
  std::size_t _num_skipped_bytes           = 0;  ///< Bytes skipped during compression
  std::size_t _num_compressed_output_bytes = 0;  ///< Size of the compressed output
};
200 
/**
 * @brief Control use of dictionary encoding for parquet writer.
 */
enum dictionary_policy {
  NEVER,     ///< Never use dictionary encoding
  ADAPTIVE,  ///< Use dictionary when it will not impact compression
  ALWAYS     ///< Use dictionary regardless of impact on compression
};
209 
/**
 * @brief Detailed name information for output columns.
 *
 * Each node holds its own name and the name information of its children, so a value of this
 * type describes a whole column hierarchy.
 */
struct column_name_info {
  std::string name;                        ///< Column name
  std::vector<column_name_info> children;  ///< Child column names

  /**
   * @brief Construct a column name info with a name and no children.
   *
   * Intentionally not `explicit`: existing callers may rely on the implicit conversion from
   * `std::string`.
   *
   * @param _name Column name
   */
  column_name_info(std::string const& _name) : name(_name) {}
  column_name_info() = default;
};
227 
232  std::vector<column_name_info>
234  std::map<std::string, std::string> user_data;
235  std::vector<std::unordered_map<std::string, std::string>>
238 };
239 
244  std::unique_ptr<table> tbl;
246 };
247 
/**
 * @brief Non-owning view of a host memory buffer.
 *
 * The struct only stores the pointer and the length it is given; it never allocates, copies or
 * frees the memory it points to.
 */
struct host_buffer {
  // TODO: to be replaced by `host_span`
  char const* data = nullptr;  ///< Pointer to the buffer
  size_t size      = 0;        ///< Size of the buffer
  host_buffer()    = default;

  /**
   * @brief Construct a new host buffer object.
   *
   * @param data Pointer to the buffer
   * @param size Size of the buffer
   */
  host_buffer(char const* data, size_t size) : data(data), size(size) {}
};
268 
/**
 * @brief Returns `true` if the type is byte-like, meaning it is reasonable to pass as a pointer
 * to bytes.
 *
 * Top-level `const`/`volatile` qualifiers are ignored. The accepted types are `int8_t`, `char`,
 * `uint8_t`, `unsigned char` and `std::byte`.
 *
 * @tparam T The representation type
 * @return `true` if the type is considered a byte-like type
 */
template <typename T>
constexpr inline auto is_byte_like_type()
{
  using non_cv_T = std::remove_cv_t<T>;
  return std::is_same_v<non_cv_T, int8_t> || std::is_same_v<non_cv_T, char> ||
         std::is_same_v<non_cv_T, uint8_t> || std::is_same_v<non_cv_T, unsigned char> ||
         std::is_same_v<non_cv_T, std::byte>;
}
284 
288 struct source_info {
289  std::vector<std::shared_ptr<arrow::io::RandomAccessFile>> _files;
290 
291  source_info() = default;
292 
298  explicit source_info(std::vector<std::string> const& file_paths) : _filepaths(file_paths) {}
299 
305  explicit source_info(std::string const& file_path) : _filepaths({file_path}) {}
306 
314  explicit source_info(std::vector<host_buffer> const& host_buffers) : _type(io_type::HOST_BUFFER)
315  {
316  _host_buffers.reserve(host_buffers.size());
318  host_buffers.end(),
319  std::back_inserter(_host_buffers),
320  [](auto const hb) {
321  return cudf::host_span<std::byte const>{
322  reinterpret_cast<std::byte const*>(hb.data), hb.size};
323  });
324  }
325 
334  explicit source_info(const char* host_data, size_t size)
335  : _type(io_type::HOST_BUFFER),
336  _host_buffers(
337  {cudf::host_span<std::byte const>(reinterpret_cast<std::byte const*>(host_data), size)})
338  {
339  }
340 
346  template <typename T, CUDF_ENABLE_IF(is_byte_like_type<std::remove_cv_t<T>>())>
347  explicit source_info(cudf::host_span<cudf::host_span<T>> const host_buffers)
348  : _type(io_type::HOST_BUFFER)
349  {
350  if constexpr (not std::is_same_v<std::remove_cv_t<T>, std::byte>) {
351  _host_buffers.reserve(host_buffers.size());
352  std::transform(host_buffers.begin(),
353  host_buffers.end(),
354  std::back_inserter(_host_buffers),
355  [](auto const s) {
356  return cudf::host_span<std::byte const>{
357  reinterpret_cast<std::byte const*>(s.data()), s.size()};
358  });
359  } else {
360  _host_buffers.assign(host_buffers.begin(), host_buffers.end());
361  }
362  }
363 
369  template <typename T, CUDF_ENABLE_IF(is_byte_like_type<std::remove_cv_t<T>>())>
370  explicit source_info(cudf::host_span<T> host_data)
371  : _type(io_type::HOST_BUFFER),
372  _host_buffers{cudf::host_span<std::byte const>(
373  reinterpret_cast<std::byte const*>(host_data.data()), host_data.size())}
374  {
375  }
376 
383  : _type(io_type::DEVICE_BUFFER), _device_buffers(device_buffers.begin(), device_buffers.end())
384  {
385  }
386 
393  : _type(io_type::DEVICE_BUFFER), _device_buffers({{d_buffer}})
394  {
395  }
396 
402  explicit source_info(std::vector<cudf::io::datasource*> const& sources)
403  : _type(io_type::USER_IMPLEMENTED), _user_sources(sources)
404  {
405  }
406 
413  : _type(io_type::USER_IMPLEMENTED), _user_sources({source})
414  {
415  }
416 
422  [[nodiscard]] auto type() const { return _type; }
428  [[nodiscard]] auto const& filepaths() const { return _filepaths; }
434  [[nodiscard]] auto const& host_buffers() const { return _host_buffers; }
440  [[nodiscard]] auto const& device_buffers() const { return _device_buffers; }
446  [[nodiscard]] auto const& files() const { return _files; }
452  [[nodiscard]] auto const& user_sources() const { return _user_sources; }
453 
454  private:
455  io_type _type = io_type::FILEPATH;
456  std::vector<std::string> _filepaths;
457  std::vector<cudf::host_span<std::byte const>> _host_buffers;
458  std::vector<cudf::device_span<std::byte const>> _device_buffers;
459  std::vector<cudf::io::datasource*> _user_sources;
460 };
461 
465 struct sink_info {
466  sink_info() = default;
472  sink_info(size_t num_sinks) : _num_sinks(num_sinks) {}
473 
479  explicit sink_info(std::vector<std::string> const& file_paths)
480  : _type(io_type::FILEPATH), _num_sinks(file_paths.size()), _filepaths(file_paths)
481  {
482  }
483 
489  explicit sink_info(std::string const& file_path)
490  : _type(io_type::FILEPATH), _filepaths({file_path})
491  {
492  }
493 
499  explicit sink_info(std::vector<std::vector<char>*> const& buffers)
500  : _type(io_type::HOST_BUFFER), _num_sinks(buffers.size()), _buffers(buffers)
501  {
502  }
508  explicit sink_info(std::vector<char>* buffer) : _type(io_type::HOST_BUFFER), _buffers({buffer}) {}
509 
515  explicit sink_info(std::vector<cudf::io::data_sink*> const& user_sinks)
516  : _type(io_type::USER_IMPLEMENTED), _num_sinks(user_sinks.size()), _user_sinks(user_sinks)
517  {
518  }
519 
525  explicit sink_info(class cudf::io::data_sink* user_sink)
526  : _type(io_type::USER_IMPLEMENTED), _user_sinks({user_sink})
527  {
528  }
529 
535  [[nodiscard]] auto type() const { return _type; }
541  [[nodiscard]] auto num_sinks() const { return _num_sinks; }
547  [[nodiscard]] auto const& filepaths() const { return _filepaths; }
553  [[nodiscard]] auto const& buffers() const { return _buffers; }
559  [[nodiscard]] auto const& user_sinks() const { return _user_sinks; }
560 
561  private:
562  io_type _type = io_type::VOID;
563  size_t _num_sinks = 1;
564  std::vector<std::string> _filepaths;
565  std::vector<std::vector<char>*> _buffers;
566  std::vector<cudf::io::data_sink*> _user_sinks;
567 };
568 
569 class table_input_metadata;
570 
575  friend table_input_metadata;
576  std::string _name = "";
577  std::optional<bool> _nullable;
578  bool _list_column_is_map = false;
579  bool _use_int96_timestamp = false;
580  bool _output_as_binary = false;
581  std::optional<uint8_t> _decimal_precision;
582  std::optional<int32_t> _parquet_field_id;
583  std::vector<column_in_metadata> children;
584 
585  public:
586  column_in_metadata() = default;
592  column_in_metadata(std::string_view name) : _name{name} {}
600  {
601  children.push_back(child);
602  return *this;
603  }
604 
611  column_in_metadata& set_name(std::string const& name) noexcept
612  {
613  _name = name;
614  return *this;
615  }
616 
624  {
625  _nullable = nullable;
626  return *this;
627  }
628 
637  {
638  _list_column_is_map = true;
639  return *this;
640  }
641 
651  {
652  _use_int96_timestamp = req;
653  return *this;
654  }
655 
663  column_in_metadata& set_decimal_precision(uint8_t precision) noexcept
664  {
665  _decimal_precision = precision;
666  return *this;
667  }
668 
675  column_in_metadata& set_parquet_field_id(int32_t field_id) noexcept
676  {
677  _parquet_field_id = field_id;
678  return *this;
679  }
680 
690  {
691  _output_as_binary = binary;
692  return *this;
693  }
694 
701  column_in_metadata& child(size_type i) noexcept { return children[i]; }
702 
709  [[nodiscard]] column_in_metadata const& child(size_type i) const noexcept { return children[i]; }
710 
716  [[nodiscard]] std::string get_name() const noexcept { return _name; }
717 
723  [[nodiscard]] bool is_nullability_defined() const noexcept { return _nullable.has_value(); }
724 
732  [[nodiscard]] bool nullable() const { return _nullable.value(); }
733 
739  [[nodiscard]] bool is_map() const noexcept { return _list_column_is_map; }
740 
747  [[nodiscard]] bool is_enabled_int96_timestamps() const noexcept { return _use_int96_timestamp; }
748 
754  [[nodiscard]] bool is_decimal_precision_set() const noexcept
755  {
756  return _decimal_precision.has_value();
757  }
758 
766  [[nodiscard]] uint8_t get_decimal_precision() const { return _decimal_precision.value(); }
767 
773  [[nodiscard]] bool is_parquet_field_id_set() const noexcept
774  {
775  return _parquet_field_id.has_value();
776  }
777 
785  [[nodiscard]] int32_t get_parquet_field_id() const { return _parquet_field_id.value(); }
786 
792  [[nodiscard]] size_type num_children() const noexcept { return children.size(); }
793 
799  [[nodiscard]] bool is_enabled_output_as_binary() const noexcept { return _output_as_binary; }
800 };
801 
806  public:
807  table_input_metadata() = default; // Required by cython
808 
817 
818  std::vector<column_in_metadata> column_metadata;
819 };
820 
830 
831  partition_info() = default;
838  partition_info(size_type start_row, size_type num_rows) : start_row(start_row), num_rows(num_rows)
839  {
840  }
841 };
842 
848  // Whether to read binary data as a string column
849  bool _convert_binary_to_strings{true};
850 
851  std::vector<reader_column_schema> children;
852 
853  public:
854  reader_column_schema() = default;
855 
861  reader_column_schema(size_type number_of_children) { children.resize(number_of_children); }
862 
869  {
870  children.assign(child_span.begin(), child_span.end());
871  }
872 
880  {
881  children.push_back(child);
882  return *this;
883  }
884 
891  [[nodiscard]] reader_column_schema& child(size_type i) { return children[i]; }
892 
899  [[nodiscard]] reader_column_schema const& child(size_type i) const { return children[i]; }
900 
910  {
911  _convert_binary_to_strings = convert_to_string;
912  return *this;
913  }
914 
920  [[nodiscard]] bool is_enabled_convert_binary_to_strings() const
921  {
922  return _convert_binary_to_strings;
923  }
924 
930  [[nodiscard]] size_t get_num_children() const { return children.size(); }
931 };
932 
933 } // namespace io
934 } // namespace cudf
cudf::io::writer_compression_statistics::num_skipped_bytes
auto num_skipped_bytes() const noexcept
Returns the number of bytes in blocks that were skipped during compression.
Definition: io/types.hpp:169
cudf::io::ADAPTIVE
@ ADAPTIVE
Use dictionary when it will not impact compression.
Definition: io/types.hpp:206
cudf::io::partition_info::partition_info
partition_info(size_type start_row, size_type num_rows)
Construct a new partition_info.
Definition: io/types.hpp:838
cudf::io::compression_type
compression_type
Compression algorithms.
Definition: io/types.hpp:57
cudf::io::quote_style::MINIMAL
@ MINIMAL
Quote only fields which contain special characters.
cudf::io::statistics_freq
statistics_freq
Column statistics granularity type for parquet/orc writers.
Definition: io/types.hpp:96
cudf::io::source_info::source_info
source_info(std::vector< host_buffer > const &host_buffers)
Construct a new source info object for multiple buffers in host memory.
Definition: io/types.hpp:314
cudf::io::writer_compression_statistics::writer_compression_statistics
writer_compression_statistics(size_t num_compressed_bytes, size_t num_failed_bytes, size_t num_skipped_bytes, size_t num_compressed_output_bytes)
Constructor with initial values.
Definition: io/types.hpp:121
cudf::io::reader_column_schema::child
reader_column_schema const & child(size_type i) const
Get const reference to a child of this column.
Definition: io/types.hpp:899
cudf::io::sink_info
Destination information for write interfaces.
Definition: io/types.hpp:465
cudf::io::column_in_metadata::add_child
column_in_metadata & add_child(column_in_metadata const &child)
Add the children metadata of this column.
Definition: io/types.hpp:599
cudf::io::reader_column_schema
schema element for reader
Definition: io/types.hpp:847
cudf::size_type
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:80
cudf::io::table_input_metadata::column_metadata
std::vector< column_in_metadata > column_metadata
List of column metadata.
Definition: io/types.hpp:818
cudf::io::NEVER
@ NEVER
Never use dictionary encoding.
Definition: io/types.hpp:205
cudf::io::sink_info::type
auto type() const
Get the type of the sink.
Definition: io/types.hpp:535
cudf::io::source_info::source_info
source_info(cudf::host_span< cudf::host_span< T >> const host_buffers)
Construct a new source info object for multiple buffers in host memory.
Definition: io/types.hpp:347
cudf::io::sink_info::sink_info
sink_info(std::string const &file_path)
Construct a new sink info object for a single file.
Definition: io/types.hpp:489
cudf::io::source_info::device_buffers
auto const & device_buffers() const
Get the device buffers of the input.
Definition: io/types.hpp:440
cudf::host_span
C++20 std::span with reduced feature set.
Definition: span.hpp:210
types.hpp
Type declarations for libcudf.
cudf::io::column_in_metadata::set_parquet_field_id
column_in_metadata & set_parquet_field_id(int32_t field_id) noexcept
Set the parquet field id of this column.
Definition: io/types.hpp:675
cudf::io::is_byte_like_type
constexpr auto is_byte_like_type()
Returns true if the type is byte-like, meaning it is reasonable to pass as a pointer to bytes.
Definition: io/types.hpp:277
cudf::io::sink_info::sink_info
sink_info(size_t num_sinks)
Construct a new sink info object.
Definition: io/types.hpp:472
cudf::io::column_in_metadata::num_children
size_type num_children() const noexcept
Get the number of children of this column.
Definition: io/types.hpp:792
cudf::io::table_input_metadata::table_input_metadata
table_input_metadata(table_view const &table)
Construct a new table_input_metadata from a table_view.
cudf::io::table_metadata::schema_info
std::vector< column_name_info > schema_info
Detailed name information for the entire output hierarchy.
Definition: io/types.hpp:233
cudf::io::writer_compression_statistics::writer_compression_statistics
writer_compression_statistics()=default
Default constructor.
cudf::detail::span_base< T, cudf::dynamic_extent, host_span< T, cudf::dynamic_extent > >::end
constexpr iterator end() const noexcept
Returns an iterator to the element following the last element of the span.
Definition: span.hpp:123
cudf::io::io_type::FILEPATH
@ FILEPATH
Input/output is a file path.
cudf::table
A set of cudf::column's of the same size.
Definition: table.hpp:40
cudf::io::column_in_metadata::nullable
bool nullable() const
Gets the explicitly set nullability for this column.
Definition: io/types.hpp:732
cudf::io::column_name_info::name
std::string name
Column name.
Definition: io/types.hpp:217
cudf::io::column_in_metadata::is_nullability_defined
bool is_nullability_defined() const noexcept
Get whether nullability has been explicitly set for this column.
Definition: io/types.hpp:723
cudf::io::column_in_metadata::is_enabled_output_as_binary
bool is_enabled_output_as_binary() const noexcept
Get whether to encode this column as binary or string data.
Definition: io/types.hpp:799
cudf::io::table_input_metadata
Metadata for a table.
Definition: io/types.hpp:805
cudf::io::sink_info::sink_info
sink_info(std::vector< cudf::io::data_sink * > const &user_sinks)
Construct a new sink info object for multiple user-implemented sinks.
Definition: io/types.hpp:515
cudf::io::source_info::source_info
source_info(const char *host_data, size_t size)
Construct a new source info object for a single buffer.
Definition: io/types.hpp:334
cudf::io::reader_column_schema::child
reader_column_schema & child(size_type i)
Get reference to a child of this column.
Definition: io/types.hpp:891
cudf::io::column_in_metadata::get_decimal_precision
uint8_t get_decimal_precision() const
Get the decimal precision that was set for this column.
Definition: io/types.hpp:766
cudf::io::writer_compression_statistics::compression_ratio
auto compression_ratio() const noexcept
Returns the compression ratio for the successfully compressed blocks.
Definition: io/types.hpp:189
cudf::nullable
bool nullable(table_view const &view)
Returns True if any of the columns in the table is nullable. (not entire hierarchy)
Definition: table_view.hpp:305
cudf::io::host_buffer
Non-owning view of a host memory buffer.
Definition: io/types.hpp:255
cudf::io::host_buffer::size
size_t size
Size of the buffer.
Definition: io/types.hpp:258
cudf::io::reader_column_schema::reader_column_schema
reader_column_schema(size_type number_of_children)
Construct a new reader column schema object.
Definition: io/types.hpp:861
cudf::io::column_in_metadata::child
column_in_metadata const & child(size_type i) const noexcept
Get const reference to a child of this column.
Definition: io/types.hpp:709
cudf::io::column_in_metadata
Metadata for a column.
Definition: io/types.hpp:574
cudf::io::column_in_metadata::get_parquet_field_id
int32_t get_parquet_field_id() const
Get the parquet field id that was set for this column.
Definition: io/types.hpp:785
cudf::io::column_in_metadata::set_int96_timestamps
column_in_metadata & set_int96_timestamps(bool req) noexcept
Specifies whether this timestamp column should be encoded using the deprecated int96 physical type....
Definition: io/types.hpp:650
cudf::io::STATISTICS_ROWGROUP
@ STATISTICS_ROWGROUP
Per-Rowgroup column statistics.
Definition: io/types.hpp:98
cudf::io::column_in_metadata::get_name
std::string get_name() const noexcept
Get the name of this column.
Definition: io/types.hpp:716
cudf::io::column_in_metadata::set_name
column_in_metadata & set_name(std::string const &name) noexcept
Set the name of this column.
Definition: io/types.hpp:611
cudf::io::source_info::host_buffers
auto const & host_buffers() const
Get the host buffers of the input.
Definition: io/types.hpp:434
cudf::io::source_info::filepaths
auto const & filepaths() const
Get the filepaths of the input.
Definition: io/types.hpp:428
cudf::io::table_metadata::user_data
std::map< std::string, std::string > user_data
Definition: io/types.hpp:234
cudf::io::sink_info::filepaths
auto const & filepaths() const
Get the filepaths of the output.
Definition: io/types.hpp:547
cudf::io::column_in_metadata::column_in_metadata
column_in_metadata(std::string_view name)
Construct a new column in metadata object.
Definition: io/types.hpp:592
cudf::io::table_metadata
Table metadata returned by IO readers.
Definition: io/types.hpp:231
cudf::io::source_info::files
auto const & files() const
Get the input files.
Definition: io/types.hpp:446
cudf::io::table_metadata::per_file_user_data
std::vector< std::unordered_map< std::string, std::string > > per_file_user_data
Per file format-dependent metadata as key-values pairs.
Definition: io/types.hpp:237
cudf::io::sink_info::sink_info
sink_info(std::vector< char > *buffer)
Construct a new sink info object for a single host buffer.
Definition: io/types.hpp:508
cudf::io::column_in_metadata::set_nullability
column_in_metadata & set_nullability(bool nullable) noexcept
Set the nullability of this column.
Definition: io/types.hpp:623
cudf::io::sink_info::sink_info
sink_info(std::vector< std::vector< char > * > const &buffers)
Construct a new sink info object for multiple host buffers.
Definition: io/types.hpp:499
cudf::io::source_info
Source information for read interfaces.
Definition: io/types.hpp:288
cudf::io::writer_compression_statistics::num_total_input_bytes
auto num_total_input_bytes() const noexcept
Returns the total size of compression inputs.
Definition: io/types.hpp:176
cudf::detail::span_base< T, cudf::dynamic_extent, host_span< T, cudf::dynamic_extent > >::data
constexpr pointer data() const noexcept
Returns a pointer to the beginning of the sequence.
Definition: span.hpp:129
cudf::io::writer_compression_statistics::operator+=
writer_compression_statistics & operator+=(const writer_compression_statistics &other) noexcept
Adds the values from another writer_compression_statistics object.
Definition: io/types.hpp:138
cudf::io::column_in_metadata::is_map
bool is_map() const noexcept
If this is the metadata of a list column, returns whether it is to be encoded as a map.
Definition: io/types.hpp:739
cudf::io::column_in_metadata::set_output_as_binary
column_in_metadata & set_output_as_binary(bool binary) noexcept
Specifies whether this column should be written as binary or string data. Only valid for the following...
Definition: io/types.hpp:689
cudf::io::data_sink
Interface class for storing the output data from the writers.
Definition: data_sink.hpp:36
cudf::io::source_info::source_info
source_info(std::vector< cudf::io::datasource * > const &sources)
Construct a new source info object for multiple user-implemented sources.
Definition: io/types.hpp:402
cudf::io::column_name_info::children
std::vector< column_name_info > children
Child column names.
Definition: io/types.hpp:218
cudf::io::STATISTICS_COLUMN
@ STATISTICS_COLUMN
Full column and offset indices. Implies STATISTICS_ROWGROUP.
Definition: io/types.hpp:100
cudf::table_view
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:187
cudf::io::STATISTICS_PAGE
@ STATISTICS_PAGE
Per-page column statistics.
Definition: io/types.hpp:99
cudf::transform
std::unique_ptr< column > transform(column_view const &input, std::string const &unary_udf, data_type output_type, bool is_ptx, rmm::mr::device_memory_resource *mr=rmm::mr::get_current_device_resource())
Creates a new column by applying a unary function against every element of an input column.
cudf::io::compression_type::NONE
@ NONE
No compression.
cudf::io::sink_info::num_sinks
auto num_sinks() const
Get the number of sinks.
Definition: io/types.hpp:541
cudf::io::source_info::source_info
source_info(cudf::device_span< std::byte const > d_buffer)
Construct a new source info object from a device buffer.
Definition: io/types.hpp:392
cudf::io::column_in_metadata::is_enabled_int96_timestamps
bool is_enabled_int96_timestamps() const noexcept
Get whether to encode this timestamp column using deprecated int96 physical type.
Definition: io/types.hpp:747
cudf::io::source_info::source_info
source_info(std::vector< std::string > const &file_paths)
Construct a new source info object for multiple files.
Definition: io/types.hpp:298
cudf::io::column_in_metadata::is_parquet_field_id_set
bool is_parquet_field_id_set() const noexcept
Get whether parquet field id has been set for this column.
Definition: io/types.hpp:773
cudf::io::sink_info::sink_info
sink_info(std::vector< std::string > const &file_paths)
Construct a new sink info object for multiple files.
Definition: io/types.hpp:479
cudf::io::sink_info::sink_info
sink_info(class cudf::io::data_sink *user_sink)
Construct a new sink info object for a single user-implemented sink.
Definition: io/types.hpp:525
cudf::io::reader_column_schema::get_num_children
size_t get_num_children() const
Get the number of child objects.
Definition: io/types.hpp:930
cudf::io::quote_style::ALL
@ ALL
Quote all fields.
cudf::io::source_info::_files
std::vector< std::shared_ptr< arrow::io::RandomAccessFile > > _files
Input files.
Definition: io/types.hpp:289
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::io::host_buffer::host_buffer
host_buffer(const char *data, size_t size)
Construct a new host buffer object.
Definition: io/types.hpp:266
cudf::io::source_info::user_sources
auto const & user_sources() const
Get the user sources of the input.
Definition: io/types.hpp:452
cudf::io::column_in_metadata::set_list_column_as_map
column_in_metadata & set_list_column_as_map() noexcept
Specify that this list column should be encoded as a map in the written file.
Definition: io/types.hpp:636
cudf::io::writer_compression_statistics
Statistics about compression performed by a writer.
Definition: io/types.hpp:106
cudf::io::reader_column_schema::is_enabled_convert_binary_to_strings
bool is_enabled_convert_binary_to_strings() const
Get whether binary data in this column is read as a string column.
Definition: io/types.hpp:920
cudf::io::table_with_metadata::tbl
std::unique_ptr< table > tbl
Table.
Definition: io/types.hpp:244
cudf::io::column_in_metadata::is_decimal_precision_set
bool is_decimal_precision_set() const noexcept
Get whether precision has been set for this decimal column.
Definition: io/types.hpp:754
cudf::io::dictionary_policy
dictionary_policy
Control use of dictionary encoding for parquet writer.
Definition: io/types.hpp:204
cudf::io::STATISTICS_NONE
@ STATISTICS_NONE
No column statistics.
Definition: io/types.hpp:97
cudf::io::partition_info
Information used while writing partitioned datasets.
Definition: io/types.hpp:827
cudf::io::partition_info::num_rows
size_type num_rows
The number of rows in the partition.
Definition: io/types.hpp:829
cudf::io::source_info::source_info
source_info(std::string const &file_path)
Construct a new source info object for a single file.
Definition: io/types.hpp:305
table.hpp
Class definition for cudf::table.
cudf::io::column_name_info
Detailed name information for output columns.
Definition: io/types.hpp:216
cudf::io::source_info::type
auto type() const
Get the type of the input.
Definition: io/types.hpp:422
cudf::io::table_with_metadata
Table with table metadata used by io readers to return the metadata by value.
Definition: io/types.hpp:243
cudf::io::sink_info::user_sinks
auto const & user_sinks() const
Get the user sinks of the output.
Definition: io/types.hpp:559
cudf::io::reader_column_schema::reader_column_schema
reader_column_schema(host_span< reader_column_schema > const &child_span)
Construct a new reader column schema object with a span defining the children.
Definition: io/types.hpp:868
cudf::io::source_info::source_info
source_info(cudf::host_span< T > host_data)
Construct a new source info object for a single buffer.
Definition: io/types.hpp:370
cudf::io::column_name_info::column_name_info
column_name_info(std::string const &_name)
Construct a column name info with a name and no children.
Definition: io/types.hpp:224
cudf::io::column_in_metadata::child
column_in_metadata & child(size_type i) noexcept
Get reference to a child of this column.
Definition: io/types.hpp:701
cudf::device_span
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:277
cudf::io::datasource
Interface class for providing input data to the readers.
Definition: datasource.hpp:64
cudf::io::writer_compression_statistics::num_compressed_bytes
auto num_compressed_bytes() const noexcept
Returns the number of bytes in blocks that were successfully compressed.
Definition: io/types.hpp:155
cudf::io::reader_column_schema::set_convert_binary_to_strings
reader_column_schema & set_convert_binary_to_strings(bool convert_to_string)
Specifies whether this column should be read as binary or string data. Only valid for the following...
Definition: io/types.hpp:909
cudf::io::sink_info::buffers
auto const & buffers() const
Get the host buffers of the output.
Definition: io/types.hpp:553
cudf::io::reader_column_schema::add_child
reader_column_schema & add_child(reader_column_schema const &child)
Add the children metadata of this column.
Definition: io/types.hpp:879
cudf::io::writer_compression_statistics::num_failed_bytes
auto num_failed_bytes() const noexcept
Returns the number of bytes in blocks that failed to compress.
Definition: io/types.hpp:162
cudf::detail::span_base< T, cudf::dynamic_extent, host_span< T, cudf::dynamic_extent > >::size
constexpr size_type size() const noexcept
Returns the number of elements in the span.
Definition: span.hpp:136
cudf::io::source_info::source_info
source_info(cudf::host_span< cudf::device_span< std::byte const >> device_buffers)
Construct a new source info object for multiple buffers in device memory.
Definition: io/types.hpp:382
cudf::io::column_in_metadata::set_decimal_precision
column_in_metadata & set_decimal_precision(uint8_t precision) noexcept
Set the decimal precision of this column. Only valid if this column is a decimal (fixed-point) type.
Definition: io/types.hpp:663
cudf::io::host_buffer::data
char const * data
Pointer to the buffer.
Definition: io/types.hpp:257
cudf::io::partition_info::start_row
size_type start_row
The start row of the partition.
Definition: io/types.hpp:828
cudf::io::table_with_metadata::metadata
table_metadata metadata
Table metadata.
Definition: io/types.hpp:245
cudf::detail::span_base< T, cudf::dynamic_extent, host_span< T, cudf::dynamic_extent > >::begin
constexpr iterator begin() const noexcept
Returns an iterator to the first element of the span.
Definition: span.hpp:115
cudf::io::source_info::source_info
source_info(cudf::io::datasource *source)
Construct a new source info object for a single user-implemented source.
Definition: io/types.hpp:412
cudf::io::ALWAYS
@ ALWAYS
Use dictionary regardless of impact on compression.
Definition: io/types.hpp:207
cudf::io::quote_style
quote_style
Behavior when handling quotations in field data.
Definition: io/types.hpp:86
cudf::io::io_type
io_type
Data source or destination types.
Definition: io/types.hpp:75