io/json.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "types.hpp"
20 
21 #include <cudf/detail/utilities/visitor_overload.hpp>
23 #include <cudf/types.hpp>
24 #include <cudf/utilities/error.hpp>
26 
27 #include <map>
28 #include <string>
29 #include <utility>
30 #include <variant>
31 #include <vector>
32 
33 namespace CUDF_EXPORT cudf {
34 namespace io {
41 class json_reader_options_builder;
42 
52 
56  std::map<std::string, schema_element> child_types;
57 
61  std::optional<std::vector<std::string>> column_order;
62 };
63 
68  FAIL,
70 };
71 
96  public:
97  using dtype_variant =
98  std::variant<std::vector<data_type>,
99  std::map<std::string, data_type>,
100  std::map<std::string, schema_element>,
101  schema_element>;
102 
103  private:
104  source_info _source;
105 
106  // Data types of the column; empty to infer dtypes
107  dtype_variant _dtypes;
108  // Specify the compression format of the source or infer from file extension
109  compression_type _compression = compression_type::AUTO;
110 
111  // Read the file as a json object per line
112  bool _lines = false;
113  // Parse mixed types as a string column
114  bool _mixed_types_as_string = false;
115  // Delimiter separating records in JSON lines
116  char _delimiter = '\n';
117  // Prune columns on read, selected based on the _dtypes option
118  bool _prune_columns = false;
119  // Experimental features: new column tree construction
120  bool _experimental = false;
121 
122  // Bytes to skip from the start
123  size_t _byte_range_offset = 0;
124  // Bytes to read; always reads complete rows
125  size_t _byte_range_size = 0;
126 
127  // Whether to parse dates as DD/MM versus MM/DD
128  bool _dayfirst = false;
129 
130  // Whether to keep the quote characters of string values
131  bool _keep_quotes = false;
132 
133  // Normalize single quotes
134  bool _normalize_single_quotes = false;
135 
136  // Normalize unquoted spaces and tabs
137  bool _normalize_whitespace = false;
138 
139  // Whether to recover after an invalid JSON line
140  json_recovery_mode_t _recovery_mode = json_recovery_mode_t::FAIL;
141 
142  // Validation checks for spark
143  // Should the json validation be strict or not
144  // Note: strict validation enforces the JSON specification https://www.json.org/json-en.html
145  bool _strict_validation = false;
146  // Allow leading zeros for numeric values.
147  bool _allow_numeric_leading_zeros = true;
148  // Allow non-numeric numbers: NaN, +INF, -INF, +Infinity, Infinity, -Infinity
149  bool _allow_nonnumeric_numbers = true;
150  // Allow unquoted control characters
151  bool _allow_unquoted_control_chars = true;
152  // Additional values to recognize as null values
153  std::vector<std::string> _na_values;
154 
/**
 * @brief Constructor from source info.
 *
 * @param src Source information; it is moved into the stored `_source` member
 */
160  explicit json_reader_options(source_info src) : _source{std::move(src)} {}
161 
163 
164  public:
/**
 * @brief Default constructor.
 *
 * Every option keeps the value given by its in-class member initializer.
 */
170  json_reader_options() = default;
171 
179 
/**
 * @brief Returns source info.
 *
 * @return Const reference to the stored source info
 */
185  [[nodiscard]] source_info const& get_source() const { return _source; }
186 
/**
 * @brief Returns data types of the columns.
 *
 * @return Const reference to the variant holding the dtypes information
 */
192  [[nodiscard]] dtype_variant const& get_dtypes() const { return _dtypes; }
193 
/**
 * @brief Returns compression format of the source.
 *
 * @return The stored compression type
 */
199  [[nodiscard]] compression_type get_compression() const { return _compression; }
200 
/**
 * @brief Returns number of bytes to skip from source start.
 *
 * @return The stored byte range offset
 */
206  [[nodiscard]] size_t get_byte_range_offset() const { return _byte_range_offset; }
207 
/**
 * @brief Returns number of bytes to read.
 *
 * @return The stored byte range size
 */
213  [[nodiscard]] size_t get_byte_range_size() const { return _byte_range_size; }
214 
220  [[nodiscard]] size_t get_byte_range_size_with_padding() const
221  {
222  if (_byte_range_size == 0) {
223  return 0;
224  } else {
225  return _byte_range_size + get_byte_range_padding();
226  }
227  }
228 
234  [[nodiscard]] size_t get_byte_range_padding() const
235  {
236  auto const num_columns =
237  std::visit(cudf::detail::visitor_overload{
238  [](auto const& dtypes) { return dtypes.size(); },
239  [](schema_element const& dtypes) { return dtypes.child_types.size(); }},
240  _dtypes);
241 
242  auto const max_row_bytes = 16 * 1024; // 16KB
243  auto const column_bytes = 64;
244  auto const base_padding = 1024; // 1KB
245 
246  if (num_columns == 0) {
247  // Use flat size if the number of columns is not known
248  return max_row_bytes;
249  }
250 
251  // Expand the size based on the number of columns, if available
252  return base_padding + num_columns * column_bytes;
253  }
254 
/**
 * @brief Returns delimiter separating records in JSON lines.
 *
 * @return The stored delimiter character
 */
260  [[nodiscard]] char get_delimiter() const { return _delimiter; }
261 
/**
 * @brief Whether to read the file as a json object per line.
 *
 * @return `true` if reading the file as a json object per line
 */
267  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
268 
/**
 * @brief Whether to parse mixed types as a string column.
 *
 * @return `true` if mixed types are parsed as a string column
 */
274  [[nodiscard]] bool is_enabled_mixed_types_as_string() const { return _mixed_types_as_string; }
275 
/**
 * @brief Whether to prune columns on read, selected based on the set_dtypes option.
 *
 * @return `true` if column pruning is enabled
 */
286  [[nodiscard]] bool is_enabled_prune_columns() const { return _prune_columns; }
287 
/**
 * @brief Whether to enable experimental features.
 *
 * @return `true` if experimental features are enabled
 */
295  [[nodiscard]] bool is_enabled_experimental() const { return _experimental; }
296 
/**
 * @brief Whether to parse dates as DD/MM versus MM/DD.
 *
 * @return `true` if dates are parsed as DD/MM, `false` if MM/DD
 */
302  [[nodiscard]] bool is_enabled_dayfirst() const { return _dayfirst; }
303 
/**
 * @brief Whether the reader should keep quotes of string values.
 *
 * @return `true` if the reader should keep quotes, `false` otherwise
 */
309  [[nodiscard]] bool is_enabled_keep_quotes() const { return _keep_quotes; }
310 
/**
 * @brief Whether the reader should normalize single quotes around strings.
 *
 * @return `true` if the reader should normalize single quotes, `false` otherwise
 */
316  [[nodiscard]] bool is_enabled_normalize_single_quotes() const { return _normalize_single_quotes; }
317 
/**
 * @brief Whether the reader should normalize unquoted whitespace characters.
 *
 * @return `true` if the reader should normalize whitespace, `false` otherwise
 */
323  [[nodiscard]] bool is_enabled_normalize_whitespace() const { return _normalize_whitespace; }
324 
/**
 * @brief Queries the JSON reader's behavior on invalid JSON lines.
 *
 * @return The stored recovery mode
 */
330  [[nodiscard]] json_recovery_mode_t recovery_mode() const { return _recovery_mode; }
331 
/**
 * @brief Whether json validation should be enforced strictly or not.
 *
 * @return `true` if strict validation is enabled, `false` otherwise
 */
337  [[nodiscard]] bool is_strict_validation() const { return _strict_validation; }
338 
/**
 * @brief Whether leading zeros are allowed in numeric values.
 *
 * @return `true` if leading zeros are allowed in numeric values
 */
346  [[nodiscard]] bool is_allowed_numeric_leading_zeros() const
347  {
348  return _allow_numeric_leading_zeros;
349  }
350 
/**
 * @brief Whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,
 * and -Infinity.
 *
 * @return `true` if these non-numeric number values are allowed
 */
359  [[nodiscard]] bool is_allowed_nonnumeric_numbers() const { return _allow_nonnumeric_numbers; }
360 
/**
 * @brief Whether characters greater than or equal to 0 and less than 32 are allowed in a quoted
 * string without some form of escaping.
 *
 * @return `true` if unquoted control characters are allowed
 */
369  [[nodiscard]] bool is_allowed_unquoted_control_chars() const
370  {
371  return _allow_unquoted_control_chars;
372  }
373 
/**
 * @brief Returns additional values to recognize as null values.
 *
 * @return Const reference to the stored list of additional null-value strings
 */
379  [[nodiscard]] std::vector<std::string> const& get_na_values() const { return _na_values; }
380 
/**
 * @brief Set data types for columns to be read.
 *
 * @param types Vector of dtypes; it is moved into the stored dtypes variant
 */
386  void set_dtypes(std::vector<data_type> types) { _dtypes = std::move(types); }
387 
/**
 * @brief Set data types for columns to be read.
 *
 * @param types Map from string keys to dtypes; it is moved into the stored dtypes variant
 */
393  void set_dtypes(std::map<std::string, data_type> types) { _dtypes = std::move(types); }
394 
/**
 * @brief Set data types for a potentially nested column hierarchy.
 *
 * @param types Map from string keys to schema elements; it is moved into the stored dtypes
 * variant
 */
400  void set_dtypes(std::map<std::string, schema_element> types) { _dtypes = std::move(types); }
401 
409 
/**
 * @brief Set the compression type.
 *
 * @param comp_type The compression type to store
 */
415  void set_compression(compression_type comp_type) { _compression = comp_type; }
416 
/**
 * @brief Set number of bytes to skip from source start.
 *
 * @param offset Number of bytes of offset
 */
422  void set_byte_range_offset(size_t offset) { _byte_range_offset = offset; }
423 
/**
 * @brief Set number of bytes to read.
 *
 * @param size Number of bytes to read
 */
429  void set_byte_range_size(size_t size) { _byte_range_size = size; }
430 
436  void set_delimiter(char delimiter)
437  {
438  switch (delimiter) {
439  case '{':
440  case '[':
441  case '}':
442  case ']':
443  case ',':
444  case ':':
445  case '"':
446  case '\'':
447  case '\\':
448  case ' ':
449  case '\t':
450  case '\r': CUDF_FAIL("Unsupported delimiter character.", std::invalid_argument); break;
451  }
452  _delimiter = delimiter;
453  }
454 
/**
 * @brief Set whether to read the file as a json object per line.
 *
 * @param val Boolean value to enable/disable the option
 */
460  void enable_lines(bool val) { _lines = val; }
461 
/**
 * @brief Set whether to parse mixed types as a string column.
 *
 * @param val Boolean value to enable/disable parsing mixed types as a string column
 */
468  void enable_mixed_types_as_string(bool val) { _mixed_types_as_string = val; }
469 
/**
 * @brief Set whether to prune columns on read, selected based on the set_dtypes option.
 *
 * @param val Boolean value to enable/disable column pruning
 */
479  void enable_prune_columns(bool val) { _prune_columns = val; }
480 
/**
 * @brief Set whether to enable experimental features.
 *
 * @param val Boolean value to enable/disable experimental features
 */
489  void enable_experimental(bool val) { _experimental = val; }
490 
/**
 * @brief Set whether to parse dates as DD/MM versus MM/DD.
 *
 * @param val `true` to parse dates as DD/MM, `false` for MM/DD
 */
496  void enable_dayfirst(bool val) { _dayfirst = val; }
497 
/**
 * @brief Set whether the reader should keep quotes of string values.
 *
 * @param val Boolean value to indicate whether the reader should keep quotes of string values
 */
504  void enable_keep_quotes(bool val) { _keep_quotes = val; }
505 
/**
 * @brief Set whether the reader should enable normalization of single quotes around strings.
 *
 * @param val Boolean value to enable/disable single-quote normalization
 */
512  void enable_normalize_single_quotes(bool val) { _normalize_single_quotes = val; }
513 
/**
 * @brief Set whether the reader should enable normalization of unquoted whitespace.
 *
 * @param val Boolean value to enable/disable whitespace normalization
 */
520  void enable_normalize_whitespace(bool val) { _normalize_whitespace = val; }
521 
/**
 * @brief Specifies the JSON reader's behavior on invalid JSON lines.
 *
 * @param val The recovery mode to store
 */
527  void set_recovery_mode(json_recovery_mode_t val) { _recovery_mode = val; }
528 
/**
 * @brief Set whether strict validation is enabled or not.
 *
 * @param val Boolean value to enable/disable strict validation
 */
534  void set_strict_validation(bool val) { _strict_validation = val; }
535 
545  {
546  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
547  _allow_numeric_leading_zeros = val;
548  }
549 
559  {
560  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
561  _allow_nonnumeric_numbers = val;
562  }
563 
574  {
575  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
576  _allow_unquoted_control_chars = val;
577  }
578 
/**
 * @brief Sets additional values to recognize as null values.
 *
 * @param vals Vector of strings to treat as null; it is moved into the stored list
 */
584  void set_na_values(std::vector<std::string> vals) { _na_values = std::move(vals); }
585 };
586 
591  json_reader_options options;
592 
593  public:
/**
 * @brief Default constructor.
 */
599  explicit json_reader_options_builder() = default;
600 
/**
 * @brief Constructor from source info.
 *
 * @param src Source information; it is moved into the `options` member being built
 */
606  explicit json_reader_options_builder(source_info src) : options{std::move(src)} {}
607 
/**
 * @brief Set data types for columns to be read.
 *
 * @param types Vector of dtypes; it is moved into the options being built
 * @return this for chaining
 */
614  json_reader_options_builder& dtypes(std::vector<data_type> types)
615  {
616  options._dtypes = std::move(types);
617  return *this;
618  }
619 
/**
 * @brief Set data types for columns to be read.
 *
 * @param types Map from string keys to dtypes; it is moved into the options being built
 * @return this for chaining
 */
626  json_reader_options_builder& dtypes(std::map<std::string, data_type> types)
627  {
628  options._dtypes = std::move(types);
629  return *this;
630  }
631 
/**
 * @brief Set data types for columns to be read.
 *
 * @param types Map from string keys to schema elements; it is moved into the options being built
 * @return this for chaining
 */
638  json_reader_options_builder& dtypes(std::map<std::string, schema_element> types)
639  {
640  options._dtypes = std::move(types);
641  return *this;
642  }
643 
651  {
652  options.set_dtypes(std::move(types));
653  return *this;
654  }
655 
663  {
664  options._compression = comp_type;
665  return *this;
666  }
667 
675  {
676  options._byte_range_offset = offset;
677  return *this;
678  }
679 
687  {
688  options._byte_range_size = size;
689  return *this;
690  }
691 
699  {
700  options.set_delimiter(delimiter);
701  return *this;
702  }
703 
711  {
712  options._lines = val;
713  return *this;
714  }
715 
724  {
725  options._mixed_types_as_string = val;
726  return *this;
727  }
728 
740  {
741  options._prune_columns = val;
742  return *this;
743  }
744 
755  {
756  options._experimental = val;
757  return *this;
758  }
759 
767  {
768  options._dayfirst = val;
769  return *this;
770  }
771 
780  {
781  options._keep_quotes = val;
782  return *this;
783  }
784 
793  {
794  options._normalize_single_quotes = val;
795  return *this;
796  }
797 
806  {
807  options._normalize_whitespace = val;
808  return *this;
809  }
810 
818  {
819  options._recovery_mode = val;
820  return *this;
821  }
822 
830  {
831  options.set_strict_validation(val);
832  return *this;
833  }
834 
845  {
846  options.allow_numeric_leading_zeros(val);
847  return *this;
848  }
849 
861  {
862  options.allow_nonnumeric_numbers(val);
863  return *this;
864  }
865 
876  {
877  options.allow_unquoted_control_chars(val);
878  return *this;
879  }
880 
/**
 * @brief Sets additional values to recognize as null values.
 *
 * @param vals Vector of strings to treat as null; forwarded to set_na_values() on the options
 * being built
 * @return this for chaining
 */
887  json_reader_options_builder& na_values(std::vector<std::string> vals)
888  {
889  options.set_na_values(std::move(vals));
890  return *this;
891  }
892 
/**
 * @brief Conversion to json_reader_options.
 *
 * Yields the `options` member as an rvalue reference so the caller can move from it.
 */
896  operator json_reader_options&&() { return std::move(options); }
897 
/**
 * @brief Move json_reader_options member once it's built.
 *
 * @return Rvalue reference to the built `json_reader_options` member
 */
905  json_reader_options&& build() { return std::move(options); }
906 };
907 
926  json_reader_options options,
929  // end of group
931 
942 
947  // Specify the sink to use for writer output
948  sink_info _sink;
949  // maximum number of rows to write in each chunk (limits memory use)
950  size_type _rows_per_chunk = std::numeric_limits<size_type>::max();
951  // Set of columns to output
952  table_view _table;
953  // string to use for null entries
954  std::string _na_rep = "";
955  // Indicates whether to output nulls as 'null' or exclude the field
956  bool _include_nulls = false;
957  // Indicates whether to use JSON lines for records format
958  bool _lines = false;
959  // string to use for values != 0 in INT8 types (default 'true')
960  std::string _true_value = std::string{"true"};
961  // string to use for values == 0 in INT8 types (default 'false')
962  std::string _false_value = std::string{"false"};
963  // Names of all columns; if empty, writer will generate column names
964  std::optional<table_metadata> _metadata; // Optional column names
965 
973  : _sink(std::move(sink)), _rows_per_chunk(table.num_rows()), _table(std::move(table))
974  {
975  }
976 
978 
979  public:
/**
 * @brief Default constructor.
 *
 * Every option keeps the value given by its in-class member initializer.
 */
985  explicit json_writer_options() = default;
986 
996 
/**
 * @brief Returns sink used for writer output.
 *
 * @return Const reference to the stored sink info
 */
1002  [[nodiscard]] sink_info const& get_sink() const { return _sink; }
1003 
/**
 * @brief Returns table that would be written to output.
 *
 * @return Const reference to the stored table view
 */
1009  [[nodiscard]] table_view const& get_table() const { return _table; }
1010 
/**
 * @brief Returns metadata information.
 *
 * @return Const reference to the optional metadata; it holds no value unless metadata was set
 */
1016  [[nodiscard]] std::optional<table_metadata> const& get_metadata() const { return _metadata; }
1017 
/**
 * @brief Returns string to use for null entries.
 *
 * @return Const reference to the stored null-representation string
 */
1023  [[nodiscard]] std::string const& get_na_rep() const { return _na_rep; }
1024 
/**
 * @brief Whether to output nulls as 'null'.
 *
 * @return `true` if nulls are output as 'null'
 */
1030  [[nodiscard]] bool is_enabled_include_nulls() const { return _include_nulls; }
1031 
/**
 * @brief Whether to use JSON lines for records format.
 *
 * @return `true` if JSON lines is used for records format
 */
1037  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
1038 
/**
 * @brief Returns maximum number of rows to process for each file write.
 *
 * @return The stored rows-per-chunk value
 */
1044  [[nodiscard]] size_type get_rows_per_chunk() const { return _rows_per_chunk; }
1045 
/**
 * @brief Returns string used for values != 0 in INT8 types.
 *
 * @return Const reference to the stored "true" string
 */
1051  [[nodiscard]] std::string const& get_true_value() const { return _true_value; }
1052 
/**
 * @brief Returns string used for values == 0 in INT8 types.
 *
 * @return Const reference to the stored "false" string
 */
1058  [[nodiscard]] std::string const& get_false_value() const { return _false_value; }
1059 
1060  // Setter
1061 
/**
 * @brief Sets table to be written to output.
 *
 * @param tbl Table view to store
 */
1067  void set_table(table_view tbl) { _table = tbl; }
1068 
/**
 * @brief Sets metadata.
 *
 * @param metadata Table metadata; it is moved into the stored optional
 */
1074  void set_metadata(table_metadata metadata) { _metadata = std::move(metadata); }
1075 
/**
 * @brief Sets string to use for null entries.
 *
 * @param val String to represent null values; it is moved into the stored option
 */
1081  void set_na_rep(std::string val) { _na_rep = std::move(val); }
1082 
/**
 * @brief Enables/Disables output of nulls as 'null'.
 *
 * @param val Boolean value to enable/disable the option
 */
1088  void enable_include_nulls(bool val) { _include_nulls = val; }
1089 
/**
 * @brief Enables/Disables JSON lines for records format.
 *
 * @param val Boolean value to enable/disable JSON lines
 */
1095  void enable_lines(bool val) { _lines = val; }
1096 
/**
 * @brief Sets maximum number of rows to process for each file write.
 *
 * @param val Number of rows per chunk
 */
1102  void set_rows_per_chunk(size_type val) { _rows_per_chunk = val; }
1103 
/**
 * @brief Sets string used for values != 0 in INT8 types.
 *
 * @param val String to represent values != 0 in INT8 types; it is moved into the stored option
 */
1109  void set_true_value(std::string val) { _true_value = std::move(val); }
1110 
/**
 * @brief Sets string used for values == 0 in INT8 types.
 *
 * @param val String to represent values == 0 in INT8 types; it is moved into the stored option
 */
1116  void set_false_value(std::string val) { _false_value = std::move(val); }
1117 };
1118 
1123  json_writer_options options;
1124 
1125  public:
/**
 * @brief Default constructor.
 */
1131  explicit json_writer_options_builder() = default;
1132 
1140  : options{sink, table}
1141  {
1142  }
1143 
1151  {
1152  options._table = tbl;
1153  return *this;
1154  }
1155 
1163  {
1164  options._metadata = std::move(metadata);
1165  return *this;
1166  }
1167 
1175  {
1176  options._na_rep = std::move(val);
1177  return *this;
1178  };
1179 
1187  {
1188  options._include_nulls = val;
1189  return *this;
1190  }
1191 
1199  {
1200  options._lines = val;
1201  return *this;
1202  }
1203 
1211  {
1212  options._rows_per_chunk = val;
1213  return *this;
1214  }
1215 
1223  {
1224  options._true_value = std::move(val);
1225  return *this;
1226  }
1227 
1235  {
1236  options._false_value = std::move(val);
1237  return *this;
1238  }
1239 
/**
 * @brief Conversion to json_writer_options.
 *
 * Yields the `options` member as an rvalue reference so the caller can move from it.
 */
1243  operator json_writer_options&&() { return std::move(options); }
1244 
/**
 * @brief Move json_writer_options member once it's built.
 *
 * @return Rvalue reference to the built `json_writer_options` member
 */
1252  json_writer_options&& build() { return std::move(options); }
1253 };
1254 
1272 void write_json(json_writer_options const& options,
1274  // end of group
1276 } // namespace io
1277 } // namespace CUDF_EXPORT cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
Builds settings to use for read_json().
Definition: io/json.hpp:590
json_reader_options_builder & normalize_single_quotes(bool val)
Set whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:792
json_reader_options_builder & nonnumeric_numbers(bool val)
Set whether specific unquoted number values are valid JSON. The values are NaN, +INF,...
Definition: io/json.hpp:860
json_reader_options_builder & keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:779
json_reader_options_builder & normalize_whitespace(bool val)
Set whether the reader should normalize unquoted whitespace.
Definition: io/json.hpp:805
json_reader_options_builder & numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this t...
Definition: io/json.hpp:844
json_reader_options_builder & dtypes(schema_element types)
Set data types for columns to be read.
Definition: io/json.hpp:650
json_reader_options_builder & dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:766
json_reader_options_builder & recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:817
json_reader_options_builder & na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:887
json_reader_options_builder & delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:698
json_reader_options_builder & prune_columns(bool val)
Set whether to prune columns on read, selected based on the dtypes option.
Definition: io/json.hpp:739
json_reader_options_builder & experimental(bool val)
Set whether to enable experimental features.
Definition: io/json.hpp:754
json_reader_options_builder & lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:710
json_reader_options_builder & dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:614
json_reader_options && build()
Move json_reader_options member once it's built.
Definition: io/json.hpp:905
json_reader_options_builder & mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:723
json_reader_options_builder & unquoted_control_chars(bool val)
Set whether chars >= 0 and < 32 are allowed in a quoted string without some form of escaping....
Definition: io/json.hpp:875
json_reader_options_builder & compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:662
json_reader_options_builder(source_info src)
Constructor from source info.
Definition: io/json.hpp:606
json_reader_options_builder & strict_validation(bool val)
Set whether json validation should be strict or not.
Definition: io/json.hpp:829
json_reader_options_builder & byte_range_size(size_type size)
Set number of bytes to read.
Definition: io/json.hpp:686
json_reader_options_builder & dtypes(std::map< std::string, schema_element > types)
Set data types for columns to be read.
Definition: io/json.hpp:638
json_reader_options_builder & byte_range_offset(size_type offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:674
json_reader_options_builder()=default
Default constructor.
json_reader_options_builder & dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:626
Input arguments to the read_json interface.
Definition: io/json.hpp:95
bool is_allowed_nonnumeric_numbers() const
Whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:359
void enable_mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:468
void set_compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:415
void set_dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:386
void allow_unquoted_control_chars(bool val)
Set whether in a quoted string should characters greater than or equal to 0 and less than 32 be allow...
Definition: io/json.hpp:573
void enable_normalize_single_quotes(bool val)
Set whether the reader should enable normalization of single quotes around strings.
Definition: io/json.hpp:512
bool is_allowed_numeric_leading_zeros() const
Whether leading zeros are allowed in numeric values.
Definition: io/json.hpp:346
void enable_prune_columns(bool val)
Set whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:479
bool is_enabled_keep_quotes() const
Whether the reader should keep quotes of string values.
Definition: io/json.hpp:309
void set_dtypes(schema_element types)
Set data types for a potentially nested column hierarchy.
void enable_normalize_whitespace(bool val)
Set whether the reader should enable normalization of unquoted whitespace.
Definition: io/json.hpp:520
void allow_nonnumeric_numbers(bool val)
Set whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:558
size_t get_byte_range_offset() const
Returns number of bytes to skip from source start.
Definition: io/json.hpp:206
source_info const & get_source() const
Returns source info.
Definition: io/json.hpp:185
void enable_experimental(bool val)
Set whether to enable experimental features.
Definition: io/json.hpp:489
void set_dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:393
bool is_enabled_prune_columns() const
Whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:286
char get_delimiter() const
Returns delimiter separating records in JSON lines.
Definition: io/json.hpp:260
bool is_enabled_lines() const
Whether to read the file as a json object per line.
Definition: io/json.hpp:267
void allow_numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this t...
Definition: io/json.hpp:544
void set_strict_validation(bool val)
Set whether strict validation is enabled or not.
Definition: io/json.hpp:534
bool is_enabled_mixed_types_as_string() const
Whether to parse mixed types as a string column.
Definition: io/json.hpp:274
json_reader_options()=default
Default constructor.
void set_na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:584
void enable_dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:496
size_t get_byte_range_size_with_padding() const
Returns number of bytes to read with padding.
Definition: io/json.hpp:220
void set_recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:527
bool is_enabled_normalize_whitespace() const
Whether the reader should normalize unquoted whitespace characters.
Definition: io/json.hpp:323
bool is_strict_validation() const
Whether json validation should be enforced strictly or not.
Definition: io/json.hpp:337
void set_delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:436
void set_byte_range_offset(size_t offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:422
void enable_lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:460
dtype_variant const & get_dtypes() const
Returns data types of the columns.
Definition: io/json.hpp:192
void enable_keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:504
bool is_enabled_normalize_single_quotes() const
Whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:316
compression_type get_compression() const
Returns compression format of the source.
Definition: io/json.hpp:199
void set_dtypes(std::map< std::string, schema_element > types)
Set data types for a potentially nested column hierarchy.
Definition: io/json.hpp:400
size_t get_byte_range_size() const
Returns number of bytes to read.
Definition: io/json.hpp:213
std::variant< std::vector< data_type >, std::map< std::string, data_type >, std::map< std::string, schema_element >, schema_element > dtype_variant
Variant type holding dtypes information for the columns.
Definition: io/json.hpp:101
bool is_enabled_experimental() const
Whether to enable experimental features.
Definition: io/json.hpp:295
json_recovery_mode_t recovery_mode() const
Queries the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:330
static json_reader_options_builder builder(source_info src)
Create a json_reader_options_builder which will build json_reader_options.
bool is_enabled_dayfirst() const
Whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:302
size_t get_byte_range_padding() const
Returns number of bytes to pad when reading.
Definition: io/json.hpp:234
bool is_allowed_unquoted_control_chars() const
Whether in a quoted string should characters greater than or equal to 0 and less than 32 be allowed w...
Definition: io/json.hpp:369
std::vector< std::string > const & get_na_values() const
Returns additional values to recognize as null values.
Definition: io/json.hpp:379
void set_byte_range_size(size_t size)
Set number of bytes to read.
Definition: io/json.hpp:429
Builder to build options for write_json()
Definition: io/json.hpp:1122
json_writer_options_builder & include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1186
json_writer_options_builder & table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1150
json_writer_options_builder()=default
Default constructor.
json_writer_options_builder & rows_per_chunk(int val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1210
json_writer_options_builder & true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1222
json_writer_options_builder & false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1234
json_writer_options_builder(sink_info const &sink, table_view const &table)
Constructor from sink and table.
Definition: io/json.hpp:1139
json_writer_options_builder & na_rep(std::string val)
Sets string to use for null entries.
Definition: io/json.hpp:1174
json_writer_options_builder & metadata(table_metadata metadata)
Sets optional metadata (with column names).
Definition: io/json.hpp:1162
json_writer_options && build()
move json_writer_options member once it's built.
Definition: io/json.hpp:1252
json_writer_options_builder & lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1198
Settings to use for write_json().
Definition: io/json.hpp:946
table_view const & get_table() const
Returns table that would be written to output.
Definition: io/json.hpp:1009
void set_false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1116
void enable_include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1088
bool is_enabled_include_nulls() const
Whether to output nulls as 'null'.
Definition: io/json.hpp:1030
void enable_lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1095
void set_na_rep(std::string val)
Sets string to use for null entries.
Definition: io/json.hpp:1081
static json_writer_options_builder builder(sink_info const &sink, table_view const &table)
Create builder to create json_writer_options.
json_writer_options()=default
Default constructor.
void set_true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1109
sink_info const & get_sink() const
Returns sink used for writer output.
Definition: io/json.hpp:1002
void set_rows_per_chunk(size_type val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1102
std::string const & get_true_value() const
Returns string used for values != 0 in INT8 types.
Definition: io/json.hpp:1051
void set_table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1067
std::string const & get_false_value() const
Returns string used for values == 0 in INT8 types.
Definition: io/json.hpp:1058
bool is_enabled_lines() const
Whether to use JSON lines for records format.
Definition: io/json.hpp:1037
size_type get_rows_per_chunk() const
Returns maximum number of rows to process for each file write.
Definition: io/json.hpp:1044
std::optional< table_metadata > const & get_metadata() const
Returns metadata information.
Definition: io/json.hpp:1016
std::string const & get_na_rep() const
Returns string to use for null entries.
Definition: io/json.hpp:1023
void set_metadata(table_metadata metadata)
Sets metadata.
Definition: io/json.hpp:1074
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
size_type num_rows() const noexcept
Returns the number of rows.
Definition: table.hpp:93
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
table_with_metadata read_json(json_reader_options options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Reads a JSON dataset into a set of columns.
json_recovery_mode_t
Control the error recovery behavior of the json parser.
Definition: io/json.hpp:67
@ RECOVER_WITH_NULL
Recovers from an error, replacing invalid records with null.
@ FAIL
Does not recover from an error when encountering an invalid format.
compression_type
Compression algorithms.
Definition: io/types.hpp:57
void write_json(json_writer_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream())
Writes a set of columns to JSON format.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:217
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: aggregation.hpp:35
Allows specifying the target types for nested JSON data via json_reader_options' set_dtypes method.
Definition: io/json.hpp:47
std::optional< std::vector< std::string > > column_order
Allows specifying the order of the columns.
Definition: io/json.hpp:61
data_type type
The type that this column should be converted to.
Definition: io/json.hpp:51
std::map< std::string, schema_element > child_types
Allows specifying this column's child columns target type.
Definition: io/json.hpp:56
Destination information for write interfaces.
Definition: io/types.hpp:512
Source information for read interfaces.
Definition: io/types.hpp:337
Table metadata returned by IO readers.
Definition: io/types.hpp:277
Table with table metadata used by io readers to return the metadata by value.
Definition: io/types.hpp:292
Class definitions for (mutable)_table_view
Type declarations for libcudf.