io/json.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "types.hpp"
20 
21 #include <cudf/detail/utilities/visitor_overload.hpp>
22 #include <cudf/io/detail/utils.hpp>
24 #include <cudf/types.hpp>
25 #include <cudf/utilities/error.hpp>
27 
28 #include <map>
29 #include <string>
30 #include <utility>
31 #include <variant>
32 #include <vector>
33 
34 namespace CUDF_EXPORT cudf {
35 namespace io {
42 class json_reader_options_builder;
43 
53 
57  std::map<std::string, schema_element> child_types;
58 
62  std::optional<std::vector<std::string>> column_order;
63 };
64 
69  FAIL,
71 };
72 
97  public:
98  using dtype_variant =
99  std::variant<std::vector<data_type>,
100  std::map<std::string, data_type>,
101  std::map<std::string, schema_element>,
102  schema_element>;
103 
104  private:
105  source_info _source;
106 
107  // Data types of the column; empty to infer dtypes
108  dtype_variant _dtypes;
109  // Specify the compression format of the source or infer from file extension
110  compression_type _compression = compression_type::AUTO;
111 
112  // Read the file as a json object per line
113  bool _lines = false;
114  // Parse mixed types as a string column
115  bool _mixed_types_as_string = false;
116  // Delimiter separating records in JSON lines
117  char _delimiter = '\n';
118  // Prune columns on read, selected based on the _dtypes option
119  bool _prune_columns = false;
120  // Experimental features: new column tree construction
121  bool _experimental = false;
122 
123  // Bytes to skip from the start
124  size_t _byte_range_offset = 0;
125  // Bytes to read; always reads complete rows
126  size_t _byte_range_size = 0;
127 
128  // Whether to parse dates as DD/MM versus MM/DD
129  bool _dayfirst = false;
130 
131  // Whether to keep the quote characters of string values
132  bool _keep_quotes = false;
133 
134  // Normalize single quotes
135  bool _normalize_single_quotes = false;
136 
137  // Normalize unquoted spaces and tabs
138  bool _normalize_whitespace = false;
139 
140  // Whether to recover after an invalid JSON line
141  json_recovery_mode_t _recovery_mode = json_recovery_mode_t::FAIL;
142 
143  // Validation checks for spark
144  // Should the json validation be strict or not
145  // Note: strict validation enforces the JSON specification https://www.json.org/json-en.html
146  bool _strict_validation = false;
147  // Allow leading zeros for numeric values.
148  bool _allow_numeric_leading_zeros = true;
149  // Allow non-numeric numbers: NaN, +INF, -INF, +Infinity, Infinity, -Infinity
150  bool _allow_nonnumeric_numbers = true;
151  // Allow unquoted control characters
152  bool _allow_unquoted_control_chars = true;
153  // Additional values to recognize as null values
154  std::vector<std::string> _na_values;
155 
161  explicit json_reader_options(source_info src) : _source{std::move(src)} {}
162 
164 
165  public:
171  json_reader_options() = default;
172 
180 
186  [[nodiscard]] source_info const& get_source() const { return _source; }
187 
193  [[nodiscard]] dtype_variant const& get_dtypes() const { return _dtypes; }
194 
200  [[nodiscard]] compression_type get_compression() const { return _compression; }
201 
207  [[nodiscard]] size_t get_byte_range_offset() const { return _byte_range_offset; }
208 
214  [[nodiscard]] size_t get_byte_range_size() const { return _byte_range_size; }
215 
221  [[nodiscard]] size_t get_byte_range_size_with_padding() const
222  {
223  if (_byte_range_size == 0) {
224  return 0;
225  } else {
226  return _byte_range_size + get_byte_range_padding();
227  }
228  }
229 
235  [[nodiscard]] size_t get_byte_range_padding() const
236  {
237  auto const num_columns =
238  std::visit(cudf::detail::visitor_overload{
239  [](auto const& dtypes) { return dtypes.size(); },
240  [](schema_element const& dtypes) { return dtypes.child_types.size(); }},
241  _dtypes);
242 
243  auto const max_row_bytes = 16 * 1024; // 16KB
244  auto const column_bytes = 64;
245  auto const base_padding = 1024; // 1KB
246 
247  if (num_columns == 0) {
248  // Use flat size if the number of columns is not known
249  return max_row_bytes;
250  }
251 
252  // Expand the size based on the number of columns, if available
253  return base_padding + num_columns * column_bytes;
254  }
255 
261  [[nodiscard]] char get_delimiter() const { return _delimiter; }
262 
268  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
269 
275  [[nodiscard]] bool is_enabled_mixed_types_as_string() const { return _mixed_types_as_string; }
276 
287  [[nodiscard]] bool is_enabled_prune_columns() const { return _prune_columns; }
288 
296  [[nodiscard]] bool is_enabled_experimental() const { return _experimental; }
297 
303  [[nodiscard]] bool is_enabled_dayfirst() const { return _dayfirst; }
304 
310  [[nodiscard]] bool is_enabled_keep_quotes() const { return _keep_quotes; }
311 
317  [[nodiscard]] bool is_enabled_normalize_single_quotes() const { return _normalize_single_quotes; }
318 
324  [[nodiscard]] bool is_enabled_normalize_whitespace() const { return _normalize_whitespace; }
325 
331  [[nodiscard]] json_recovery_mode_t recovery_mode() const { return _recovery_mode; }
332 
338  [[nodiscard]] bool is_strict_validation() const { return _strict_validation; }
339 
347  [[nodiscard]] bool is_allowed_numeric_leading_zeros() const
348  {
349  return _allow_numeric_leading_zeros;
350  }
351 
360  [[nodiscard]] bool is_allowed_nonnumeric_numbers() const { return _allow_nonnumeric_numbers; }
361 
370  [[nodiscard]] bool is_allowed_unquoted_control_chars() const
371  {
372  return _allow_unquoted_control_chars;
373  }
374 
380  [[nodiscard]] std::vector<std::string> const& get_na_values() const { return _na_values; }
381 
387  void set_dtypes(std::vector<data_type> types) { _dtypes = std::move(types); }
388 
394  void set_dtypes(std::map<std::string, data_type> types) { _dtypes = std::move(types); }
395 
401  void set_dtypes(std::map<std::string, schema_element> types) { _dtypes = std::move(types); }
402 
410 
416  void set_compression(compression_type comp_type) { _compression = comp_type; }
417 
423  void set_byte_range_offset(size_t offset) { _byte_range_offset = offset; }
424 
430  void set_byte_range_size(size_t size) { _byte_range_size = size; }
431 
437  void set_delimiter(char delimiter)
438  {
439  switch (delimiter) {
440  case '{':
441  case '[':
442  case '}':
443  case ']':
444  case ',':
445  case ':':
446  case '"':
447  case '\'':
448  case '\\':
449  case ' ':
450  case '\t':
451  case '\r': CUDF_FAIL("Unsupported delimiter character.", std::invalid_argument); break;
452  }
453  _delimiter = delimiter;
454  }
455 
461  void enable_lines(bool val) { _lines = val; }
462 
469  void enable_mixed_types_as_string(bool val) { _mixed_types_as_string = val; }
470 
480  void enable_prune_columns(bool val) { _prune_columns = val; }
481 
490  void enable_experimental(bool val) { _experimental = val; }
491 
497  void enable_dayfirst(bool val) { _dayfirst = val; }
498 
505  void enable_keep_quotes(bool val) { _keep_quotes = val; }
506 
513  void enable_normalize_single_quotes(bool val) { _normalize_single_quotes = val; }
514 
521  void enable_normalize_whitespace(bool val) { _normalize_whitespace = val; }
522 
528  void set_recovery_mode(json_recovery_mode_t val) { _recovery_mode = val; }
529 
535  void set_strict_validation(bool val) { _strict_validation = val; }
536 
546  {
547  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
548  _allow_numeric_leading_zeros = val;
549  }
550 
560  {
561  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
562  _allow_nonnumeric_numbers = val;
563  }
564 
575  {
576  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
577  _allow_unquoted_control_chars = val;
578  }
579 
585  void set_na_values(std::vector<std::string> vals) { _na_values = std::move(vals); }
586 };
587 
592  json_reader_options options;
593 
594  public:
600  explicit json_reader_options_builder() = default;
601 
607  explicit json_reader_options_builder(source_info src) : options{std::move(src)} {}
608 
615  json_reader_options_builder& dtypes(std::vector<data_type> types)
616  {
617  options._dtypes = std::move(types);
618  return *this;
619  }
620 
627  json_reader_options_builder& dtypes(std::map<std::string, data_type> types)
628  {
629  options._dtypes = std::move(types);
630  return *this;
631  }
632 
639  json_reader_options_builder& dtypes(std::map<std::string, schema_element> types)
640  {
641  options._dtypes = std::move(types);
642  return *this;
643  }
644 
652  {
653  options.set_dtypes(std::move(types));
654  return *this;
655  }
656 
664  {
665  options._compression = comp_type;
666  return *this;
667  }
668 
676  {
677  options._byte_range_offset = offset;
678  return *this;
679  }
680 
688  {
689  options._byte_range_size = size;
690  return *this;
691  }
692 
700  {
701  options.set_delimiter(delimiter);
702  return *this;
703  }
704 
712  {
713  options._lines = val;
714  return *this;
715  }
716 
725  {
726  options._mixed_types_as_string = val;
727  return *this;
728  }
729 
741  {
742  options._prune_columns = val;
743  return *this;
744  }
745 
756  {
757  options._experimental = val;
758  return *this;
759  }
760 
768  {
769  options._dayfirst = val;
770  return *this;
771  }
772 
781  {
782  options._keep_quotes = val;
783  return *this;
784  }
785 
794  {
795  options._normalize_single_quotes = val;
796  return *this;
797  }
798 
807  {
808  options._normalize_whitespace = val;
809  return *this;
810  }
811 
819  {
820  options._recovery_mode = val;
821  return *this;
822  }
823 
831  {
832  options.set_strict_validation(val);
833  return *this;
834  }
835 
846  {
847  options.allow_numeric_leading_zeros(val);
848  return *this;
849  }
850 
862  {
863  options.allow_nonnumeric_numbers(val);
864  return *this;
865  }
866 
877  {
878  options.allow_unquoted_control_chars(val);
879  return *this;
880  }
881 
888  json_reader_options_builder& na_values(std::vector<std::string> vals)
889  {
890  options.set_na_values(std::move(vals));
891  return *this;
892  }
893 
897  operator json_reader_options&&() { return std::move(options); }
898 
906  json_reader_options&& build() { return std::move(options); }
907 };
908 
927  json_reader_options options,
930  // end of group
932 
943 
948  // Specify the sink to use for writer output
949  sink_info _sink;
950  // Specify the compression format of the sink
951  compression_type _compression = compression_type::NONE;
952  // maximum number of rows to write in each chunk (limits memory use)
953  size_type _rows_per_chunk = std::numeric_limits<size_type>::max();
954  // Set of columns to output
955  table_view _table;
956  // string to use for null entries
957  std::string _na_rep = "";
958  // Indicates whether to output nulls as 'null' or exclude the field
959  bool _include_nulls = false;
960  // Indicates whether to use JSON lines for records format
961  bool _lines = false;
962  // string to use for values != 0 in INT8 types (default 'true')
963  std::string _true_value = std::string{"true"};
964  // string to use for values == 0 in INT8 types (default 'false')
965  std::string _false_value = std::string{"false"};
966  // Names of all columns; if empty, writer will generate column names
967  std::optional<table_metadata> _metadata; // Optional column names
968  // Indicates whether to escape UTF-8 characters in JSON output
969  bool _enable_utf8_escaped = true;
970 
978  : _sink(std::move(sink)), _rows_per_chunk(table.num_rows()), _table(std::move(table))
979  {
980  }
981 
983 
984  public:
990  explicit json_writer_options() = default;
991 
1001 
1007  [[nodiscard]] sink_info const& get_sink() const { return _sink; }
1008 
1014  [[nodiscard]] table_view const& get_table() const { return _table; }
1015 
1021  [[nodiscard]] std::optional<table_metadata> const& get_metadata() const { return _metadata; }
1022 
1028  [[nodiscard]] std::string const& get_na_rep() const { return _na_rep; }
1029 
1035  [[nodiscard]] compression_type get_compression() const { return _compression; }
1036 
1042  [[nodiscard]] bool is_enabled_include_nulls() const { return _include_nulls; }
1043 
1049  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
1050 
1062  void enable_utf8_escaped(bool val) { _enable_utf8_escaped = val; }
1063 
1069  [[nodiscard]] bool is_enabled_utf8_escaped() const { return _enable_utf8_escaped; }
1070 
1076  [[nodiscard]] size_type get_rows_per_chunk() const { return _rows_per_chunk; }
1077 
1083  [[nodiscard]] std::string const& get_true_value() const { return _true_value; }
1084 
1090  [[nodiscard]] std::string const& get_false_value() const { return _false_value; }
1091 
1092  // Setter
1093 
1099  void set_table(table_view tbl) { _table = tbl; }
1100 
1106  void set_compression(compression_type comptype) { _compression = comptype; }
1107 
1113  void set_metadata(table_metadata metadata) { _metadata = std::move(metadata); }
1114 
1120  void set_na_rep(std::string val) { _na_rep = std::move(val); }
1121 
1127  void enable_include_nulls(bool val) { _include_nulls = val; }
1128 
1134  void enable_lines(bool val) { _lines = val; }
1135 
1141  void set_rows_per_chunk(size_type val) { _rows_per_chunk = val; }
1142 
1148  void set_true_value(std::string val) { _true_value = std::move(val); }
1149 
1155  void set_false_value(std::string val) { _false_value = std::move(val); }
1156 };
1157 
1162  json_writer_options options;
1163 
1164  public:
1170  explicit json_writer_options_builder() = default;
1171 
1179  : options{sink, table}
1180  {
1181  }
1182 
1190  {
1191  options._table = tbl;
1192  return *this;
1193  }
1194 
1202  {
1203  options._compression = comptype;
1204  return *this;
1205  }
1206 
1214  {
1215  options._metadata = std::move(metadata);
1216  return *this;
1217  }
1218 
1226  {
1227  options._na_rep = std::move(val);
1228  return *this;
1229  };
1230 
1238  {
1239  options._include_nulls = val;
1240  return *this;
1241  }
1242 
1252  {
1253  options._enable_utf8_escaped = val;
1254  return *this;
1255  }
1256 
1264  {
1265  options._lines = val;
1266  return *this;
1267  }
1268 
1276  {
1277  options._rows_per_chunk = val;
1278  return *this;
1279  }
1280 
1288  {
1289  options._true_value = std::move(val);
1290  return *this;
1291  }
1292 
1300  {
1301  options._false_value = std::move(val);
1302  return *this;
1303  }
1304 
1308  operator json_writer_options&&() { return std::move(options); }
1309 
1317  json_writer_options&& build() { return std::move(options); }
1318 };
1319 
1337 void write_json(json_writer_options const& options,
1339 
1341 struct is_supported_json_write_type_fn {
1342  template <typename T>
1343  constexpr bool operator()() const
1344  {
1345  return cudf::io::detail::is_convertible_to_string_column<T>();
1346  }
1347 };
1349 
1357 {
1358  return cudf::type_dispatcher(type, is_supported_json_write_type_fn{});
1359 }
1360  // end of group
1362 } // namespace io
1363 } // namespace CUDF_EXPORT cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
Builds settings to use for read_json().
Definition: io/json.hpp:591
json_reader_options_builder & normalize_single_quotes(bool val)
Set whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:793
json_reader_options_builder & nonnumeric_numbers(bool val)
Set whether specific unquoted number values are valid JSON. The values are NaN, +INF,...
Definition: io/json.hpp:861
json_reader_options_builder & keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:780
json_reader_options_builder & normalize_whitespace(bool val)
Set whether the reader should normalize unquoted whitespace.
Definition: io/json.hpp:806
json_reader_options_builder & numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this t...
Definition: io/json.hpp:845
json_reader_options_builder & dtypes(schema_element types)
Set data types for columns to be read.
Definition: io/json.hpp:651
json_reader_options_builder & dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:767
json_reader_options_builder & recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:818
json_reader_options_builder & na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:888
json_reader_options_builder & delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:699
json_reader_options_builder & prune_columns(bool val)
Set whether to prune columns on read, selected based on the dtypes option.
Definition: io/json.hpp:740
json_reader_options_builder & experimental(bool val)
Set whether to enable experimental features.
Definition: io/json.hpp:755
json_reader_options_builder & lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:711
json_reader_options_builder & dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:615
json_reader_options && build()
Move the json_reader_options member once it's built.
Definition: io/json.hpp:906
json_reader_options_builder & mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:724
json_reader_options_builder & unquoted_control_chars(bool val)
Set whether chars >= 0 and < 32 are allowed in a quoted string without some form of escaping....
Definition: io/json.hpp:876
json_reader_options_builder & compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:663
json_reader_options_builder(source_info src)
Constructor from source info.
Definition: io/json.hpp:607
json_reader_options_builder & strict_validation(bool val)
Set whether json validation should be strict or not.
Definition: io/json.hpp:830
json_reader_options_builder & byte_range_size(size_type size)
Set number of bytes to read.
Definition: io/json.hpp:687
json_reader_options_builder & dtypes(std::map< std::string, schema_element > types)
Set data types for columns to be read.
Definition: io/json.hpp:639
json_reader_options_builder & byte_range_offset(size_type offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:675
json_reader_options_builder()=default
Default constructor.
json_reader_options_builder & dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:627
Input arguments to the read_json interface.
Definition: io/json.hpp:96
bool is_allowed_nonnumeric_numbers() const
Whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:360
void enable_mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:469
void set_compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:416
void set_dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:387
void allow_unquoted_control_chars(bool val)
Set whether in a quoted string should characters greater than or equal to 0 and less than 32 be allow...
Definition: io/json.hpp:574
void enable_normalize_single_quotes(bool val)
Set whether the reader should enable normalization of single quotes around strings.
Definition: io/json.hpp:513
bool is_allowed_numeric_leading_zeros() const
Whether leading zeros are allowed in numeric values.
Definition: io/json.hpp:347
void enable_prune_columns(bool val)
Set whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:480
bool is_enabled_keep_quotes() const
Whether the reader should keep quotes of string values.
Definition: io/json.hpp:310
void set_dtypes(schema_element types)
Set data types for a potentially nested column hierarchy.
void enable_normalize_whitespace(bool val)
Set whether the reader should enable normalization of unquoted whitespace.
Definition: io/json.hpp:521
void allow_nonnumeric_numbers(bool val)
Set whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:559
size_t get_byte_range_offset() const
Returns number of bytes to skip from source start.
Definition: io/json.hpp:207
source_info const & get_source() const
Returns source info.
Definition: io/json.hpp:186
void enable_experimental(bool val)
Set whether to enable experimental features.
Definition: io/json.hpp:490
void set_dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:394
bool is_enabled_prune_columns() const
Whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:287
char get_delimiter() const
Returns delimiter separating records in JSON lines.
Definition: io/json.hpp:261
bool is_enabled_lines() const
Whether to read the file as a json object per line.
Definition: io/json.hpp:268
void allow_numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this t...
Definition: io/json.hpp:545
void set_strict_validation(bool val)
Set whether strict validation is enabled or not.
Definition: io/json.hpp:535
bool is_enabled_mixed_types_as_string() const
Whether to parse mixed types as a string column.
Definition: io/json.hpp:275
json_reader_options()=default
Default constructor.
void set_na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:585
void enable_dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:497
size_t get_byte_range_size_with_padding() const
Returns number of bytes to read with padding.
Definition: io/json.hpp:221
void set_recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:528
bool is_enabled_normalize_whitespace() const
Whether the reader should normalize unquoted whitespace characters.
Definition: io/json.hpp:324
bool is_strict_validation() const
Whether json validation should be enforced strictly or not.
Definition: io/json.hpp:338
void set_delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:437
void set_byte_range_offset(size_t offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:423
void enable_lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:461
dtype_variant const & get_dtypes() const
Returns data types of the columns.
Definition: io/json.hpp:193
void enable_keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:505
bool is_enabled_normalize_single_quotes() const
Whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:317
compression_type get_compression() const
Returns compression format of the source.
Definition: io/json.hpp:200
void set_dtypes(std::map< std::string, schema_element > types)
Set data types for a potentially nested column hierarchy.
Definition: io/json.hpp:401
size_t get_byte_range_size() const
Returns number of bytes to read.
Definition: io/json.hpp:214
std::variant< std::vector< data_type >, std::map< std::string, data_type >, std::map< std::string, schema_element >, schema_element > dtype_variant
Variant type holding dtypes information for the columns.
Definition: io/json.hpp:102
bool is_enabled_experimental() const
Whether to enable experimental features.
Definition: io/json.hpp:296
json_recovery_mode_t recovery_mode() const
Queries the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:331
static json_reader_options_builder builder(source_info src)
Create a json_reader_options_builder, which will build json_reader_options.
bool is_enabled_dayfirst() const
Whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:303
size_t get_byte_range_padding() const
Returns number of bytes to pad when reading.
Definition: io/json.hpp:235
bool is_allowed_unquoted_control_chars() const
Whether in a quoted string should characters greater than or equal to 0 and less than 32 be allowed w...
Definition: io/json.hpp:370
std::vector< std::string > const & get_na_values() const
Returns additional values to recognize as null values.
Definition: io/json.hpp:380
void set_byte_range_size(size_t size)
Set number of bytes to read.
Definition: io/json.hpp:430
Builder to build options for write_json()
Definition: io/json.hpp:1161
json_writer_options_builder & compression(compression_type comptype)
Sets compression type of output sink.
Definition: io/json.hpp:1201
json_writer_options_builder & include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1237
json_writer_options_builder & table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1189
json_writer_options_builder()=default
Default constructor.
json_writer_options_builder & rows_per_chunk(int val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1275
json_writer_options_builder & utf8_escaped(bool val)
Enables/Disables UTF-8 escaped output for string fields.
Definition: io/json.hpp:1251
json_writer_options_builder & true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1287
json_writer_options_builder & false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1299
json_writer_options_builder(sink_info const &sink, table_view const &table)
Constructor from sink and table.
Definition: io/json.hpp:1178
json_writer_options_builder & na_rep(std::string val)
Sets the string to use for null entries.
Definition: io/json.hpp:1225
json_writer_options_builder & metadata(table_metadata metadata)
Sets optional metadata (with column names).
Definition: io/json.hpp:1213
json_writer_options && build()
Move the json_writer_options member once it's built.
Definition: io/json.hpp:1317
json_writer_options_builder & lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1263
Settings to use for write_json().
Definition: io/json.hpp:947
void set_compression(compression_type comptype)
Sets compression type to be used.
Definition: io/json.hpp:1106
compression_type get_compression() const
Returns compression type used for sink.
Definition: io/json.hpp:1035
table_view const & get_table() const
Returns table that would be written to output.
Definition: io/json.hpp:1014
void set_false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1155
void enable_include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1127
bool is_enabled_include_nulls() const
Whether to output nulls as 'null'.
Definition: io/json.hpp:1042
void enable_lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1134
void set_na_rep(std::string val)
Sets the string to use for null entries.
Definition: io/json.hpp:1120
static json_writer_options_builder builder(sink_info const &sink, table_view const &table)
Create builder to create json_writer_options.
json_writer_options()=default
Default constructor.
void set_true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1148
sink_info const & get_sink() const
Returns sink used for writer output.
Definition: io/json.hpp:1007
void enable_utf8_escaped(bool val)
Enable or disable writing escaped UTF-8 characters in JSON output.
Definition: io/json.hpp:1062
void set_rows_per_chunk(size_type val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1141
std::string const & get_true_value() const
Returns string used for values != 0 in INT8 types.
Definition: io/json.hpp:1083
void set_table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1099
std::string const & get_false_value() const
Returns string used for values == 0 in INT8 types.
Definition: io/json.hpp:1090
bool is_enabled_lines() const
Whether to use JSON lines for records format.
Definition: io/json.hpp:1049
bool is_enabled_utf8_escaped() const
Check whether UTF-8 escaped output is enabled.
Definition: io/json.hpp:1069
size_type get_rows_per_chunk() const
Returns maximum number of rows to process for each file write.
Definition: io/json.hpp:1076
std::optional< table_metadata > const & get_metadata() const
Returns metadata information.
Definition: io/json.hpp:1021
std::string const & get_na_rep() const
Returns the string to use for null entries.
Definition: io/json.hpp:1028
void set_metadata(table_metadata metadata)
Sets metadata.
Definition: io/json.hpp:1113
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
size_type num_rows() const noexcept
Returns the number of rows.
Definition: table.hpp:93
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
table_with_metadata read_json(json_reader_options options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Reads a JSON dataset into a set of columns.
json_recovery_mode_t
Control the error recovery behavior of the json parser.
Definition: io/json.hpp:68
@ RECOVER_WITH_NULL
Recovers from an error, replacing invalid records with null.
@ FAIL
Does not recover from an error when encountering an invalid format.
compression_type
Compression algorithms.
Definition: io/types.hpp:57
void write_json(json_writer_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream())
Writes a set of columns to JSON format.
constexpr bool is_supported_write_json(data_type type)
Checks if a cudf::data_type is supported for JSON writing.
Definition: io/json.hpp:1356
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:217
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: host_udf.hpp:37
Allows specifying the target types for nested JSON data via json_reader_options' set_dtypes method.
Definition: io/json.hpp:48
std::optional< std::vector< std::string > > column_order
Allows specifying the order of the columns.
Definition: io/json.hpp:62
data_type type
The type that this column should be converted to.
Definition: io/json.hpp:52
std::map< std::string, schema_element > child_types
Allows specifying this column's child columns target type.
Definition: io/json.hpp:57
Destination information for write interfaces.
Definition: io/types.hpp:468
Source information for read interfaces.
Definition: io/types.hpp:327
Table metadata returned by IO readers.
Definition: io/types.hpp:277
Table with table metadata used by io readers to return the metadata by value.
Definition: io/types.hpp:303
Class definitions for (mutable)_table_view
Type declarations for libcudf.