io/json.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include "types.hpp"
9 
10 #include <cudf/detail/utilities/visitor_overload.hpp>
11 #include <cudf/io/detail/utils.hpp>
13 #include <cudf/types.hpp>
14 #include <cudf/utilities/error.hpp>
16 
17 #include <map>
18 #include <string>
19 #include <utility>
20 #include <variant>
21 #include <vector>
22 
23 namespace CUDF_EXPORT cudf {
24 namespace io {
31 class json_reader_options_builder;
32 
42 
46  std::map<std::string, schema_element> child_types;
47 
51  std::optional<std::vector<std::string>> column_order;
52 };
53 
58  FAIL,
60 };
61 
86  public:
87  using dtype_variant =
88  std::variant<std::vector<data_type>,
89  std::map<std::string, data_type>,
90  std::map<std::string, schema_element>,
92 
93  private:
94  source_info _source;
95 
96  // Data types of the column; empty to infer dtypes
97  dtype_variant _dtypes;
98  // Specify the compression format of the source or infer from file extension
99  compression_type _compression = compression_type::AUTO;
100 
101  // Read the file as a json object per line
102  bool _lines = false;
103  // Parse mixed types as a string column
104  bool _mixed_types_as_string = false;
105  // Delimiter separating records in JSON lines
106  char _delimiter = '\n';
107  // Prune columns on read, selected based on the _dtypes option
108  bool _prune_columns = false;
109  // Experimental features: new column tree construction
110  bool _experimental = false;
111 
112  // Bytes to skip from the start
113  size_t _byte_range_offset = 0;
114  // Bytes to read; always reads complete rows
115  size_t _byte_range_size = 0;
116 
117  // Whether to parse dates as DD/MM versus MM/DD
118  bool _dayfirst = false;
119 
120  // Whether to keep the quote characters of string values
121  bool _keep_quotes = false;
122 
123  // Normalize single quotes
124  bool _normalize_single_quotes = false;
125 
126  // Normalize unquoted spaces and tabs
127  bool _normalize_whitespace = false;
128 
129  // Whether to recover after an invalid JSON line
130  json_recovery_mode_t _recovery_mode = json_recovery_mode_t::FAIL;
131 
132  // Validation checks for spark
133  // Should the json validation be strict or not
134  // Note: strict validation enforces the JSON specification https://www.json.org/json-en.html
135  bool _strict_validation = false;
136  // Allow leading zeros for numeric values.
137  bool _allow_numeric_leading_zeros = true;
138  // Allow non-numeric numbers: NaN, +INF, -INF, +Infinity, Infinity, -Infinity
139  bool _allow_nonnumeric_numbers = true;
140  // Allow unquoted control characters
141  bool _allow_unquoted_control_chars = true;
142  // Additional values to recognize as null values
143  std::vector<std::string> _na_values;
144 
// Constructs reader options for the given source; `src` is moved into _source.
150  explicit json_reader_options(source_info src) : _source{std::move(src)} {}
151 
153 
154  public:
// Default constructor.
160  json_reader_options() = default;
161 
169 
// Returns source info.
175  [[nodiscard]] source_info const& get_source() const { return _source; }
176 
// Returns data types of the columns.
182  [[nodiscard]] dtype_variant const& get_dtypes() const { return _dtypes; }
183 
// Returns compression format of the source.
189  [[nodiscard]] compression_type get_compression() const { return _compression; }
190 
// Returns number of bytes to skip from source start.
196  [[nodiscard]] size_t get_byte_range_offset() const { return _byte_range_offset; }
197 
// Returns number of bytes to read.
203  [[nodiscard]] size_t get_byte_range_size() const { return _byte_range_size; }
204 
210  [[nodiscard]] size_t get_byte_range_size_with_padding() const
211  {
212  if (_byte_range_size == 0) {
213  return 0;
214  } else {
215  return _byte_range_size + get_byte_range_padding();
216  }
217  }
218 
224  [[nodiscard]] size_t get_byte_range_padding() const
225  {
226  auto const num_columns =
227  std::visit(cudf::detail::visitor_overload{
228  [](auto const& dtypes) { return dtypes.size(); },
229  [](schema_element const& dtypes) { return dtypes.child_types.size(); }},
230  _dtypes);
231 
232  auto const max_row_bytes = 16 * 1024; // 16KB
233  auto const column_bytes = 64;
234  auto const base_padding = 1024; // 1KB
235 
236  if (num_columns == 0) {
237  // Use flat size if the number of columns is not known
238  return max_row_bytes;
239  }
240 
241  // Expand the size based on the number of columns, if available
242  return base_padding + num_columns * column_bytes;
243  }
244 
// Returns delimiter separating records in JSON lines.
250  [[nodiscard]] char get_delimiter() const { return _delimiter; }
251 
// Whether to read the file as a json object per line.
257  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
258 
// Whether to parse mixed types as a string column.
264  [[nodiscard]] bool is_enabled_mixed_types_as_string() const { return _mixed_types_as_string; }
265 
// Whether to prune columns on read, selected based on the set_dtypes option.
276  [[nodiscard]] bool is_enabled_prune_columns() const { return _prune_columns; }
277 
// Whether to enable experimental features.
285  [[nodiscard]] bool is_enabled_experimental() const { return _experimental; }
286 
// Whether to parse dates as DD/MM versus MM/DD.
292  [[nodiscard]] bool is_enabled_dayfirst() const { return _dayfirst; }
293 
// Whether the reader should keep quotes of string values.
299  [[nodiscard]] bool is_enabled_keep_quotes() const { return _keep_quotes; }
300 
// Whether the reader should normalize single quotes around strings.
306  [[nodiscard]] bool is_enabled_normalize_single_quotes() const { return _normalize_single_quotes; }
307 
// Whether the reader should normalize unquoted whitespace characters.
313  [[nodiscard]] bool is_enabled_normalize_whitespace() const { return _normalize_whitespace; }
314 
// Queries the JSON reader's behavior on invalid JSON lines.
320  [[nodiscard]] json_recovery_mode_t recovery_mode() const { return _recovery_mode; }
321 
// Whether json validation should be enforced strictly or not.
327  [[nodiscard]] bool is_strict_validation() const { return _strict_validation; }
328 
// Whether leading zeros are allowed in numeric values.
336  [[nodiscard]] bool is_allowed_numeric_leading_zeros() const
337  {
338  return _allow_numeric_leading_zeros;
339  }
340 
// Whether the non-numeric number values NaN, +INF, -INF, +Infinity, Infinity, -Infinity
// are allowed.
349  [[nodiscard]] bool is_allowed_nonnumeric_numbers() const { return _allow_nonnumeric_numbers; }
350 
// Whether unquoted control characters are allowed.
359  [[nodiscard]] bool is_allowed_unquoted_control_chars() const
360  {
361  return _allow_unquoted_control_chars;
362  }
363 
// Returns additional values to recognize as null values.
369  [[nodiscard]] std::vector<std::string> const& get_na_values() const { return _na_values; }
370 
// Sets source info.
376  void set_source(source_info src) { _source = std::move(src); }
377 
// Set data types for columns to be read (one data_type per column, as a vector).
383  void set_dtypes(std::vector<data_type> types) { _dtypes = std::move(types); }
384 
// Set data types for columns to be read (map from string key to data_type).
390  void set_dtypes(std::map<std::string, data_type> types) { _dtypes = std::move(types); }
391 
// Set data types for a potentially nested column hierarchy.
397  void set_dtypes(std::map<std::string, schema_element> types) { _dtypes = std::move(types); }
398 
406 
// Set the compression type.
412  void set_compression(compression_type comp_type) { _compression = comp_type; }
413 
// Set number of bytes to skip from source start.
419  void set_byte_range_offset(size_t offset) { _byte_range_offset = offset; }
420 
// Set number of bytes to read.
426  void set_byte_range_size(size_t size) { _byte_range_size = size; }
427 
433  void set_delimiter(char delimiter)
434  {
435  switch (delimiter) {
436  case '{':
437  case '[':
438  case '}':
439  case ']':
440  case ',':
441  case ':':
442  case '"':
443  case '\'':
444  case '\\':
445  case ' ':
446  case '\t':
447  case '\r': CUDF_FAIL("Unsupported delimiter character.", std::invalid_argument); break;
448  }
449  _delimiter = delimiter;
450  }
451 
// Set whether to read the file as a json object per line.
457  void enable_lines(bool val) { _lines = val; }
458 
// Set whether to parse mixed types as a string column.
465  void enable_mixed_types_as_string(bool val) { _mixed_types_as_string = val; }
466 
// Set whether to prune columns on read, selected based on the set_dtypes option.
476  void enable_prune_columns(bool val) { _prune_columns = val; }
477 
// Set whether to enable experimental features.
486  void enable_experimental(bool val) { _experimental = val; }
487 
// Set whether to parse dates as DD/MM versus MM/DD.
493  void enable_dayfirst(bool val) { _dayfirst = val; }
494 
// Set whether the reader should keep quotes of string values.
501  void enable_keep_quotes(bool val) { _keep_quotes = val; }
502 
// Set whether the reader should enable normalization of single quotes around strings.
509  void enable_normalize_single_quotes(bool val) { _normalize_single_quotes = val; }
510 
// Set whether the reader should enable normalization of unquoted whitespace.
517  void enable_normalize_whitespace(bool val) { _normalize_whitespace = val; }
518 
// Specifies the JSON reader's behavior on invalid JSON lines.
524  void set_recovery_mode(json_recovery_mode_t val) { _recovery_mode = val; }
525 
// Set whether strict validation is enabled or not.
531  void set_strict_validation(bool val) { _strict_validation = val; }
532 
542  {
543  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
544  _allow_numeric_leading_zeros = val;
545  }
546 
556  {
557  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
558  _allow_nonnumeric_numbers = val;
559  }
560 
571  {
572  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
573  _allow_unquoted_control_chars = val;
574  }
575 
// Sets additional values to recognize as null values.
581  void set_na_values(std::vector<std::string> vals) { _na_values = std::move(vals); }
582 };
583 
588  json_reader_options options;
589 
590  public:
// Default constructor.
596  explicit json_reader_options_builder() = default;
597 
// Constructor from source info; `src` is moved into the held options.
603  explicit json_reader_options_builder(source_info src) : options{std::move(src)} {}
604 
611  json_reader_options_builder& dtypes(std::vector<data_type> types)
612  {
613  options._dtypes = std::move(types);
614  return *this;
615  }
616 
623  json_reader_options_builder& dtypes(std::map<std::string, data_type> types)
624  {
625  options._dtypes = std::move(types);
626  return *this;
627  }
628 
635  json_reader_options_builder& dtypes(std::map<std::string, schema_element> types)
636  {
637  options._dtypes = std::move(types);
638  return *this;
639  }
640 
648  {
649  options.set_dtypes(std::move(types));
650  return *this;
651  }
652 
660  {
661  options._compression = comp_type;
662  return *this;
663  }
664 
672  {
673  options._byte_range_offset = offset;
674  return *this;
675  }
676 
684  {
685  options._byte_range_size = size;
686  return *this;
687  }
688 
696  {
697  options.set_delimiter(delimiter);
698  return *this;
699  }
700 
708  {
709  options._lines = val;
710  return *this;
711  }
712 
721  {
722  options._mixed_types_as_string = val;
723  return *this;
724  }
725 
737  {
738  options._prune_columns = val;
739  return *this;
740  }
741 
752  {
753  options._experimental = val;
754  return *this;
755  }
756 
764  {
765  options._dayfirst = val;
766  return *this;
767  }
768 
777  {
778  options._keep_quotes = val;
779  return *this;
780  }
781 
790  {
791  options._normalize_single_quotes = val;
792  return *this;
793  }
794 
803  {
804  options._normalize_whitespace = val;
805  return *this;
806  }
807 
815  {
816  options._recovery_mode = val;
817  return *this;
818  }
819 
827  {
828  options.set_strict_validation(val);
829  return *this;
830  }
831 
842  {
843  options.allow_numeric_leading_zeros(val);
844  return *this;
845  }
846 
858  {
859  options.allow_nonnumeric_numbers(val);
860  return *this;
861  }
862 
873  {
874  options.allow_unquoted_control_chars(val);
875  return *this;
876  }
877 
// Sets additional values to recognize as null values.
// Forwards `vals` to json_reader_options::set_na_values and returns this builder.
884  json_reader_options_builder& na_values(std::vector<std::string> vals)
885  {
886  options.set_na_values(std::move(vals));
887  return *this;
888  }
889 
// Conversion to json_reader_options&&: hands out the held options as an rvalue via std::move.
893  operator json_reader_options&&() { return std::move(options); }
894 
// Moves the json_reader_options member once it's built.
902  json_reader_options&& build() { return std::move(options); }
903 };
904 
923  json_reader_options options,
926  // end of group
928 
939 
944  // Specify the sink to use for writer output
945  sink_info _sink;
946  // Specify the compression format of the sink
947  compression_type _compression = compression_type::NONE;
948  // maximum number of rows to write in each chunk (limits memory use)
949  size_type _rows_per_chunk = std::numeric_limits<size_type>::max();
950  // Set of columns to output
951  table_view _table;
952  // string to use for null entries
953  std::string _na_rep = "";
954  // Indicates whether to output nulls as 'null' or exclude the field
955  bool _include_nulls = false;
956  // Indicates whether to use JSON lines for records format
957  bool _lines = false;
958  // string to use for values != 0 in INT8 types (default 'true')
959  std::string _true_value = std::string{"true"};
960  // string to use for values == 0 in INT8 types (default 'false')
961  std::string _false_value = std::string{"false"};
962  // Names of all columns; if empty, writer will generate column names
963  std::optional<table_metadata> _metadata; // Optional column names
964  // Indicates whether to escape UTF-8 characters in JSON output
965  bool _enable_utf8_escaped = true;
966 
974  : _sink(std::move(sink)), _rows_per_chunk(table.num_rows()), _table(std::move(table))
975  {
976  }
977 
979 
980  public:
// Default constructor.
986  explicit json_writer_options() = default;
987 
997 
// Returns sink used for writer output.
1003  [[nodiscard]] sink_info const& get_sink() const { return _sink; }
1004 
// Returns table that would be written to output.
1010  [[nodiscard]] table_view const& get_table() const { return _table; }
1011 
// Returns metadata information.
1017  [[nodiscard]] std::optional<table_metadata> const& get_metadata() const { return _metadata; }
1018 
// Returns string to use for null entries.
1024  [[nodiscard]] std::string const& get_na_rep() const { return _na_rep; }
1025 
// Returns compression type used for sink.
1031  [[nodiscard]] compression_type get_compression() const { return _compression; }
1032 
// Whether to output nulls as 'null'.
1038  [[nodiscard]] bool is_enabled_include_nulls() const { return _include_nulls; }
1039 
// Whether to use JSON lines for records format.
1045  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
1046 
// Enable or disable writing escaped UTF-8 characters in JSON output.
1058  void enable_utf8_escaped(bool val) { _enable_utf8_escaped = val; }
1059 
// Check whether UTF-8 escaped output is enabled.
1065  [[nodiscard]] bool is_enabled_utf8_escaped() const { return _enable_utf8_escaped; }
1066 
// Returns maximum number of rows to process for each file write.
1072  [[nodiscard]] size_type get_rows_per_chunk() const { return _rows_per_chunk; }
1073 
// Returns string used for values != 0 in INT8 types.
1079  [[nodiscard]] std::string const& get_true_value() const { return _true_value; }
1080 
// Returns string used for values == 0 in INT8 types.
1086  [[nodiscard]] std::string const& get_false_value() const { return _false_value; }
1087 
1088  // Setter
1089 
// Sets table to be written to output.
1095  void set_table(table_view tbl) { _table = tbl; }
1096 
// Sets compression type to be used.
1102  void set_compression(compression_type comptype) { _compression = comptype; }
1103 
// Sets metadata.
1109  void set_metadata(table_metadata metadata) { _metadata = std::move(metadata); }
1110 
// Sets string to use for null entries.
1116  void set_na_rep(std::string val) { _na_rep = std::move(val); }
1117 
// Enables/Disables output of nulls as 'null'.
1123  void enable_include_nulls(bool val) { _include_nulls = val; }
1124 
// Enables/Disables JSON lines for records format.
1130  void enable_lines(bool val) { _lines = val; }
1131 
// Sets maximum number of rows to process for each file write.
1137  void set_rows_per_chunk(size_type val) { _rows_per_chunk = val; }
1138 
// Sets string used for values != 0 in INT8 types.
1144  void set_true_value(std::string val) { _true_value = std::move(val); }
1145 
// Sets string used for values == 0 in INT8 types.
1151  void set_false_value(std::string val) { _false_value = std::move(val); }
1152 };
1153 
1158  json_writer_options options;
1159 
1160  public:
// Default constructor.
1166  explicit json_writer_options_builder() = default;
1167 
1175  : options{sink, table}
1176  {
1177  }
1178 
1186  {
1187  options._table = tbl;
1188  return *this;
1189  }
1190 
1198  {
1199  options._compression = comptype;
1200  return *this;
1201  }
1202 
1210  {
1211  options._metadata = std::move(metadata);
1212  return *this;
1213  }
1214 
1222  {
1223  options._na_rep = std::move(val);
1224  return *this;
1225  };
1226 
1234  {
1235  options._include_nulls = val;
1236  return *this;
1237  }
1238 
1248  {
1249  options._enable_utf8_escaped = val;
1250  return *this;
1251  }
1252 
1260  {
1261  options._lines = val;
1262  return *this;
1263  }
1264 
1272  {
1273  options._rows_per_chunk = val;
1274  return *this;
1275  }
1276 
1284  {
1285  options._true_value = std::move(val);
1286  return *this;
1287  }
1288 
1296  {
1297  options._false_value = std::move(val);
1298  return *this;
1299  }
1300 
// Conversion to json_writer_options&&: hands out the held options as an rvalue via std::move.
1304  operator json_writer_options&&() { return std::move(options); }
1305 
// Moves the json_writer_options member once it's built.
1313  json_writer_options&& build() { return std::move(options); }
1314 };
1315 
1333 void write_json(json_writer_options const& options,
1335 
1337 struct is_supported_json_write_type_fn {
1338  template <typename T>
1339  constexpr bool operator()() const
1340  {
1341  return cudf::io::detail::is_convertible_to_string_column<T>();
1342  }
1343 };
1345 
1353 {
1354  return cudf::type_dispatcher(type, is_supported_json_write_type_fn{});
1355 }
1356  // end of group
1358 } // namespace io
1359 } // namespace CUDF_EXPORT cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:238
Builds settings to use for read_json().
Definition: io/json.hpp:587
json_reader_options_builder & normalize_single_quotes(bool val)
Set whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:789
json_reader_options_builder & nonnumeric_numbers(bool val)
Set whether specific unquoted number values are valid JSON. The values are NaN, +INF,...
Definition: io/json.hpp:857
json_reader_options_builder & keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:776
json_reader_options_builder & normalize_whitespace(bool val)
Set whether the reader should normalize unquoted whitespace.
Definition: io/json.hpp:802
json_reader_options_builder & numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this to work.
Definition: io/json.hpp:841
json_reader_options_builder & dtypes(schema_element types)
Set data types for columns to be read.
Definition: io/json.hpp:647
json_reader_options_builder & dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:763
json_reader_options_builder & recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:814
json_reader_options_builder & na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:884
json_reader_options_builder & delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:695
json_reader_options_builder & prune_columns(bool val)
Set whether to prune columns on read, selected based on the dtypes option.
Definition: io/json.hpp:736
json_reader_options_builder & experimental(bool val)
Set whether to enable experimental features.
Definition: io/json.hpp:751
json_reader_options_builder & lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:707
json_reader_options_builder & dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:611
json_reader_options && build()
Moves the json_reader_options member once it's built.
Definition: io/json.hpp:902
json_reader_options_builder & mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:720
json_reader_options_builder & unquoted_control_chars(bool val)
Set whether chars >= 0 and < 32 are allowed in a quoted string without some form of escaping....
Definition: io/json.hpp:872
json_reader_options_builder & compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:659
json_reader_options_builder(source_info src)
Constructor from source info.
Definition: io/json.hpp:603
json_reader_options_builder & strict_validation(bool val)
Set whether json validation should be strict or not.
Definition: io/json.hpp:826
json_reader_options_builder & byte_range_size(size_type size)
Set number of bytes to read.
Definition: io/json.hpp:683
json_reader_options_builder & dtypes(std::map< std::string, schema_element > types)
Set data types for columns to be read.
Definition: io/json.hpp:635
json_reader_options_builder & byte_range_offset(size_type offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:671
json_reader_options_builder()=default
Default constructor.
json_reader_options_builder & dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:623
Input arguments to the read_json interface.
Definition: io/json.hpp:85
bool is_allowed_nonnumeric_numbers() const
Whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:349
void enable_mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:465
void set_compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:412
void set_dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:383
void allow_unquoted_control_chars(bool val)
Set whether in a quoted string should characters greater than or equal to 0 and less than 32 be allow...
Definition: io/json.hpp:570
void set_source(source_info src)
Sets source info.
Definition: io/json.hpp:376
void enable_normalize_single_quotes(bool val)
Set whether the reader should enable normalization of single quotes around strings.
Definition: io/json.hpp:509
bool is_allowed_numeric_leading_zeros() const
Whether leading zeros are allowed in numeric values.
Definition: io/json.hpp:336
void enable_prune_columns(bool val)
Set whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:476
bool is_enabled_keep_quotes() const
Whether the reader should keep quotes of string values.
Definition: io/json.hpp:299
void set_dtypes(schema_element types)
Set data types for a potentially nested column hierarchy.
void enable_normalize_whitespace(bool val)
Set whether the reader should enable normalization of unquoted whitespace.
Definition: io/json.hpp:517
void allow_nonnumeric_numbers(bool val)
Set whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:555
size_t get_byte_range_offset() const
Returns number of bytes to skip from source start.
Definition: io/json.hpp:196
source_info const & get_source() const
Returns source info.
Definition: io/json.hpp:175
void enable_experimental(bool val)
Set whether to enable experimental features.
Definition: io/json.hpp:486
void set_dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:390
bool is_enabled_prune_columns() const
Whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:276
char get_delimiter() const
Returns delimiter separating records in JSON lines.
Definition: io/json.hpp:250
bool is_enabled_lines() const
Whether to read the file as a json object per line.
Definition: io/json.hpp:257
void allow_numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this to work.
Definition: io/json.hpp:541
void set_strict_validation(bool val)
Set whether strict validation is enabled or not.
Definition: io/json.hpp:531
bool is_enabled_mixed_types_as_string() const
Whether to parse mixed types as a string column.
Definition: io/json.hpp:264
json_reader_options()=default
Default constructor.
void set_na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:581
void enable_dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:493
size_t get_byte_range_size_with_padding() const
Returns number of bytes to read with padding.
Definition: io/json.hpp:210
void set_recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:524
bool is_enabled_normalize_whitespace() const
Whether the reader should normalize unquoted whitespace characters.
Definition: io/json.hpp:313
bool is_strict_validation() const
Whether json validation should be enforced strictly or not.
Definition: io/json.hpp:327
void set_delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:433
void set_byte_range_offset(size_t offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:419
void enable_lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:457
dtype_variant const & get_dtypes() const
Returns data types of the columns.
Definition: io/json.hpp:182
void enable_keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:501
bool is_enabled_normalize_single_quotes() const
Whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:306
compression_type get_compression() const
Returns compression format of the source.
Definition: io/json.hpp:189
void set_dtypes(std::map< std::string, schema_element > types)
Set data types for a potentially nested column hierarchy.
Definition: io/json.hpp:397
size_t get_byte_range_size() const
Returns number of bytes to read.
Definition: io/json.hpp:203
std::variant< std::vector< data_type >, std::map< std::string, data_type >, std::map< std::string, schema_element >, schema_element > dtype_variant
Variant type holding dtypes information for the columns.
Definition: io/json.hpp:91
bool is_enabled_experimental() const
Whether to enable experimental features.
Definition: io/json.hpp:285
json_recovery_mode_t recovery_mode() const
Queries the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:320
static json_reader_options_builder builder(source_info src)
Creates a json_reader_options_builder which will build json_reader_options.
bool is_enabled_dayfirst() const
Whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:292
size_t get_byte_range_padding() const
Returns number of bytes to pad when reading.
Definition: io/json.hpp:224
bool is_allowed_unquoted_control_chars() const
Whether in a quoted string should characters greater than or equal to 0 and less than 32 be allowed w...
Definition: io/json.hpp:359
std::vector< std::string > const & get_na_values() const
Returns additional values to recognize as null values.
Definition: io/json.hpp:369
void set_byte_range_size(size_t size)
Set number of bytes to read.
Definition: io/json.hpp:426
Builder to build options for write_json()
Definition: io/json.hpp:1157
json_writer_options_builder & compression(compression_type comptype)
Sets compression type of output sink.
Definition: io/json.hpp:1197
json_writer_options_builder & include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1233
json_writer_options_builder & table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1185
json_writer_options_builder()=default
Default constructor.
json_writer_options_builder & rows_per_chunk(int val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1271
json_writer_options_builder & utf8_escaped(bool val)
Enables/Disables UTF-8 escaped output for string fields.
Definition: io/json.hpp:1247
json_writer_options_builder & true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1283
json_writer_options_builder & false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1295
json_writer_options_builder(sink_info const &sink, table_view const &table)
Constructor from sink and table.
Definition: io/json.hpp:1174
json_writer_options_builder & na_rep(std::string val)
Sets string to use for null entries.
Definition: io/json.hpp:1221
json_writer_options_builder & metadata(table_metadata metadata)
Sets optional metadata (with column names).
Definition: io/json.hpp:1209
json_writer_options && build()
Moves the json_writer_options member once it's built.
Definition: io/json.hpp:1313
json_writer_options_builder & lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1259
Settings to use for write_json().
Definition: io/json.hpp:943
void set_compression(compression_type comptype)
Sets compression type to be used.
Definition: io/json.hpp:1102
compression_type get_compression() const
Returns compression type used for sink.
Definition: io/json.hpp:1031
table_view const & get_table() const
Returns table that would be written to output.
Definition: io/json.hpp:1010
void set_false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1151
void enable_include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1123
bool is_enabled_include_nulls() const
Whether to output nulls as 'null'.
Definition: io/json.hpp:1038
void enable_lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1130
void set_na_rep(std::string val)
Sets string to use for null entries.
Definition: io/json.hpp:1116
static json_writer_options_builder builder(sink_info const &sink, table_view const &table)
Create builder to create json_writer_options.
json_writer_options()=default
Default constructor.
void set_true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1144
sink_info const & get_sink() const
Returns sink used for writer output.
Definition: io/json.hpp:1003
void enable_utf8_escaped(bool val)
Enable or disable writing escaped UTF-8 characters in JSON output.
Definition: io/json.hpp:1058
void set_rows_per_chunk(size_type val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1137
std::string const & get_true_value() const
Returns string used for values != 0 in INT8 types.
Definition: io/json.hpp:1079
void set_table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1095
std::string const & get_false_value() const
Returns string used for values == 0 in INT8 types.
Definition: io/json.hpp:1086
bool is_enabled_lines() const
Whether to use JSON lines for records format.
Definition: io/json.hpp:1045
bool is_enabled_utf8_escaped() const
Check whether UTF-8 escaped output is enabled.
Definition: io/json.hpp:1065
size_type get_rows_per_chunk() const
Returns maximum number of rows to process for each file write.
Definition: io/json.hpp:1072
std::optional< table_metadata > const & get_metadata() const
Returns metadata information.
Definition: io/json.hpp:1017
std::string const & get_na_rep() const
Returns string to use for null entries.
Definition: io/json.hpp:1024
void set_metadata(table_metadata metadata)
Sets metadata.
Definition: io/json.hpp:1109
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
A set of cudf::column's of the same size.
Definition: table.hpp:29
size_type num_rows() const noexcept
Returns the number of rows.
Definition: table.hpp:82
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
table_with_metadata read_json(json_reader_options options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Reads a JSON dataset into a set of columns.
json_recovery_mode_t
Control the error recovery behavior of the json parser.
Definition: io/json.hpp:57
@ RECOVER_WITH_NULL
Recovers from an error, replacing invalid records with null.
@ FAIL
Does not recover from an error when encountering an invalid format.
compression_type
Compression algorithms.
Definition: io/types.hpp:46
void write_json(json_writer_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream())
Writes a set of columns to JSON format.
constexpr bool is_supported_write_json(data_type type)
Checks if a cudf::data_type is supported for JSON writing.
Definition: io/json.hpp:1352
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:143
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:182
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
cuDF interfaces
Definition: host_udf.hpp:26
Allows specifying the target types for nested JSON data via json_reader_options' set_dtypes method.
Definition: io/json.hpp:37
std::optional< std::vector< std::string > > column_order
Allows specifying the order of the columns.
Definition: io/json.hpp:51
data_type type
The type that this column should be converted to.
Definition: io/json.hpp:41
std::map< std::string, schema_element > child_types
Allows specifying this column's child columns target type.
Definition: io/json.hpp:46
Destination information for write interfaces.
Definition: io/types.hpp:471
Source information for read interfaces.
Definition: io/types.hpp:316
Table metadata returned by IO readers.
Definition: io/types.hpp:266
Table with table metadata used by io readers to return the metadata by value.
Definition: io/types.hpp:292
Class definitions for (mutable)_table_view
Type declarations for libcudf.