io/json.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "types.hpp"
20 
22 #include <cudf/types.hpp>
23 #include <cudf/utilities/error.hpp>
25 
26 #include <map>
27 #include <string>
28 #include <utility>
29 #include <variant>
30 #include <vector>
31 
32 namespace CUDF_EXPORT cudf {
33 namespace io {
40 class json_reader_options_builder;
41 
51 
55  std::map<std::string, schema_element> child_types;
56 };
57 
62  FAIL,
64 };
65 
90  source_info _source;
91 
92  // Data types of the column; empty to infer dtypes
93  std::variant<std::vector<data_type>,
94  std::map<std::string, data_type>,
95  std::map<std::string, schema_element>>
96  _dtypes;
97  // Specify the compression format of the source or infer from file extension
98  compression_type _compression = compression_type::AUTO;
99 
100  // Read the file as a json object per line
101  bool _lines = false;
102  // Parse mixed types as a string column
103  bool _mixed_types_as_string = false;
104  // Delimiter separating records in JSON lines
105  char _delimiter = '\n';
106  // Prune columns on read, selected based on the _dtypes option
107  bool _prune_columns = false;
108 
109  // Bytes to skip from the start
110  size_t _byte_range_offset = 0;
111  // Bytes to read; always reads complete rows
112  size_t _byte_range_size = 0;
113 
114  // Whether to parse dates as DD/MM versus MM/DD
115  bool _dayfirst = false;
116 
117  // Whether to use the legacy reader
118  bool _legacy = false;
119 
120  // Whether to keep the quote characters of string values
121  bool _keep_quotes = false;
122 
123  // Normalize single quotes
124  bool _normalize_single_quotes = false;
125 
126  // Normalize unquoted spaces and tabs
127  bool _normalize_whitespace = false;
128 
129  // Whether to recover after an invalid JSON line
130  json_recovery_mode_t _recovery_mode = json_recovery_mode_t::FAIL;
131 
132  // Validation checks for spark
133  // Should the json validation be strict or not
134  // Note: strict validation enforces the JSON specification https://www.json.org/json-en.html
135  bool _strict_validation = false;
136  // Allow leading zeros for numeric values.
137  bool _allow_numeric_leading_zeros = true;
138  // Allow non-numeric numbers: NaN, +INF, -INF, +Infinity, Infinity, -Infinity
139  bool _allow_nonnumeric_numbers = true;
140  // Allow unquoted control characters
141  bool _allow_unquoted_control_chars = true;
142  // Additional values to recognize as null values
143  std::vector<std::string> _na_values;
144 
150  explicit json_reader_options(source_info src) : _source{std::move(src)} {}
151 
153 
154  public:
/**
 * @brief Default constructor.
 *
 * Leaves every option at its in-class default value.
 */
160  json_reader_options() = default;
161 
169 
175  [[nodiscard]] source_info const& get_source() const { return _source; }
176 
182  [[nodiscard]] std::variant<std::vector<data_type>,
183  std::map<std::string, data_type>,
184  std::map<std::string, schema_element>> const&
185  get_dtypes() const
186  {
187  return _dtypes;
188  }
189 
195  [[nodiscard]] compression_type get_compression() const { return _compression; }
196 
202  [[nodiscard]] size_t get_byte_range_offset() const { return _byte_range_offset; }
203 
209  [[nodiscard]] size_t get_byte_range_size() const { return _byte_range_size; }
210 
216  [[nodiscard]] size_t get_byte_range_size_with_padding() const
217  {
218  if (_byte_range_size == 0) {
219  return 0;
220  } else {
221  return _byte_range_size + get_byte_range_padding();
222  }
223  }
224 
230  [[nodiscard]] size_t get_byte_range_padding() const
231  {
232  auto const num_columns = std::visit([](auto const& dtypes) { return dtypes.size(); }, _dtypes);
233 
234  auto const max_row_bytes = 16 * 1024; // 16KB
235  auto const column_bytes = 64;
236  auto const base_padding = 1024; // 1KB
237 
238  if (num_columns == 0) {
239  // Use flat size if the number of columns is not known
240  return max_row_bytes;
241  }
242 
243  // Expand the size based on the number of columns, if available
244  return base_padding + num_columns * column_bytes;
245  }
246 
252  [[nodiscard]] char get_delimiter() const { return _delimiter; }
253 
259  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
260 
266  [[nodiscard]] bool is_enabled_mixed_types_as_string() const { return _mixed_types_as_string; }
267 
278  [[nodiscard]] bool is_enabled_prune_columns() const { return _prune_columns; }
279 
285  [[nodiscard]] bool is_enabled_dayfirst() const { return _dayfirst; }
286 
292  [[nodiscard]] bool is_enabled_keep_quotes() const { return _keep_quotes; }
293 
299  [[nodiscard]] bool is_enabled_normalize_single_quotes() const { return _normalize_single_quotes; }
300 
306  [[nodiscard]] bool is_enabled_normalize_whitespace() const { return _normalize_whitespace; }
307 
313  [[nodiscard]] json_recovery_mode_t recovery_mode() const { return _recovery_mode; }
314 
320  [[nodiscard]] bool is_strict_validation() const { return _strict_validation; }
321 
329  [[nodiscard]] bool is_allowed_numeric_leading_zeros() const
330  {
331  return _allow_numeric_leading_zeros;
332  }
333 
342  [[nodiscard]] bool is_allowed_nonnumeric_numbers() const { return _allow_nonnumeric_numbers; }
343 
352  [[nodiscard]] bool is_allowed_unquoted_control_chars() const
353  {
354  return _allow_unquoted_control_chars;
355  }
356 
362  [[nodiscard]] std::vector<std::string> const& get_na_values() const { return _na_values; }
363 
369  void set_dtypes(std::vector<data_type> types) { _dtypes = std::move(types); }
370 
376  void set_dtypes(std::map<std::string, data_type> types) { _dtypes = std::move(types); }
377 
383  void set_dtypes(std::map<std::string, schema_element> types) { _dtypes = std::move(types); }
384 
390  void set_compression(compression_type comp_type) { _compression = comp_type; }
391 
397  void set_byte_range_offset(size_t offset) { _byte_range_offset = offset; }
398 
404  void set_byte_range_size(size_t size) { _byte_range_size = size; }
405 
411  void set_delimiter(char delimiter)
412  {
413  switch (delimiter) {
414  case '{':
415  case '[':
416  case '}':
417  case ']':
418  case ',':
419  case ':':
420  case '"':
421  case '\'':
422  case '\\':
423  case ' ':
424  case '\t':
425  case '\r': CUDF_FAIL("Unsupported delimiter character.", std::invalid_argument); break;
426  }
427  _delimiter = delimiter;
428  }
429 
435  void enable_lines(bool val) { _lines = val; }
436 
443  void enable_mixed_types_as_string(bool val) { _mixed_types_as_string = val; }
444 
454  void enable_prune_columns(bool val) { _prune_columns = val; }
455 
461  void enable_dayfirst(bool val) { _dayfirst = val; }
462 
469  void enable_keep_quotes(bool val) { _keep_quotes = val; }
470 
477  void enable_normalize_single_quotes(bool val) { _normalize_single_quotes = val; }
478 
485  void enable_normalize_whitespace(bool val) { _normalize_whitespace = val; }
486 
492  void set_recovery_mode(json_recovery_mode_t val) { _recovery_mode = val; }
493 
499  void set_strict_validation(bool val) { _strict_validation = val; }
500 
510  {
511  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
512  _allow_numeric_leading_zeros = val;
513  }
514 
524  {
525  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
526  _allow_nonnumeric_numbers = val;
527  }
528 
539  {
540  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
541  _allow_unquoted_control_chars = val;
542  }
543 
549  void set_na_values(std::vector<std::string> vals) { _na_values = std::move(vals); }
550 };
551 
556  json_reader_options options;
557 
558  public:
564  explicit json_reader_options_builder() = default;
565 
571  explicit json_reader_options_builder(source_info src) : options{std::move(src)} {}
572 
579  json_reader_options_builder& dtypes(std::vector<data_type> types)
580  {
581  options._dtypes = std::move(types);
582  return *this;
583  }
584 
591  json_reader_options_builder& dtypes(std::map<std::string, data_type> types)
592  {
593  options._dtypes = std::move(types);
594  return *this;
595  }
596 
603  json_reader_options_builder& dtypes(std::map<std::string, schema_element> types)
604  {
605  options._dtypes = std::move(types);
606  return *this;
607  }
608 
616  {
617  options._compression = comp_type;
618  return *this;
619  }
620 
628  {
629  options._byte_range_offset = offset;
630  return *this;
631  }
632 
640  {
641  options._byte_range_size = size;
642  return *this;
643  }
644 
652  {
653  options.set_delimiter(delimiter);
654  return *this;
655  }
656 
664  {
665  options._lines = val;
666  return *this;
667  }
668 
677  {
678  options._mixed_types_as_string = val;
679  return *this;
680  }
681 
693  {
694  options._prune_columns = val;
695  return *this;
696  }
697 
705  {
706  options._dayfirst = val;
707  return *this;
708  }
709 
718  {
719  options._keep_quotes = val;
720  return *this;
721  }
722 
731  {
732  options._normalize_single_quotes = val;
733  return *this;
734  }
735 
744  {
745  options._normalize_whitespace = val;
746  return *this;
747  }
748 
756  {
757  options._recovery_mode = val;
758  return *this;
759  }
760 
768  {
769  options.set_strict_validation(val);
770  return *this;
771  }
772 
783  {
784  options.allow_numeric_leading_zeros(val);
785  return *this;
786  }
787 
799  {
800  options.allow_nonnumeric_numbers(val);
801  return *this;
802  }
803 
814  {
815  options.allow_unquoted_control_chars(val);
816  return *this;
817  }
818 
825  json_reader_options_builder& na_values(std::vector<std::string> vals)
826  {
827  options.set_na_values(std::move(vals));
828  return *this;
829  }
830 
834  operator json_reader_options&&() { return std::move(options); }
835 
843  json_reader_options&& build() { return std::move(options); }
844 };
845 
864  json_reader_options options,
867  // end of group
869 
880 
885  // Specify the sink to use for writer output
886  sink_info _sink;
887  // maximum number of rows to write in each chunk (limits memory use)
888  size_type _rows_per_chunk = std::numeric_limits<size_type>::max();
889  // Set of columns to output
890  table_view _table;
891  // string to use for null entries
892  std::string _na_rep = "";
893  // Indicates whether to output nulls as 'null' or exclude the field
894  bool _include_nulls = false;
895  // Indicates whether to use JSON lines for records format
896  bool _lines = false;
897  // string to use for values != 0 in INT8 types (default 'true')
898  std::string _true_value = std::string{"true"};
899  // string to use for values == 0 in INT8 types (default 'false')
900  std::string _false_value = std::string{"false"};
901  // Names of all columns; if empty, writer will generate column names
902  std::optional<table_metadata> _metadata; // Optional column names
903 
911  : _sink(std::move(sink)), _rows_per_chunk(table.num_rows()), _table(std::move(table))
912  {
913  }
914 
916 
917  public:
/**
 * @brief Default constructor.
 *
 * Leaves every option at its in-class default value.
 */
923  explicit json_writer_options() = default;
924 
934 
940  [[nodiscard]] sink_info const& get_sink() const { return _sink; }
941 
947  [[nodiscard]] table_view const& get_table() const { return _table; }
948 
954  [[nodiscard]] std::optional<table_metadata> const& get_metadata() const { return _metadata; }
955 
961  [[nodiscard]] std::string const& get_na_rep() const { return _na_rep; }
962 
968  [[nodiscard]] bool is_enabled_include_nulls() const { return _include_nulls; }
969 
975  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
976 
982  [[nodiscard]] size_type get_rows_per_chunk() const { return _rows_per_chunk; }
983 
989  [[nodiscard]] std::string const& get_true_value() const { return _true_value; }
990 
996  [[nodiscard]] std::string const& get_false_value() const { return _false_value; }
997 
998  // Setter
999 
1005  void set_table(table_view tbl) { _table = tbl; }
1006 
1012  void set_metadata(table_metadata metadata) { _metadata = std::move(metadata); }
1013 
1019  void set_na_rep(std::string val) { _na_rep = std::move(val); }
1020 
1026  void enable_include_nulls(bool val) { _include_nulls = val; }
1027 
1033  void enable_lines(bool val) { _lines = val; }
1034 
1040  void set_rows_per_chunk(size_type val) { _rows_per_chunk = val; }
1041 
1047  void set_true_value(std::string val) { _true_value = std::move(val); }
1048 
1054  void set_false_value(std::string val) { _false_value = std::move(val); }
1055 };
1056 
1061  json_writer_options options;
1062 
1063  public:
/**
 * @brief Default constructor.
 *
 * The wrapped options object is default-constructed.
 */
1069  explicit json_writer_options_builder() = default;
1070 
1078  : options{sink, table}
1079  {
1080  }
1081 
1089  {
1090  options._table = tbl;
1091  return *this;
1092  }
1093 
1101  {
1102  options._metadata = std::move(metadata);
1103  return *this;
1104  }
1105 
1113  {
1114  options._na_rep = std::move(val);
1115  return *this;
1116  };
1117 
1125  {
1126  options._include_nulls = val;
1127  return *this;
1128  }
1129 
1137  {
1138  options._lines = val;
1139  return *this;
1140  }
1141 
1149  {
1150  options._rows_per_chunk = val;
1151  return *this;
1152  }
1153 
1161  {
1162  options._true_value = std::move(val);
1163  return *this;
1164  }
1165 
1173  {
1174  options._false_value = std::move(val);
1175  return *this;
1176  }
1177 
1181  operator json_writer_options&&() { return std::move(options); }
1182 
1190  json_writer_options&& build() { return std::move(options); }
1191 };
1192 
1210 void write_json(json_writer_options const& options,
1212  // end of group
1214 } // namespace io
1215 } // namespace CUDF_EXPORT cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
Builds settings to use for read_json().
Definition: io/json.hpp:555
json_reader_options_builder & normalize_single_quotes(bool val)
Set whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:730
json_reader_options_builder & nonnumeric_numbers(bool val)
Set whether specific unquoted number values are valid JSON. The values are NaN, +INF,...
Definition: io/json.hpp:798
json_reader_options_builder & keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:717
json_reader_options_builder & normalize_whitespace(bool val)
Set whether the reader should normalize unquoted whitespace.
Definition: io/json.hpp:743
json_reader_options_builder & numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this to work.
Definition: io/json.hpp:782
json_reader_options_builder & dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:704
json_reader_options_builder & recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:755
json_reader_options_builder & na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:825
json_reader_options_builder & delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:651
json_reader_options_builder & prune_columns(bool val)
Set whether to prune columns on read, selected based on the dtypes option.
Definition: io/json.hpp:692
json_reader_options_builder & lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:663
json_reader_options_builder & dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:579
json_reader_options && build()
Move json_reader_options member once it's built.
Definition: io/json.hpp:843
json_reader_options_builder & mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:676
json_reader_options_builder & unquoted_control_chars(bool val)
Set whether chars >= 0 and < 32 are allowed in a quoted string without some form of escaping....
Definition: io/json.hpp:813
json_reader_options_builder & compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:615
json_reader_options_builder(source_info src)
Constructor from source info.
Definition: io/json.hpp:571
json_reader_options_builder & strict_validation(bool val)
Set whether json validation should be strict or not.
Definition: io/json.hpp:767
json_reader_options_builder & byte_range_size(size_type size)
Set number of bytes to read.
Definition: io/json.hpp:639
json_reader_options_builder & dtypes(std::map< std::string, schema_element > types)
Set data types for columns to be read.
Definition: io/json.hpp:603
json_reader_options_builder & byte_range_offset(size_type offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:627
json_reader_options_builder()=default
Default constructor.
json_reader_options_builder & dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:591
Input arguments to the read_json interface.
Definition: io/json.hpp:89
bool is_allowed_nonnumeric_numbers() const
Whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:342
void enable_mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:443
void set_compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:390
void set_dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:369
void allow_unquoted_control_chars(bool val)
Set whether characters greater than or equal to 0 and less than 32 should be allowed in a quoted string without some form of escaping.
Definition: io/json.hpp:538
void enable_normalize_single_quotes(bool val)
Set whether the reader should enable normalization of single quotes around strings.
Definition: io/json.hpp:477
bool is_allowed_numeric_leading_zeros() const
Whether leading zeros are allowed in numeric values.
Definition: io/json.hpp:329
void enable_prune_columns(bool val)
Set whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:454
bool is_enabled_keep_quotes() const
Whether the reader should keep quotes of string values.
Definition: io/json.hpp:292
void enable_normalize_whitespace(bool val)
Set whether the reader should enable normalization of unquoted whitespace.
Definition: io/json.hpp:485
void allow_nonnumeric_numbers(bool val)
Set whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:523
size_t get_byte_range_offset() const
Returns number of bytes to skip from source start.
Definition: io/json.hpp:202
source_info const & get_source() const
Returns source info.
Definition: io/json.hpp:175
void set_dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:376
bool is_enabled_prune_columns() const
Whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:278
char get_delimiter() const
Returns delimiter separating records in JSON lines.
Definition: io/json.hpp:252
bool is_enabled_lines() const
Whether to read the file as a json object per line.
Definition: io/json.hpp:259
void allow_numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this to work.
Definition: io/json.hpp:509
void set_strict_validation(bool val)
Set whether strict validation is enabled or not.
Definition: io/json.hpp:499
bool is_enabled_mixed_types_as_string() const
Whether to parse mixed types as a string column.
Definition: io/json.hpp:266
json_reader_options()=default
Default constructor.
void set_na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:549
void enable_dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:461
std::variant< std::vector< data_type >, std::map< std::string, data_type >, std::map< std::string, schema_element > > const & get_dtypes() const
Returns data types of the columns.
Definition: io/json.hpp:185
size_t get_byte_range_size_with_padding() const
Returns number of bytes to read with padding.
Definition: io/json.hpp:216
void set_recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:492
bool is_enabled_normalize_whitespace() const
Whether the reader should normalize unquoted whitespace characters.
Definition: io/json.hpp:306
bool is_strict_validation() const
Whether json validation should be enforced strictly or not.
Definition: io/json.hpp:320
void set_delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:411
void set_byte_range_offset(size_t offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:397
void enable_lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:435
void enable_keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:469
bool is_enabled_normalize_single_quotes() const
Whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:299
compression_type get_compression() const
Returns compression format of the source.
Definition: io/json.hpp:195
void set_dtypes(std::map< std::string, schema_element > types)
Set data types for a potentially nested column hierarchy.
Definition: io/json.hpp:383
size_t get_byte_range_size() const
Returns number of bytes to read.
Definition: io/json.hpp:209
json_recovery_mode_t recovery_mode() const
Queries the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:313
static json_reader_options_builder builder(source_info src)
Create json_reader_options_builder which will build json_reader_options.
bool is_enabled_dayfirst() const
Whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:285
size_t get_byte_range_padding() const
Returns number of bytes to pad when reading.
Definition: io/json.hpp:230
bool is_allowed_unquoted_control_chars() const
Whether characters greater than or equal to 0 and less than 32 should be allowed in a quoted string without some form of escaping.
Definition: io/json.hpp:352
std::vector< std::string > const & get_na_values() const
Returns additional values to recognize as null values.
Definition: io/json.hpp:362
void set_byte_range_size(size_t size)
Set number of bytes to read.
Definition: io/json.hpp:404
Builder to build options for write_json()
Definition: io/json.hpp:1060
json_writer_options_builder & include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1124
json_writer_options_builder & table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1088
json_writer_options_builder()=default
Default constructor.
json_writer_options_builder & rows_per_chunk(int val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1148
json_writer_options_builder & true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1160
json_writer_options_builder & false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1172
json_writer_options_builder(sink_info const &sink, table_view const &table)
Constructor from sink and table.
Definition: io/json.hpp:1077
json_writer_options_builder & na_rep(std::string val)
Sets string to use for null entries.
Definition: io/json.hpp:1112
json_writer_options_builder & metadata(table_metadata metadata)
Sets optional metadata (with column names).
Definition: io/json.hpp:1100
json_writer_options && build()
Move json_writer_options member once it's built.
Definition: io/json.hpp:1190
json_writer_options_builder & lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1136
Settings to use for write_json().
Definition: io/json.hpp:884
table_view const & get_table() const
Returns table that would be written to output.
Definition: io/json.hpp:947
void set_false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1054
void enable_include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1026
bool is_enabled_include_nulls() const
Whether to output nulls as 'null'.
Definition: io/json.hpp:968
void enable_lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1033
void set_na_rep(std::string val)
Sets string to use for null entries.
Definition: io/json.hpp:1019
static json_writer_options_builder builder(sink_info const &sink, table_view const &table)
Create builder to create json_writer_options.
json_writer_options()=default
Default constructor.
void set_true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1047
sink_info const & get_sink() const
Returns sink used for writer output.
Definition: io/json.hpp:940
void set_rows_per_chunk(size_type val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1040
std::string const & get_true_value() const
Returns string used for values != 0 in INT8 types.
Definition: io/json.hpp:989
void set_table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1005
std::string const & get_false_value() const
Returns string used for values == 0 in INT8 types.
Definition: io/json.hpp:996
bool is_enabled_lines() const
Whether to use JSON lines for records format.
Definition: io/json.hpp:975
size_type get_rows_per_chunk() const
Returns maximum number of rows to process for each file write.
Definition: io/json.hpp:982
std::optional< table_metadata > const & get_metadata() const
Returns metadata information.
Definition: io/json.hpp:954
std::string const & get_na_rep() const
Returns string to use for null entries.
Definition: io/json.hpp:961
void set_metadata(table_metadata metadata)
Sets metadata.
Definition: io/json.hpp:1012
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
size_type num_rows() const noexcept
Returns the number of rows.
Definition: table.hpp:93
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
table_with_metadata read_json(json_reader_options options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Reads a JSON dataset into a set of columns.
json_recovery_mode_t
Control the error recovery behavior of the json parser.
Definition: io/json.hpp:61
@ RECOVER_WITH_NULL
Recovers from an error, replacing invalid records with null.
@ FAIL
Does not recover from an error when encountering an invalid format.
compression_type
Compression algorithms.
Definition: io/types.hpp:57
void write_json(json_writer_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream())
Writes a set of columns to JSON format.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:217
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: aggregation.hpp:35
Allows specifying the target types for nested JSON data via json_reader_options' set_dtypes method.
Definition: io/json.hpp:46
data_type type
The type that this column should be converted to.
Definition: io/json.hpp:50
std::map< std::string, schema_element > child_types
Allows specifying this column's child columns target type.
Definition: io/json.hpp:55
Destination information for write interfaces.
Definition: io/types.hpp:512
Source information for read interfaces.
Definition: io/types.hpp:337
Table metadata returned by IO readers.
Definition: io/types.hpp:277
Table with table metadata used by io readers to return the metadata by value.
Definition: io/types.hpp:292
Class definitions for (mutable)_table_view
Type declarations for libcudf.