io/json.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "types.hpp"
20 
22 #include <cudf/types.hpp>
23 #include <cudf/utilities/error.hpp>
25 
26 #include <map>
27 #include <string>
28 #include <utility>
29 #include <variant>
30 #include <vector>
31 
32 namespace CUDF_EXPORT cudf {
33 namespace io {
40 class json_reader_options_builder;
41 
51 
55  std::map<std::string, schema_element> child_types;
56 };
57 
62  FAIL,
64 };
65 
90  source_info _source;
91 
92  // Data types of the column; empty to infer dtypes
93  std::variant<std::vector<data_type>,
94  std::map<std::string, data_type>,
95  std::map<std::string, schema_element>>
96  _dtypes;
97  // Specify the compression format of the source or infer from file extension
98  compression_type _compression = compression_type::AUTO;
99 
100  // Read the file as a json object per line
101  bool _lines = false;
102  // Parse mixed types as a string column
103  bool _mixed_types_as_string = false;
104  // Delimiter separating records in JSON lines
105  char _delimiter = '\n';
106  // Prune columns on read, selected based on the _dtypes option
107  bool _prune_columns = false;
108  // Experimental features: new column tree construction
109  bool _experimental = false;
110 
111  // Bytes to skip from the start
112  size_t _byte_range_offset = 0;
113  // Bytes to read; always reads complete rows
114  size_t _byte_range_size = 0;
115 
116  // Whether to parse dates as DD/MM versus MM/DD
117  bool _dayfirst = false;
118 
119  // Whether to use the legacy reader
120  bool _legacy = false;
121 
122  // Whether to keep the quote characters of string values
123  bool _keep_quotes = false;
124 
125  // Normalize single quotes
126  bool _normalize_single_quotes = false;
127 
128  // Normalize unquoted spaces and tabs
129  bool _normalize_whitespace = false;
130 
131  // Whether to recover after an invalid JSON line
132  json_recovery_mode_t _recovery_mode = json_recovery_mode_t::FAIL;
133 
134  // Validation checks for spark
135  // Should the json validation be strict or not
136  // Note: strict validation enforces the JSON specification https://www.json.org/json-en.html
137  bool _strict_validation = false;
138  // Allow leading zeros for numeric values.
139  bool _allow_numeric_leading_zeros = true;
140  // Allow non-numeric numbers: NaN, +INF, -INF, +Infinity, Infinity, -Infinity
141  bool _allow_nonnumeric_numbers = true;
142  // Allow unquoted control characters
143  bool _allow_unquoted_control_chars = true;
144  // Additional values to recognize as null values
145  std::vector<std::string> _na_values;
146 
152  explicit json_reader_options(source_info src) : _source{std::move(src)} {}
153 
155 
156  public:
/**
 * @brief Default constructor.
 *
 * Every member keeps the value given by its in-class initializer.
 */
162  json_reader_options() = default;
163 
171 
177  [[nodiscard]] source_info const& get_source() const { return _source; }
178 
184  [[nodiscard]] std::variant<std::vector<data_type>,
185  std::map<std::string, data_type>,
186  std::map<std::string, schema_element>> const&
187  get_dtypes() const
188  {
189  return _dtypes;
190  }
191 
197  [[nodiscard]] compression_type get_compression() const { return _compression; }
198 
204  [[nodiscard]] size_t get_byte_range_offset() const { return _byte_range_offset; }
205 
211  [[nodiscard]] size_t get_byte_range_size() const { return _byte_range_size; }
212 
218  [[nodiscard]] size_t get_byte_range_size_with_padding() const
219  {
220  if (_byte_range_size == 0) {
221  return 0;
222  } else {
223  return _byte_range_size + get_byte_range_padding();
224  }
225  }
226 
232  [[nodiscard]] size_t get_byte_range_padding() const
233  {
234  auto const num_columns = std::visit([](auto const& dtypes) { return dtypes.size(); }, _dtypes);
235 
236  auto const max_row_bytes = 16 * 1024; // 16KB
237  auto const column_bytes = 64;
238  auto const base_padding = 1024; // 1KB
239 
240  if (num_columns == 0) {
241  // Use flat size if the number of columns is not known
242  return max_row_bytes;
243  }
244 
245  // Expand the size based on the number of columns, if available
246  return base_padding + num_columns * column_bytes;
247  }
248 
254  [[nodiscard]] char get_delimiter() const { return _delimiter; }
255 
261  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
262 
268  [[nodiscard]] bool is_enabled_mixed_types_as_string() const { return _mixed_types_as_string; }
269 
280  [[nodiscard]] bool is_enabled_prune_columns() const { return _prune_columns; }
281 
289  [[nodiscard]] bool is_enabled_experimental() const { return _experimental; }
290 
296  [[nodiscard]] bool is_enabled_dayfirst() const { return _dayfirst; }
297 
303  [[nodiscard]] bool is_enabled_keep_quotes() const { return _keep_quotes; }
304 
310  [[nodiscard]] bool is_enabled_normalize_single_quotes() const { return _normalize_single_quotes; }
311 
317  [[nodiscard]] bool is_enabled_normalize_whitespace() const { return _normalize_whitespace; }
318 
324  [[nodiscard]] json_recovery_mode_t recovery_mode() const { return _recovery_mode; }
325 
331  [[nodiscard]] bool is_strict_validation() const { return _strict_validation; }
332 
340  [[nodiscard]] bool is_allowed_numeric_leading_zeros() const
341  {
342  return _allow_numeric_leading_zeros;
343  }
344 
353  [[nodiscard]] bool is_allowed_nonnumeric_numbers() const { return _allow_nonnumeric_numbers; }
354 
363  [[nodiscard]] bool is_allowed_unquoted_control_chars() const
364  {
365  return _allow_unquoted_control_chars;
366  }
367 
373  [[nodiscard]] std::vector<std::string> const& get_na_values() const { return _na_values; }
374 
380  void set_dtypes(std::vector<data_type> types) { _dtypes = std::move(types); }
381 
387  void set_dtypes(std::map<std::string, data_type> types) { _dtypes = std::move(types); }
388 
394  void set_dtypes(std::map<std::string, schema_element> types) { _dtypes = std::move(types); }
395 
401  void set_compression(compression_type comp_type) { _compression = comp_type; }
402 
408  void set_byte_range_offset(size_t offset) { _byte_range_offset = offset; }
409 
415  void set_byte_range_size(size_t size) { _byte_range_size = size; }
416 
422  void set_delimiter(char delimiter)
423  {
424  switch (delimiter) {
425  case '{':
426  case '[':
427  case '}':
428  case ']':
429  case ',':
430  case ':':
431  case '"':
432  case '\'':
433  case '\\':
434  case ' ':
435  case '\t':
436  case '\r': CUDF_FAIL("Unsupported delimiter character.", std::invalid_argument); break;
437  }
438  _delimiter = delimiter;
439  }
440 
446  void enable_lines(bool val) { _lines = val; }
447 
454  void enable_mixed_types_as_string(bool val) { _mixed_types_as_string = val; }
455 
465  void enable_prune_columns(bool val) { _prune_columns = val; }
466 
475  void enable_experimental(bool val) { _experimental = val; }
476 
482  void enable_dayfirst(bool val) { _dayfirst = val; }
483 
490  void enable_keep_quotes(bool val) { _keep_quotes = val; }
491 
498  void enable_normalize_single_quotes(bool val) { _normalize_single_quotes = val; }
499 
506  void enable_normalize_whitespace(bool val) { _normalize_whitespace = val; }
507 
513  void set_recovery_mode(json_recovery_mode_t val) { _recovery_mode = val; }
514 
520  void set_strict_validation(bool val) { _strict_validation = val; }
521 
531  {
532  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
533  _allow_numeric_leading_zeros = val;
534  }
535 
545  {
546  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
547  _allow_nonnumeric_numbers = val;
548  }
549 
560  {
561  CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work.");
562  _allow_unquoted_control_chars = val;
563  }
564 
570  void set_na_values(std::vector<std::string> vals) { _na_values = std::move(vals); }
571 };
572 
577  json_reader_options options;
578 
579  public:
/**
 * @brief Default constructor.
 *
 * The wrapped `options` member is default-constructed.
 */
585  explicit json_reader_options_builder() = default;
586 
592  explicit json_reader_options_builder(source_info src) : options{std::move(src)} {}
593 
600  json_reader_options_builder& dtypes(std::vector<data_type> types)
601  {
602  options._dtypes = std::move(types);
603  return *this;
604  }
605 
612  json_reader_options_builder& dtypes(std::map<std::string, data_type> types)
613  {
614  options._dtypes = std::move(types);
615  return *this;
616  }
617 
624  json_reader_options_builder& dtypes(std::map<std::string, schema_element> types)
625  {
626  options._dtypes = std::move(types);
627  return *this;
628  }
629 
637  {
638  options._compression = comp_type;
639  return *this;
640  }
641 
649  {
650  options._byte_range_offset = offset;
651  return *this;
652  }
653 
661  {
662  options._byte_range_size = size;
663  return *this;
664  }
665 
673  {
674  options.set_delimiter(delimiter);
675  return *this;
676  }
677 
685  {
686  options._lines = val;
687  return *this;
688  }
689 
698  {
699  options._mixed_types_as_string = val;
700  return *this;
701  }
702 
714  {
715  options._prune_columns = val;
716  return *this;
717  }
718 
729  {
730  options._experimental = val;
731  return *this;
732  }
733 
741  {
742  options._dayfirst = val;
743  return *this;
744  }
745 
754  {
755  options._keep_quotes = val;
756  return *this;
757  }
758 
767  {
768  options._normalize_single_quotes = val;
769  return *this;
770  }
771 
780  {
781  options._normalize_whitespace = val;
782  return *this;
783  }
784 
792  {
793  options._recovery_mode = val;
794  return *this;
795  }
796 
804  {
805  options.set_strict_validation(val);
806  return *this;
807  }
808 
819  {
820  options.allow_numeric_leading_zeros(val);
821  return *this;
822  }
823 
835  {
836  options.allow_nonnumeric_numbers(val);
837  return *this;
838  }
839 
850  {
851  options.allow_unquoted_control_chars(val);
852  return *this;
853  }
854 
861  json_reader_options_builder& na_values(std::vector<std::string> vals)
862  {
863  options.set_na_values(std::move(vals));
864  return *this;
865  }
866 
870  operator json_reader_options&&() { return std::move(options); }
871 
879  json_reader_options&& build() { return std::move(options); }
880 };
881 
900  json_reader_options options,
903  // end of group
905 
916 
921  // Specify the sink to use for writer output
922  sink_info _sink;
923  // maximum number of rows to write in each chunk (limits memory use)
924  size_type _rows_per_chunk = std::numeric_limits<size_type>::max();
925  // Set of columns to output
926  table_view _table;
927  // string to use for null entries
928  std::string _na_rep = "";
929  // Indicates whether to output nulls as 'null' or exclude the field
930  bool _include_nulls = false;
931  // Indicates whether to use JSON lines for records format
932  bool _lines = false;
933  // string to use for values != 0 in INT8 types (default 'true')
934  std::string _true_value = std::string{"true"};
935  // string to use for values == 0 in INT8 types (default 'false')
936  std::string _false_value = std::string{"false"};
937  // Names of all columns; if empty, writer will generate column names
938  std::optional<table_metadata> _metadata; // Optional column names
939 
947  : _sink(std::move(sink)), _rows_per_chunk(table.num_rows()), _table(std::move(table))
948  {
949  }
950 
952 
953  public:
/**
 * @brief Default constructor.
 *
 * Every member keeps the value given by its in-class initializer.
 */
959  explicit json_writer_options() = default;
960 
970 
976  [[nodiscard]] sink_info const& get_sink() const { return _sink; }
977 
983  [[nodiscard]] table_view const& get_table() const { return _table; }
984 
990  [[nodiscard]] std::optional<table_metadata> const& get_metadata() const { return _metadata; }
991 
997  [[nodiscard]] std::string const& get_na_rep() const { return _na_rep; }
998 
1004  [[nodiscard]] bool is_enabled_include_nulls() const { return _include_nulls; }
1005 
1011  [[nodiscard]] bool is_enabled_lines() const { return _lines; }
1012 
1018  [[nodiscard]] size_type get_rows_per_chunk() const { return _rows_per_chunk; }
1019 
1025  [[nodiscard]] std::string const& get_true_value() const { return _true_value; }
1026 
1032  [[nodiscard]] std::string const& get_false_value() const { return _false_value; }
1033 
1034  // Setter
1035 
1041  void set_table(table_view tbl) { _table = tbl; }
1042 
1048  void set_metadata(table_metadata metadata) { _metadata = std::move(metadata); }
1049 
1055  void set_na_rep(std::string val) { _na_rep = std::move(val); }
1056 
1062  void enable_include_nulls(bool val) { _include_nulls = val; }
1063 
1069  void enable_lines(bool val) { _lines = val; }
1070 
1076  void set_rows_per_chunk(size_type val) { _rows_per_chunk = val; }
1077 
1083  void set_true_value(std::string val) { _true_value = std::move(val); }
1084 
1090  void set_false_value(std::string val) { _false_value = std::move(val); }
1091 };
1092 
1097  json_writer_options options;
1098 
1099  public:
/**
 * @brief Default constructor.
 *
 * The wrapped `options` member is default-constructed.
 */
1105  explicit json_writer_options_builder() = default;
1106 
1114  : options{sink, table}
1115  {
1116  }
1117 
1125  {
1126  options._table = tbl;
1127  return *this;
1128  }
1129 
1137  {
1138  options._metadata = std::move(metadata);
1139  return *this;
1140  }
1141 
1149  {
1150  options._na_rep = std::move(val);
1151  return *this;
1152  };
1153 
1161  {
1162  options._include_nulls = val;
1163  return *this;
1164  }
1165 
1173  {
1174  options._lines = val;
1175  return *this;
1176  }
1177 
1185  {
1186  options._rows_per_chunk = val;
1187  return *this;
1188  }
1189 
1197  {
1198  options._true_value = std::move(val);
1199  return *this;
1200  }
1201 
1209  {
1210  options._false_value = std::move(val);
1211  return *this;
1212  }
1213 
1217  operator json_writer_options&&() { return std::move(options); }
1218 
1226  json_writer_options&& build() { return std::move(options); }
1227 };
1228 
1246 void write_json(json_writer_options const& options,
1248  // end of group
1250 } // namespace io
1251 } // namespace CUDF_EXPORT cudf
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
Builds settings to use for read_json().
Definition: io/json.hpp:576
json_reader_options_builder & normalize_single_quotes(bool val)
Set whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:766
json_reader_options_builder & nonnumeric_numbers(bool val)
Set whether specific unquoted number values are valid JSON. The values are NaN, +INF,...
Definition: io/json.hpp:834
json_reader_options_builder & keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:753
json_reader_options_builder & normalize_whitespace(bool val)
Set whether the reader should normalize unquoted whitespace.
Definition: io/json.hpp:779
json_reader_options_builder & numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this t...
Definition: io/json.hpp:818
json_reader_options_builder & dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:740
json_reader_options_builder & recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:791
json_reader_options_builder & na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:861
json_reader_options_builder & delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:672
json_reader_options_builder & prune_columns(bool val)
Set whether to prune columns on read, selected based on the dtypes option.
Definition: io/json.hpp:713
json_reader_options_builder & experimental(bool val)
Set whether to enable experimental features.
Definition: io/json.hpp:728
json_reader_options_builder & lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:684
json_reader_options_builder & dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:600
json_reader_options && build()
Move the json_reader_options member once it's built.
Definition: io/json.hpp:879
json_reader_options_builder & mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:697
json_reader_options_builder & unquoted_control_chars(bool val)
Set whether chars >= 0 and < 32 are allowed in a quoted string without some form of escaping....
Definition: io/json.hpp:849
json_reader_options_builder & compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:636
json_reader_options_builder(source_info src)
Constructor from source info.
Definition: io/json.hpp:592
json_reader_options_builder & strict_validation(bool val)
Set whether json validation should be strict or not.
Definition: io/json.hpp:803
json_reader_options_builder & byte_range_size(size_type size)
Set number of bytes to read.
Definition: io/json.hpp:660
json_reader_options_builder & dtypes(std::map< std::string, schema_element > types)
Set data types for columns to be read.
Definition: io/json.hpp:624
json_reader_options_builder & byte_range_offset(size_type offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:648
json_reader_options_builder()=default
Default constructor.
json_reader_options_builder & dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:612
Input arguments to the read_json interface.
Definition: io/json.hpp:89
bool is_allowed_nonnumeric_numbers() const
Whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:353
void enable_mixed_types_as_string(bool val)
Set whether to parse mixed types as a string column. Also enables forcing to read a struct as string ...
Definition: io/json.hpp:454
void set_compression(compression_type comp_type)
Set the compression type.
Definition: io/json.hpp:401
void set_dtypes(std::vector< data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:380
void allow_unquoted_control_chars(bool val)
Set whether in a quoted string should characters greater than or equal to 0 and less than 32 be allow...
Definition: io/json.hpp:559
void enable_normalize_single_quotes(bool val)
Set whether the reader should enable normalization of single quotes around strings.
Definition: io/json.hpp:498
bool is_allowed_numeric_leading_zeros() const
Whether leading zeros are allowed in numeric values.
Definition: io/json.hpp:340
void enable_prune_columns(bool val)
Set whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:465
bool is_enabled_keep_quotes() const
Whether the reader should keep quotes of string values.
Definition: io/json.hpp:303
void enable_normalize_whitespace(bool val)
Set whether the reader should enable normalization of unquoted whitespace.
Definition: io/json.hpp:506
void allow_nonnumeric_numbers(bool val)
Set whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity,...
Definition: io/json.hpp:544
size_t get_byte_range_offset() const
Returns number of bytes to skip from source start.
Definition: io/json.hpp:204
source_info const & get_source() const
Returns source info.
Definition: io/json.hpp:177
void enable_experimental(bool val)
Set whether to enable experimental features.
Definition: io/json.hpp:475
void set_dtypes(std::map< std::string, data_type > types)
Set data types for columns to be read.
Definition: io/json.hpp:387
bool is_enabled_prune_columns() const
Whether to prune columns on read, selected based on the set_dtypes option.
Definition: io/json.hpp:280
char get_delimiter() const
Returns delimiter separating records in JSON lines.
Definition: io/json.hpp:254
bool is_enabled_lines() const
Whether to read the file as a json object per line.
Definition: io/json.hpp:261
void allow_numeric_leading_zeros(bool val)
Set whether leading zeros are allowed in numeric values. Strict validation must be enabled for this t...
Definition: io/json.hpp:530
void set_strict_validation(bool val)
Set whether strict validation is enabled or not.
Definition: io/json.hpp:520
bool is_enabled_mixed_types_as_string() const
Whether to parse mixed types as a string column.
Definition: io/json.hpp:268
json_reader_options()=default
Default constructor.
void set_na_values(std::vector< std::string > vals)
Sets additional values to recognize as null values.
Definition: io/json.hpp:570
void enable_dayfirst(bool val)
Set whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:482
std::variant< std::vector< data_type >, std::map< std::string, data_type >, std::map< std::string, schema_element > > const & get_dtypes() const
Returns data types of the columns.
Definition: io/json.hpp:187
size_t get_byte_range_size_with_padding() const
Returns number of bytes to read with padding.
Definition: io/json.hpp:218
void set_recovery_mode(json_recovery_mode_t val)
Specifies the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:513
bool is_enabled_normalize_whitespace() const
Whether the reader should normalize unquoted whitespace characters.
Definition: io/json.hpp:317
bool is_strict_validation() const
Whether json validation should be enforced strictly or not.
Definition: io/json.hpp:331
void set_delimiter(char delimiter)
Set delimiter separating records in JSON lines.
Definition: io/json.hpp:422
void set_byte_range_offset(size_t offset)
Set number of bytes to skip from source start.
Definition: io/json.hpp:408
void enable_lines(bool val)
Set whether to read the file as a json object per line.
Definition: io/json.hpp:446
void enable_keep_quotes(bool val)
Set whether the reader should keep quotes of string values.
Definition: io/json.hpp:490
bool is_enabled_normalize_single_quotes() const
Whether the reader should normalize single quotes around strings.
Definition: io/json.hpp:310
compression_type get_compression() const
Returns compression format of the source.
Definition: io/json.hpp:197
void set_dtypes(std::map< std::string, schema_element > types)
Set data types for a potentially nested column hierarchy.
Definition: io/json.hpp:394
size_t get_byte_range_size() const
Returns number of bytes to read.
Definition: io/json.hpp:211
bool is_enabled_experimental() const
Whether to enable experimental features.
Definition: io/json.hpp:289
json_recovery_mode_t recovery_mode() const
Queries the JSON reader's behavior on invalid JSON lines.
Definition: io/json.hpp:324
static json_reader_options_builder builder(source_info src)
Create a json_reader_options_builder which will build json_reader_options.
bool is_enabled_dayfirst() const
Whether to parse dates as DD/MM versus MM/DD.
Definition: io/json.hpp:296
size_t get_byte_range_padding() const
Returns number of bytes to pad when reading.
Definition: io/json.hpp:232
bool is_allowed_unquoted_control_chars() const
Whether in a quoted string should characters greater than or equal to 0 and less than 32 be allowed w...
Definition: io/json.hpp:363
std::vector< std::string > const & get_na_values() const
Returns additional values to recognize as null values.
Definition: io/json.hpp:373
void set_byte_range_size(size_t size)
Set number of bytes to read.
Definition: io/json.hpp:415
Builder to build options for write_json()
Definition: io/json.hpp:1096
json_writer_options_builder & include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1160
json_writer_options_builder & table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1124
json_writer_options_builder()=default
Default constructor.
json_writer_options_builder & rows_per_chunk(int val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1184
json_writer_options_builder & true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1196
json_writer_options_builder & false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1208
json_writer_options_builder(sink_info const &sink, table_view const &table)
Constructor from sink and table.
Definition: io/json.hpp:1113
json_writer_options_builder & na_rep(std::string val)
Sets string to use for null entries.
Definition: io/json.hpp:1148
json_writer_options_builder & metadata(table_metadata metadata)
Sets optional metadata (with column names).
Definition: io/json.hpp:1136
json_writer_options && build()
Move the json_writer_options member once it's built.
Definition: io/json.hpp:1226
json_writer_options_builder & lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1172
Settings to use for write_json().
Definition: io/json.hpp:920
table_view const & get_table() const
Returns table that would be written to output.
Definition: io/json.hpp:983
void set_false_value(std::string val)
Sets string used for values == 0 in INT8 types.
Definition: io/json.hpp:1090
void enable_include_nulls(bool val)
Enables/Disables output of nulls as 'null'.
Definition: io/json.hpp:1062
bool is_enabled_include_nulls() const
Whether to output nulls as 'null'.
Definition: io/json.hpp:1004
void enable_lines(bool val)
Enables/Disables JSON lines for records format.
Definition: io/json.hpp:1069
void set_na_rep(std::string val)
Sets string to use for null entries.
Definition: io/json.hpp:1055
static json_writer_options_builder builder(sink_info const &sink, table_view const &table)
Create builder to create json_writer_options.
json_writer_options()=default
Default constructor.
void set_true_value(std::string val)
Sets string used for values != 0 in INT8 types.
Definition: io/json.hpp:1083
sink_info const & get_sink() const
Returns sink used for writer output.
Definition: io/json.hpp:976
void set_rows_per_chunk(size_type val)
Sets maximum number of rows to process for each file write.
Definition: io/json.hpp:1076
std::string const & get_true_value() const
Returns string used for values != 0 in INT8 types.
Definition: io/json.hpp:1025
void set_table(table_view tbl)
Sets table to be written to output.
Definition: io/json.hpp:1041
std::string const & get_false_value() const
Returns string used for values == 0 in INT8 types.
Definition: io/json.hpp:1032
bool is_enabled_lines() const
Whether to use JSON lines for records format.
Definition: io/json.hpp:1011
size_type get_rows_per_chunk() const
Returns maximum number of rows to process for each file write.
Definition: io/json.hpp:1018
std::optional< table_metadata > const & get_metadata() const
Returns metadata information.
Definition: io/json.hpp:990
std::string const & get_na_rep() const
Returns string to use for null entries.
Definition: io/json.hpp:997
void set_metadata(table_metadata metadata)
Sets metadata.
Definition: io/json.hpp:1048
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
size_type num_rows() const noexcept
Returns the number of rows.
Definition: table.hpp:93
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
table_with_metadata read_json(json_reader_options options, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Reads a JSON dataset into a set of columns.
json_recovery_mode_t
Control the error recovery behavior of the json parser.
Definition: io/json.hpp:61
@ RECOVER_WITH_NULL
Recovers from an error, replacing invalid records with null.
@ FAIL
Does not recover from an error when encountering an invalid format.
compression_type
Compression algorithms.
Definition: io/types.hpp:57
void write_json(json_writer_options const &options, rmm::cuda_stream_view stream=cudf::get_default_stream())
Writes a set of columns to JSON format.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:217
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: aggregation.hpp:35
Allows specifying the target types for nested JSON data via json_reader_options' set_dtypes method.
Definition: io/json.hpp:46
data_type type
The type that this column should be converted to.
Definition: io/json.hpp:50
std::map< std::string, schema_element > child_types
Allows specifying this column's child columns target type.
Definition: io/json.hpp:55
Destination information for write interfaces.
Definition: io/types.hpp:512
Source information for read interfaces.
Definition: io/types.hpp:337
Table metadata returned by IO readers.
Definition: io/types.hpp:277
Table with table metadata used by io readers to return the metadata by value.
Definition: io/types.hpp:292
Class definitions for (mutable)_table_view
Type declarations for libcudf.