orc_types.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/utilities/export.hpp>
9 
10 #include <cstdint>
11 
12 namespace CUDF_EXPORT cudf {
13 namespace io::orc {
/**
 * @brief Identifies a compression algorithm.
 *
 * Stored in a single unsigned byte. Every enumerator carries an explicit value.
 * NOTE(review): the numbering presumably mirrors the ORC file-format definition;
 * confirm against the specification before changing any value.
 */
enum CompressionKind : uint8_t {
  NONE   = 0,
  ZLIB   = 1,
  SNAPPY = 2,
  LZO    = 3,
  LZ4    = 4,
  ZSTD   = 5,
};
31 
/**
 * @brief Identifies a data type in an orc file.
 *
 * Stored in a single signed byte so that the negative `INVALID_TYPE_KIND`
 * value is representable. Every enumerator carries an explicit value.
 * NOTE(review): the non-negative values presumably mirror the ORC file-format
 * definition; confirm against the specification before changing any value.
 */
enum TypeKind : int8_t {
  INVALID_TYPE_KIND = -1,
  BOOLEAN           = 0,
  BYTE              = 1,
  SHORT             = 2,
  INT               = 3,
  LONG              = 4,
  FLOAT             = 5,
  DOUBLE            = 6,
  STRING            = 7,
  BINARY            = 8,
  TIMESTAMP         = 9,
  LIST              = 10,
  MAP               = 11,
  STRUCT            = 12,
  UNION             = 13,
  DECIMAL           = 14,
  DATE              = 15,
  VARCHAR           = 16,
  CHAR              = 17,
};
56 
/**
 * @brief Identifies the type of data stream.
 *
 * Stored in a single signed byte so that the negative `INVALID_STREAM_KIND`
 * value is representable. Every enumerator carries an explicit value.
 * NOTE(review): the non-negative values presumably mirror the ORC file-format
 * definition; confirm against the specification before changing any value.
 */
enum StreamKind : int8_t {
  INVALID_STREAM_KIND = -1,
  PRESENT             = 0,  ///< boolean stream of whether the next value is non-null
  DATA                = 1,  ///< the primary data stream
  LENGTH              = 2,  ///< the length of each value for variable length data
  DICTIONARY_DATA     = 3,  ///< the dictionary blob
  DICTIONARY_COUNT    = 4,  ///< deprecated prior to Hive 0.11
  SECONDARY           = 5,  ///< a secondary data stream
  ROW_INDEX           = 6,  ///< the index for seeking to particular row groups
  BLOOM_FILTER        = 7,  ///< original bloom filters used before ORC-101
  BLOOM_FILTER_UTF8   = 8,  ///< bloom filters that consistently use utf8
};
72 
/**
 * @brief Identifies the encoding of columns.
 *
 * Stored in a single signed byte so that the negative `INVALID_ENCODING_KIND`
 * value is representable. Every enumerator carries an explicit value.
 * NOTE(review): the non-negative values presumably mirror the ORC file-format
 * definition; confirm against the specification before changing any value.
 */
enum ColumnEncodingKind : int8_t {
  INVALID_ENCODING_KIND = -1,
  DIRECT                = 0,  ///< the encoding is mapped directly to the stream using RLE v1
  DICTIONARY            = 1,  ///< the encoding uses a dictionary of unique values using RLE v1
  DIRECT_V2             = 2,  ///< the encoding is direct using RLE v2
  DICTIONARY_V2         = 3,  ///< the encoding is dictionary-based using RLE v2
};
83 
/**
 * @brief Identifies the type of encoding in a protocol buffer.
 *
 * Stored in a single unsigned byte. The enumerators cover every value from
 * 0 through 7; 6 and 7 are named as invalid.
 * NOTE(review): the numbering looks like the Protocol Buffers wire-type IDs;
 * confirm against the encoding documentation before changing any value.
 */
enum ProtofType : uint8_t {
  VARINT      = 0,
  FIXED64     = 1,
  FIXEDLEN    = 2,
  START_GROUP = 3,  ///< deprecated
  END_GROUP   = 4,  ///< deprecated
  FIXED32     = 5,
  INVALID_6   = 6,
  INVALID_7   = 7,
};
97  // end of group
99 } // namespace io::orc
100 } // namespace CUDF_EXPORT cudf
CompressionKind
Identifies a compression algorithm.
Definition: orc_types.hpp:23
ColumnEncodingKind
Identifies the encoding of columns.
Definition: orc_types.hpp:76
StreamKind
Identifies the type of data stream.
Definition: orc_types.hpp:60
ProtofType
Identifies the type of encoding in a protocol buffer.
Definition: orc_types.hpp:87
TypeKind
Identifies a data type in an orc file.
Definition: orc_types.hpp:35
cuDF interfaces
Definition: host_udf.hpp:26