orc_types.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cstdint>
20 
21 namespace cudf::io::orc {
/**
 * @brief Identifies a compression algorithm.
 *
 * Stored in a single unsigned byte (`uint8_t`). Every enumerator is given an explicit value,
 * and the values run contiguously from 0 to 5.
 *
 * NOTE(review): the names and values look like they mirror the `CompressionKind` enum of the
 * ORC file-format specification -- confirm against the spec before adding or renumbering
 * entries.
 */
31 enum CompressionKind : uint8_t {
32  NONE = 0,
33  ZLIB = 1,
34  SNAPPY = 2,
35  LZO = 3,
36  LZ4 = 4,
37  ZSTD = 5,
38 };
39 
/**
 * @brief Identifies a data type in an orc file.
 *
 * Stored in a single signed byte (`int8_t`); the signed underlying type is what allows the
 * negative `INVALID_TYPE_KIND` sentinel. The remaining enumerators are explicitly numbered
 * and run contiguously from 0 to 17.
 *
 * NOTE(review): the non-negative values presumably match the type-kind numbering of the ORC
 * file-format specification -- confirm before adding or renumbering entries.
 */
43 enum TypeKind : int8_t {
44  INVALID_TYPE_KIND = -1,  // sentinel; the only negative value in this enum
45  BOOLEAN = 0,
46  BYTE = 1,
47  SHORT = 2,
48  INT = 3,
49  LONG = 4,
50  FLOAT = 5,
51  DOUBLE = 6,
52  STRING = 7,
53  BINARY = 8,
54  TIMESTAMP = 9,
55  LIST = 10,
56  MAP = 11,
57  STRUCT = 12,
58  UNION = 13,
59  DECIMAL = 14,
60  DATE = 15,
61  VARCHAR = 16,
62  CHAR = 17,
63 };
64 
/**
 * @brief Identifies the type of data stream.
 *
 * Stored in a single signed byte (`int8_t`); the signed underlying type is what allows the
 * negative `INVALID_STREAM_KIND` sentinel. The remaining enumerators are explicitly numbered
 * and run contiguously from 0 to 8.
 */
68 enum StreamKind : int8_t {
69  INVALID_STREAM_KIND = -1,  // sentinel; the only negative value in this enum
70  PRESENT = 0, // boolean stream indicating whether the next value is non-null
71  DATA = 1, // the primary data stream
72  LENGTH = 2, // the length of each value, for variable-length data
73  DICTIONARY_DATA = 3, // the dictionary blob
74  DICTIONARY_COUNT = 4, // deprecated prior to Hive 0.11
75  SECONDARY = 5, // a secondary data stream
76  ROW_INDEX = 6, // the index used for seeking to particular row groups
77  BLOOM_FILTER = 7, // original bloom filters, used before ORC-101
78  BLOOM_FILTER_UTF8 = 8, // bloom filters that consistently use UTF-8
79 };
80 
/**
 * @brief Identifies the encoding of columns.
 *
 * Stored in a single signed byte (`int8_t`); the signed underlying type is what allows the
 * negative `INVALID_ENCODING_KIND` sentinel. The remaining enumerators are explicitly
 * numbered 0 to 3: one direct and one dictionary-based variant each for RLE v1 and RLE v2.
 */
84 enum ColumnEncodingKind : int8_t {
85  INVALID_ENCODING_KIND = -1,  // sentinel; the only negative value in this enum
86  DIRECT = 0, // values are mapped directly to the stream, using RLE v1
87  DICTIONARY = 1, // values go through a dictionary of unique values, using RLE v1
88  DIRECT_V2 = 2, // direct encoding, using RLE v2
89  DICTIONARY_V2 = 3, // dictionary-based encoding, using RLE v2
90 };
91 
/**
 * @brief Identifies the type of encoding in a protocol buffer.
 *
 * Stored in a single unsigned byte (`uint8_t`). The enumerators are explicitly numbered and
 * cover every value from 0 to 7, including two placeholder entries (`INVALID_6`,
 * `INVALID_7`).
 *
 * NOTE(review): this presumably corresponds to the protobuf wire-type field, with the two
 * `INVALID_*` entries standing for the values the wire format leaves unassigned -- confirm
 * against the protobuf encoding documentation.
 */
95 enum ProtofType : uint8_t {
96  VARINT = 0,
97  FIXED64 = 1,
98  FIXEDLEN = 2,
99  START_GROUP = 3, // deprecated
100  END_GROUP = 4, // deprecated
101  FIXED32 = 5,
102  INVALID_6 = 6,  // placeholder; see the review note above this enum
103  INVALID_7 = 7,  // placeholder; see the review note above this enum
104 };
105  // end of group
107 } // namespace cudf::io::orc
CompressionKind
Identifies a compression algorithm.
Definition: orc_types.hpp:31
ColumnEncodingKind
Identifies the encoding of columns.
Definition: orc_types.hpp:84
StreamKind
Identifies the type of data stream.
Definition: orc_types.hpp:68
ProtofType
Identifies the type of encoding in a protocol buffer.
Definition: orc_types.hpp:95
TypeKind
Identifies a data type in an orc file.
Definition: orc_types.hpp:43
Orc I/O interfaces.