orc_types.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/utilities/export.hpp>
20 
21 #include <cstdint>
22 
23 namespace CUDF_EXPORT cudf {
24 namespace io::orc {
/**
 * @brief Identifies a compression algorithm.
 *
 * The underlying type is an unsigned byte and every enumerator is given an
 * explicit value, numbered consecutively from 0.
 */
enum CompressionKind : uint8_t {
  NONE   = 0,
  ZLIB   = 1,
  SNAPPY = 2,
  LZO    = 3,
  LZ4    = 4,
  ZSTD   = 5,
};
42 
/**
 * @brief Identifies a data type in an ORC file.
 *
 * The underlying type is a signed byte. `INVALID_TYPE_KIND` is the only
 * negative enumerator; all others are numbered consecutively from 0.
 */
enum TypeKind : int8_t {
  INVALID_TYPE_KIND = -1,
  BOOLEAN           = 0,
  BYTE              = 1,
  SHORT             = 2,
  INT               = 3,
  LONG              = 4,
  FLOAT             = 5,
  DOUBLE            = 6,
  STRING            = 7,
  BINARY            = 8,
  TIMESTAMP         = 9,
  LIST              = 10,
  MAP               = 11,
  STRUCT            = 12,
  UNION             = 13,
  DECIMAL           = 14,
  DATE              = 15,
  VARCHAR           = 16,
  CHAR              = 17,
};
67 
/**
 * @brief Identifies the type of data stream.
 *
 * The underlying type is a signed byte. `INVALID_STREAM_KIND` is the only
 * negative enumerator; all others are numbered consecutively from 0.
 */
enum StreamKind : int8_t {
  INVALID_STREAM_KIND = -1,
  PRESENT             = 0,  ///< Boolean stream: whether the next value is non-null
  DATA                = 1,  ///< Primary data stream
  LENGTH              = 2,  ///< Length of each value, for variable-length data
  DICTIONARY_DATA     = 3,  ///< Dictionary blob
  DICTIONARY_COUNT    = 4,  ///< Deprecated prior to Hive 0.11
  SECONDARY           = 5,  ///< Secondary data stream
  ROW_INDEX           = 6,  ///< Index for seeking to particular row groups
  BLOOM_FILTER        = 7,  ///< Original bloom filters, used before ORC-101
  BLOOM_FILTER_UTF8   = 8,  ///< Bloom filters that consistently use UTF-8
};
83 
/**
 * @brief Identifies the encoding of columns.
 *
 * The underlying type is a signed byte. `INVALID_ENCODING_KIND` is the only
 * negative enumerator; all others are numbered consecutively from 0.
 */
enum ColumnEncodingKind : int8_t {
  INVALID_ENCODING_KIND = -1,
  DIRECT                = 0,  ///< Mapped directly to the stream, using RLE v1
  DICTIONARY            = 1,  ///< Dictionary of unique values, using RLE v1
  DIRECT_V2             = 2,  ///< Direct encoding, using RLE v2
  DICTIONARY_V2         = 3,  ///< Dictionary-based encoding, using RLE v2
};
94 
/**
 * @brief Identifies the type of encoding in a protocol buffer.
 *
 * The underlying type is an unsigned byte and all eight values 0-7 are named,
 * including the two `INVALID_*` entries for 6 and 7.
 */
enum ProtofType : uint8_t {
  VARINT      = 0,
  FIXED64     = 1,
  FIXEDLEN    = 2,
  START_GROUP = 3,  ///< Deprecated
  END_GROUP   = 4,  ///< Deprecated
  FIXED32     = 5,
  INVALID_6   = 6,
  INVALID_7   = 7,
};
108  // end of group
110 } // namespace io::orc
111 } // namespace CUDF_EXPORT cudf
CompressionKind
Identifies a compression algorithm.
Definition: orc_types.hpp:34
ColumnEncodingKind
Identifies the encoding of columns.
Definition: orc_types.hpp:87
StreamKind
Identifies the type of data stream.
Definition: orc_types.hpp:71
ProtofType
Identifies the type of encoding in a protocol buffer.
Definition: orc_types.hpp:98
TypeKind
Identifies a data type in an ORC file.
Definition: orc_types.hpp:46
cuDF interfaces
Definition: aggregation.hpp:35