arrow_io_source.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include "datasource.hpp"
20 
21 #include <cudf/utilities/export.hpp>
22 
23 #include <arrow/filesystem/filesystem.h>
24 #include <arrow/io/interfaces.h>
25 
26 #include <memory>
27 #include <string>
28 #include <utility>
29 
30 namespace CUDF_EXPORT cudf {
31 namespace io {
/**
 * @brief Implementation class for reading from an Apache Arrow file.
 *
 * Derives from `datasource` (the interface class for providing input data to
 * the readers) and overrides its `host_read` and `size` members. Only the
 * constructor taking an already-open file is defined in this header; the
 * remaining members are declared here and defined elsewhere.
 *
 * NOTE(review): `size_t` and `uint8_t` are used below without a direct
 * `<cstddef>` / `<cstdint>` include in this header; presumably they arrive
 * transitively via "datasource.hpp" -- confirm.
 */
42 class arrow_io_source : public datasource {
43  public:
/**
 * @brief Constructs an object from an Apache Arrow Filesystem URI.
 *
 * Declared only; the definition is not part of this header.
 *
 * @param arrow_uri Apache Arrow Filesystem URI identifying the source
 */
49  explicit arrow_io_source(std::string const& arrow_uri);
50 
/**
 * @brief Constructs an object from an arrow source object.
 *
 * Takes the shared pointer by value and moves it into `arrow_file`.
 * `filesystem` is not mentioned in the initializer list, so it is
 * default-constructed (an empty `std::shared_ptr`) on this path.
 *
 * @param file The arrow random-access file to read from
 */
56  explicit arrow_io_source(std::shared_ptr<arrow::io::RandomAccessFile> file)
57  : arrow_file(std::move(file))
58  {
59  }
60 
/**
 * @brief Returns a buffer with a subset of data from the arrow source.
 *
 * @param offset Start of the requested range (presumably a byte offset -- confirm
 *               against the `datasource` contract)
 * @param size   Extent of the requested range (presumably in bytes -- confirm)
 * @return A `buffer` holding the requested subset of the data
 */
68  std::unique_ptr<buffer> host_read(size_t offset, size_t size) override;
69 
/**
 * @brief Reads a selected range from the arrow source into a preallocated buffer.
 *
 * @param offset Start of the requested range (presumably a byte offset -- confirm)
 * @param size   Extent of the requested range (presumably in bytes -- confirm)
 * @param dst    Preallocated destination buffer supplied by the caller
 * @return NOTE(review): presumably the number of bytes actually read -- confirm
 *         against the `datasource` contract
 */
78  size_t host_read(size_t offset, size_t size, uint8_t* dst) override;
/**
 * @brief Returns the size of the data in the arrow source.
 *
 * @return The size of the data in the arrow source
 */
84  [[nodiscard]] size_t size() const override;
85 
86  private:
// NOTE(review): presumably populated by the URI constructor and held so the
// filesystem stays alive as long as the file -- confirm in the implementation.
87  std::shared_ptr<arrow::fs::FileSystem> filesystem;
// The arrow file being read; set directly by the RandomAccessFile constructor.
88  std::shared_ptr<arrow::io::RandomAccessFile> arrow_file;
89 };
90  // end of group
92 } // namespace io
93 } // namespace CUDF_EXPORT cudf
Implementation class for reading from an Apache Arrow file. The file could be a memory-mapped file or...
arrow_io_source(std::string const &arrow_uri)
Constructs an object from an Apache Arrow Filesystem URI.
size_t host_read(size_t offset, size_t size, uint8_t *dst) override
Reads a selected range from the arrow source into a preallocated buffer.
arrow_io_source(std::shared_ptr< arrow::io::RandomAccessFile > file)
Constructs an object from an arrow source object.
size_t size() const override
Returns the size of the data in the arrow source.
std::unique_ptr< buffer > host_read(size_t offset, size_t size) override
Returns a buffer with a subset of data from the arrow source.
Interface class for providing input data to the readers.
Definition: datasource.hpp:42
cuDF interfaces
Definition: aggregation.hpp:35