arrow_io_source.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
#include "datasource.hpp"

#include <arrow/filesystem/filesystem.h>
#include <arrow/io/interfaces.h>

#include <memory>
#include <string>
#include <utility>
26 
27 namespace cudf::io {
38 class arrow_io_source : public datasource {
39  public:
45  explicit arrow_io_source(std::string const& arrow_uri);
46 
52  explicit arrow_io_source(std::shared_ptr<arrow::io::RandomAccessFile> file) : arrow_file(file) {}
53 
61  std::unique_ptr<buffer> host_read(size_t offset, size_t size) override;
62 
71  size_t host_read(size_t offset, size_t size, uint8_t* dst) override;
77  [[nodiscard]] size_t size() const override;
78 
79  private:
80  std::shared_ptr<arrow::fs::FileSystem> filesystem;
81  std::shared_ptr<arrow::io::RandomAccessFile> arrow_file;
82 };
83  // end of group
85 } // namespace cudf::io
Implementation class for reading from an Apache Arrow file. The file could be a memory-mapped file or...
arrow_io_source(std::string const &arrow_uri)
Constructs an object from an Apache Arrow Filesystem URI.
size_t host_read(size_t offset, size_t size, uint8_t *dst) override
Reads a selected range from the arrow source into a preallocated buffer.
arrow_io_source(std::shared_ptr< arrow::io::RandomAccessFile > file)
Constructs an object from an arrow source object.
size_t size() const override
Returns the size of the data in the arrow source.
std::unique_ptr< buffer > host_read(size_t offset, size_t size) override
Returns a buffer with a subset of data from the arrow source.
Interface class for providing input data to the readers.
Definition: datasource.hpp:41
IO interfaces.