remote_handle.hpp
1 /*
2  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cassert>
19 #include <cstddef>
20 #include <cstring>
21 #include <iostream>
22 #include <memory>
23 #include <optional>
24 #include <regex>
25 #include <sstream>
26 #include <stdexcept>
27 #include <string>
28 
29 #include <kvikio/defaults.hpp>
30 #include <kvikio/error.hpp>
31 #include <kvikio/parallel_operation.hpp>
32 #include <kvikio/posix_io.hpp>
33 #include <kvikio/utils.hpp>
34 
35 struct curl_slist;
36 
37 namespace kvikio {
38 
39 class CurlHandle; // Forward declaration only; the class itself is defined in libcurl.hpp.
40 
50  public:
/**
 * @brief Set needed connection options on a curl handle.
 *
 * Pure virtual: each concrete endpoint type provides its own implementation.
 *
 * @param curl The curl handle to configure; taken by non-const reference.
 */
58  virtual void setopt(CurlHandle& curl) = 0;
59 
/**
 * @brief Get a description of this remote endpoint instance.
 *
 * Pure virtual: each concrete endpoint type provides its own implementation.
 *
 * @return A string describing the endpoint.
 */
65  virtual std::string str() const = 0;
66 
/**
 * @brief Virtual destructor.
 *
 * Endpoints are held through base-class pointers (RemoteHandle stores a
 * `std::unique_ptr<RemoteEndpoint>`), so destruction must dispatch to the derived type.
 */
67  virtual ~RemoteEndpoint() = default;
68 };
69 
73 class HttpEndpoint : public RemoteEndpoint {
74  private:
75  std::string _url;
76 
77  public:
83  HttpEndpoint(std::string url);
84  void setopt(CurlHandle& curl) override;
85  std::string str() const override;
86  ~HttpEndpoint() override = default;
87 };
88 
92 class S3Endpoint : public RemoteEndpoint {
93  private:
94  std::string _url;
95  std::string _aws_sigv4;
96  std::string _aws_userpwd;
97  curl_slist* _curl_header_list{};
98 
112  static std::string unwrap_or_default(std::optional<std::string> aws_arg,
113  std::string const& env_var,
114  std::string const& err_msg = "");
115 
116  public:
132  static std::string url_from_bucket_and_object(std::string bucket_name,
133  std::string object_name,
134  std::optional<std::string> aws_region,
135  std::optional<std::string> aws_endpoint_url);
136 
145  [[nodiscard]] static std::pair<std::string, std::string> parse_s3_url(std::string const& s3_url);
146 
162  S3Endpoint(std::string url,
163  std::optional<std::string> aws_region = std::nullopt,
164  std::optional<std::string> aws_access_key = std::nullopt,
165  std::optional<std::string> aws_secret_access_key = std::nullopt,
166  std::optional<std::string> aws_session_token = std::nullopt);
167 
185  S3Endpoint(std::pair<std::string, std::string> bucket_and_object_names,
186  std::optional<std::string> aws_region = std::nullopt,
187  std::optional<std::string> aws_access_key = std::nullopt,
188  std::optional<std::string> aws_secret_access_key = std::nullopt,
189  std::optional<std::string> aws_endpoint_url = std::nullopt,
190  std::optional<std::string> aws_session_token = std::nullopt);
191 
192  void setopt(CurlHandle& curl) override;
193  std::string str() const override;
194  ~S3Endpoint() override;
195 };
196 
201  private:
// The endpoint this handle reads from, held through an owning pointer to the abstract base.
202  std::unique_ptr<RemoteEndpoint> _endpoint;
// Size value associated with the handle; presumably the file size reported by nbytes().
203  std::size_t _nbytes;
204 
205  public:
/**
 * @brief Create a new remote handle from an endpoint and a file size.
 *
 * @param endpoint Remote endpoint; ownership is transferred to the handle.
 * @param nbytes The file size.
 */
212  RemoteHandle(std::unique_ptr<RemoteEndpoint> endpoint, std::size_t nbytes);
213 
/**
 * @brief Create a new remote handle from an endpoint (infers the file size).
 *
 * NOTE(review): this single-argument constructor is not `explicit`, so a
 * `std::unique_ptr<RemoteEndpoint>` converts implicitly to a RemoteHandle - confirm that
 * this is intended.
 *
 * @param endpoint Remote endpoint; ownership is transferred to the handle.
 */
221  RemoteHandle(std::unique_ptr<RemoteEndpoint> endpoint);
222 
223  // A remote handle is moveable but not copyable.
// Both move operations are the compiler-generated member-wise versions, and both copy
// operations are deleted. The `_endpoint` member is a `std::unique_ptr`, which cannot be
// copied in any case.
224  RemoteHandle(RemoteHandle&& o) = default;
225  RemoteHandle& operator=(RemoteHandle&& o) = default;
226  RemoteHandle(RemoteHandle const&) = delete;
227  RemoteHandle& operator=(RemoteHandle const&) = delete;
228 
/**
 * @brief Get the file size.
 *
 * Declared `noexcept`; the result must not be discarded.
 *
 * @return The file size (in bytes, going by the name - TODO confirm).
 */
236  [[nodiscard]] std::size_t nbytes() const noexcept;
237 
/**
 * @brief Get a const reference to the underlying remote endpoint.
 *
 * Declared `noexcept`; the result must not be discarded.
 *
 * @return Const reference to the endpoint. Presumably it refers to the object held by this
 * handle and so should not be used after the handle is destroyed or moved from.
 */
243  [[nodiscard]] RemoteEndpoint const& endpoint() const noexcept;
244 
/**
 * @brief Read from remote source into buffer (host or device memory).
 *
 * @param buf Destination buffer (host or device memory).
 * @param size Number of bytes to read (presumably; TODO confirm units).
 * @param file_offset Offset into the remote file at which to start reading; defaults to 0.
 * @return A size value, presumably the number of bytes read - TODO confirm.
 */
257  std::size_t read(void* buf, std::size_t size, std::size_t file_offset = 0);
258 
/**
 * @brief Read from remote source into buffer (host or device memory) in parallel.
 *
 * NOTE(review): `std::future` is used here, but `<future>` is not among this header's
 * direct includes; it is presumably provided transitively by one of the kvikio headers.
 *
 * @param buf Destination buffer (host or device memory).
 * @param size Number of bytes to read (presumably; TODO confirm units).
 * @param file_offset Offset into the remote file at which to start reading; defaults to 0.
 * @param task_size Size of each parallel task; defaults to `defaults::task_size()`.
 * @return Future for a size value, presumably the total number of bytes read - TODO confirm.
 */
271  std::future<std::size_t> pread(void* buf,
272  std::size_t size,
273  std::size_t file_offset = 0,
274  std::size_t task_size = defaults::task_size());
275 };
276 
277 } // namespace kvikio
Representation of a curl easy handle pointer and its operations.
Definition: libcurl.hpp:91
A remote endpoint using http.
std::string str() const override
Get a description of this remote endpoint instance.
HttpEndpoint(std::string url)
Create an http endpoint from a url.
void setopt(CurlHandle &curl) override
Set needed connection options on a curl handle.
Abstract base class for remote endpoints.
virtual void setopt(CurlHandle &curl)=0
Set needed connection options on a curl handle.
virtual std::string str() const =0
Get a description of this remote endpoint instance.
Handle of remote file.
std::size_t read(void *buf, std::size_t size, std::size_t file_offset=0)
Read from remote source into buffer (host or device memory).
RemoteEndpoint const & endpoint() const noexcept
Get a const reference to the underlying remote endpoint.
std::size_t nbytes() const noexcept
Get the file size.
RemoteHandle(std::unique_ptr< RemoteEndpoint > endpoint)
Create a new remote handle from an endpoint (infers the file size).
std::future< std::size_t > pread(void *buf, std::size_t size, std::size_t file_offset=0, std::size_t task_size=defaults::task_size())
Read from remote source into buffer (host or device memory) in parallel.
RemoteHandle(std::unique_ptr< RemoteEndpoint > endpoint, std::size_t nbytes)
Create a new remote handle from an endpoint and a file size.
A remote endpoint using AWS's S3 protocol.
static std::string url_from_bucket_and_object(std::string bucket_name, std::string object_name, std::optional< std::string > aws_region, std::optional< std::string > aws_endpoint_url)
Get the URL from an AWS S3 bucket and object name.
S3Endpoint(std::pair< std::string, std::string > bucket_and_object_names, std::optional< std::string > aws_region=std::nullopt, std::optional< std::string > aws_access_key=std::nullopt, std::optional< std::string > aws_secret_access_key=std::nullopt, std::optional< std::string > aws_endpoint_url=std::nullopt, std::optional< std::string > aws_session_token=std::nullopt)
Create an S3 endpoint from a bucket and object name.
S3Endpoint(std::string url, std::optional< std::string > aws_region=std::nullopt, std::optional< std::string > aws_access_key=std::nullopt, std::optional< std::string > aws_secret_access_key=std::nullopt, std::optional< std::string > aws_session_token=std::nullopt)
Create an S3 endpoint from a URL.
void setopt(CurlHandle &curl) override
Set needed connection options on a curl handle.
static std::pair< std::string, std::string > parse_s3_url(std::string const &s3_url)
Given a URL like "s3://<bucket>/<object>", return the names of the bucket and the object.
std::string str() const override
Get a description of this remote endpoint instance.
Singleton class of default values used throughout KvikIO.
Definition: defaults.hpp:123
KvikIO namespace.
Definition: batch.hpp:27