url.hpp
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <optional>
8 #include <string>
9 
10 #include <curl/curl.h>
11 
12 namespace kvikio::detail {
21  private:
22  CURLU* _handle{nullptr};
23 
24  public:
32 
36  ~CurlUrlHandle() noexcept;
37 
38  CurlUrlHandle(CurlUrlHandle const&) = delete;
39  CurlUrlHandle& operator=(CurlUrlHandle const&) = delete;
40 
41  CurlUrlHandle(CurlUrlHandle&& other) noexcept;
42  CurlUrlHandle& operator=(CurlUrlHandle&& other) noexcept;
43 
50  CURLU* get() const;
51 };
52 
73 class UrlParser {
74  public:
78  struct UrlComponents {
83  std::optional<std::string> scheme;
84 
89  std::optional<std::string> host;
90 
96  std::optional<std::string> port;
97 
102  std::optional<std::string> path;
103 
107  std::optional<std::string> query;
108 
112  std::optional<std::string> fragment;
113  };
114 
151  static UrlComponents parse(std::string const& url,
152  std::optional<unsigned int> bitmask_url_flags = std::nullopt,
153  std::optional<unsigned int> bitmask_component_flags = std::nullopt);
154 
165  static std::optional<std::string> extract_component(
166  CurlUrlHandle const& handle,
167  CURLUPart part,
168  std::optional<unsigned int> bitmask_component_flags = std::nullopt,
169  std::optional<CURLUcode> allowed_err_code = std::nullopt);
170 
182  static std::optional<std::string> extract_component(
183  std::string const& url,
184  CURLUPart part,
185  std::optional<unsigned int> bitmask_url_flags = std::nullopt,
186  std::optional<unsigned int> bitmask_component_flags = std::nullopt,
187  std::optional<CURLUcode> allowed_err_code = std::nullopt);
188 };
189 
216 class UrlBuilder {
217  private:
218  CurlUrlHandle _handle;
219 
229  UrlBuilder& set_component(CURLUPart part,
230  char const* value,
231  std::optional<unsigned int> flags = std::nullopt);
232 
233  public:
238  explicit UrlBuilder();
239 
250  explicit UrlBuilder(std::string const& url,
251  std::optional<unsigned int> bitmask_url_flags = std::nullopt);
252 
260  explicit UrlBuilder(UrlParser::UrlComponents const& components,
261  std::optional<unsigned int> bitmask_url_flags = std::nullopt);
262 
275  UrlBuilder& set_scheme(std::optional<std::string> const& scheme);
276 
289  UrlBuilder& set_host(std::optional<std::string> const& host);
290 
303  UrlBuilder& set_port(std::optional<std::string> const& port);
304 
318  UrlBuilder& set_path(std::optional<std::string> const& path);
319 
332  UrlBuilder& set_query(std::optional<std::string> const& query);
333 
346  UrlBuilder& set_fragment(std::optional<std::string> const& fragment);
347 
362  std::string build(std::optional<unsigned int> bitmask_component_flags = std::nullopt) const;
363 
364  static std::string build_manually(UrlParser::UrlComponents const& components);
365 };
366 
/**
 * @brief Provides URL encoding functionality.
 *
 * NOTE(review): this class uses std::string_view, but the header's include list shows only
 * <optional>, <string> and <curl/curl.h>; consider including <string_view> directly.
 */
class UrlEncoder {
 public:
  /**
   * @brief Default set of characters to percent-encode.
   *
   * Contains 48 entries: 15 punctuation/space characters, the ASCII control characters
   * 0x00-0x1F, and DEL (0x7F). The array holds an embedded NUL and has no terminator, so it must
   * always be used together with its size. Presumably these are the characters AWS treats as
   * special in object keys -- TODO confirm.
   */
  static constexpr char aws_special_chars[] = {
    // Punctuation and space.
    '!', '*', '\'', '(', ')', '&', '$', '@', '=', ';', ':', '+', ' ', ',', '?',
    // ASCII control characters 0x00-0x1F.
    '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
    '\x08', '\x09', '\x0A', '\x0B', '\x0C', '\x0D', '\x0E', '\x0F',
    '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
    '\x18', '\x19', '\x1A', '\x1B', '\x1C', '\x1D', '\x1E', '\x1F',
    // DEL.
    '\x7F'};

  /**
   * @brief Percent-encodes specified characters in a URL path.
   *
   * @param path The path to encode.
   * @param chars_to_encode The characters to encode. Defaults to the full contents of
   * `aws_special_chars`; the view is built with an explicit length so the embedded NUL is
   * included.
   * @return The resulting string (presumably the encoded path; the definition is not in this
   * header).
   */
  static std::string encode_path(
    std::string_view path,
    std::string_view chars_to_encode = std::string_view{aws_special_chars,
                                                        sizeof(aws_special_chars)});
};
431 
432 } // namespace kvikio::detail
RAII wrapper for libcurl's URL handle (CURLU)
Definition: url.hpp:20
~CurlUrlHandle() noexcept
Clean up the underlying URL handle.
CURLU * get() const
Get the underlying libcurl URL handle.
CurlUrlHandle()
Create a new libcurl URL handle.
URL builder utility using libcurl's URL API.
Definition: url.hpp:216
UrlBuilder & set_host(std::optional< std::string > const &host)
Set the hostname or IP address.
UrlBuilder & set_fragment(std::optional< std::string > const &fragment)
Set the fragment identifier.
UrlBuilder(std::string const &url, std::optional< unsigned int > bitmask_url_flags=std::nullopt)
Construct a URL builder from an existing URL string.
UrlBuilder & set_scheme(std::optional< std::string > const &scheme)
Set the URL scheme (e.g., "http", "https", "ftp")
UrlBuilder(UrlParser::UrlComponents const &components, std::optional< unsigned int > bitmask_url_flags=std::nullopt)
Construct a URL builder from parsed URL components.
UrlBuilder & set_port(std::optional< std::string > const &port)
Set the port number.
UrlBuilder & set_path(std::optional< std::string > const &path)
Set the path component.
std::string build(std::optional< unsigned int > bitmask_component_flags=std::nullopt) const
Build the final URL string.
UrlBuilder()
Construct an empty URL builder.
UrlBuilder & set_query(std::optional< std::string > const &query)
Set the entire query string.
Provides URL encoding functionality.
Definition: url.hpp:390
static std::string encode_path(std::string_view path, std::string_view chars_to_encode=std::string_view{ aws_special_chars, sizeof(aws_special_chars)})
Percent-encodes specified characters in a URL path.
URL parsing utility using libcurl's URL API.
Definition: url.hpp:73
static std::optional< std::string > extract_component(std::string const &url, CURLUPart part, std::optional< unsigned int > bitmask_url_flags=std::nullopt, std::optional< unsigned int > bitmask_component_flags=std::nullopt, std::optional< CURLUcode > allowed_err_code=std::nullopt)
Extract a specific component from a URL string.
static UrlComponents parse(std::string const &url, std::optional< unsigned int > bitmask_url_flags=std::nullopt, std::optional< unsigned int > bitmask_component_flags=std::nullopt)
Parses the given URL according to "RFC 3986 plus" (the URL syntax accepted by libcurl) and extracts its components.
static std::optional< std::string > extract_component(CurlUrlHandle const &handle, CURLUPart part, std::optional< unsigned int > bitmask_component_flags=std::nullopt, std::optional< CURLUcode > allowed_err_code=std::nullopt)
Extract a specific component from a CurlUrlHandle.
Container for parsed URL components.
Definition: url.hpp:78
std::optional< std::string > host
The hostname or IP address. May be empty for URLs without an authority component (e....
Definition: url.hpp:89
std::optional< std::string > path
The path component of the URL. Libcurl ensures that the path component is always present,...
Definition: url.hpp:102
std::optional< std::string > scheme
The URL scheme (e.g., "http", "https", "ftp"). May be empty for scheme-relative URLs or paths.
Definition: url.hpp:83
std::optional< std::string > query
The query string (without the leading "?"). Empty if no query parameters are present.
Definition: url.hpp:107
std::optional< std::string > port
The port number as a string. Will be empty if no explicit port is specified in the URL.
Definition: url.hpp:96
std::optional< std::string > fragment
The fragment identifier (without the leading "#"). Empty if no fragment is present.
Definition: url.hpp:112