string_view.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2024, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <cudf/types.hpp>
8 
9 #include <cuda_runtime.h>
10 
11 #include <iterator>
12 
18 namespace CUDF_EXPORT cudf {
19 
// Type used to represent a single UTF-8 character. UTF-8 characters are 1-4 bytes,
// which fits in this 32-bit unsigned integer.
20 using char_utf8 = uint32_t;
21 
// A non-owning, immutable view of device data that is a variable length char array
// representing a UTF-8 string. The object stores only a pointer (_data), a byte count
// (_bytes) and a mutable _length field; copy/move construction and destruction are defaulted.
// Most members are declared here only (__device__ inline); their definitions are not part
// of this listing.
33 class string_view {
34  public:
    // Return the number of bytes in this string (the stored _bytes value). Host and device.
40  CUDF_HOST_DEVICE [[nodiscard]] inline size_type size_bytes() const { return _bytes; }
    // Device-only. NOTE(review): presumably the number of UTF-8 characters (as opposed to
    // bytes); the definition is not visible here — confirm.
46  [[nodiscard]] __device__ inline size_type length() const;
    // Return a pointer to the internal device array (the stored _data pointer, not a copy).
52  CUDF_HOST_DEVICE [[nodiscard]] inline char const* data() const { return _data; }
53 
    // Return true if string has no characters; implemented as size_bytes() == 0.
59  CUDF_HOST_DEVICE [[nodiscard]] inline bool empty() const { return size_bytes() == 0; }
60 
    // Handy iterator for navigating through encoded characters.
    // NOTE(review): the `class const_iterator {` opener (listing line 64) is missing from this
    // extract; the members from here down to the `};` at listing line 104 belong to that class.
66  public:
    // Standard iterator trait aliases; the category is declared as input iterator.
67  using difference_type = ptrdiff_t;
68  using value_type = char_utf8;
69  using reference = char_utf8&;
70  using pointer = char_utf8*;
71  using iterator_category = std::input_iterator_tag;
    // Device-only constructor from a string and a position.
    // NOTE(review): `pos` is presumably a character position — confirm against the definition.
72  __device__ inline const_iterator(string_view const& str, size_type pos);
    // Copy/move construction and assignment are compiler-generated.
73  const_iterator(const_iterator const& mit) = default;
74  const_iterator(const_iterator&& mit) = default;
75  const_iterator& operator=(const_iterator const&) = default;
76  const_iterator& operator=(const_iterator&&) = default;
    // Increment/advance operators (pre, post, compound, binary). All device-only.
77  __device__ inline const_iterator& operator++();
78  __device__ inline const_iterator operator++(int);
79  __device__ inline const_iterator& operator+=(difference_type);
80  __device__ inline const_iterator operator+(difference_type) const;
    // Decrement/retreat operators (pre, post, compound, binary). All device-only.
81  __device__ inline const_iterator& operator--();
82  __device__ inline const_iterator operator--(int);
83  __device__ inline const_iterator& operator-=(difference_type);
84  __device__ inline const_iterator operator-(difference_type) const;
    // NOTE(review): presumably repositions the iterator to the given character position and
    // returns *this — confirm against the definition.
85  __device__ inline const_iterator& move_to(size_type);
    // Equality and ordering comparisons against another iterator.
86  __device__ inline bool operator==(const_iterator const&) const;
87  __device__ inline bool operator!=(const_iterator const&) const;
88  __device__ inline bool operator<(const_iterator const&) const;
89  __device__ inline bool operator<=(const_iterator const&) const;
90  __device__ inline bool operator>(const_iterator const&) const;
91  __device__ inline bool operator>=(const_iterator const&) const;
    // Dereference returns a char_utf8 by value (not a reference into the string).
92  __device__ inline char_utf8 operator*() const;
    // NOTE(review): presumably the current character index and the current byte offset,
    // mirroring the char_pos / byte_pos members below — confirm.
93  [[nodiscard]] __device__ inline size_type position() const;
94  [[nodiscard]] __device__ inline size_type byte_offset() const;
95 
96  private:
    // string_view is a friend so it can use the private three-argument constructor below.
97  friend class string_view;
    // Iterator state, all value-initialized by default.
    // NOTE(review): looks like p/bytes mirror the viewed string's data pointer and byte size,
    // and char_pos/byte_pos track the current location in characters and bytes — confirm.
98  char const* p{};
99  size_type bytes{};
100  size_type char_pos{};
101  size_type byte_pos{};
    // Private constructor taking both a position and an offset.
    // NOTE(review): presumably (character position, byte offset) — confirm.
102  __device__ inline const_iterator(string_view const& str, size_type pos, size_type offset);
104  };
105 
    // Return new iterator pointing to the beginning of this string.
111  [[nodiscard]] __device__ inline const_iterator begin() const;
    // Return new iterator pointing past the end of this string.
117  [[nodiscard]] __device__ inline const_iterator end() const;
118 
    // Returns a char_utf8 by value for position `pos`.
    // NOTE(review): `pos` is presumably a character (not byte) index — confirm.
125  __device__ inline char_utf8 operator[](size_type pos) const;
    // NOTE(review): presumably maps character position `pos` to its byte offset — confirm.
132  [[nodiscard]] __device__ inline size_type byte_offset(size_type pos) const;
133 
    // Three-way style comparison returning int; overloads for another view and for a raw
    // (pointer, byte count) pair.
    // NOTE(review): sign convention (<0, 0, >0) is assumed, not shown here — confirm.
147  [[nodiscard]] __device__ inline int compare(string_view const& str) const;
162  __device__ inline int compare(char const* str, size_type bytes) const;
163 
    // Relational operators against another string_view. All device-only.
170  __device__ inline bool operator==(string_view const& rhs) const;
177  __device__ inline bool operator!=(string_view const& rhs) const;
184  __device__ inline bool operator<(string_view const& rhs) const;
191  __device__ inline bool operator>(string_view const& rhs) const;
198  __device__ inline bool operator<=(string_view const& rhs) const;
205  __device__ inline bool operator>=(string_view const& rhs) const;
206 
    // find overloads: search for a string_view, a raw (pointer, bytes) array, or a single
    // char_utf8. `pos` defaults to 0 and `count` defaults to -1.
    // NOTE(review): count == -1 presumably means "to the end of the string", and the return
    // value is presumably a character position or npos when not found — confirm.
217  [[nodiscard]] __device__ inline size_type find(string_view const& str,
218  size_type pos = 0,
219  size_type count = -1) const;
231  __device__ inline size_type find(char const* str,
232  size_type bytes,
233  size_type pos = 0,
234  size_type count = -1) const;
245  [[nodiscard]] __device__ inline size_type find(char_utf8 character,
246  size_type pos = 0,
247  size_type count = -1) const;
    // rfind overloads: same parameter shapes and defaults as find.
    // NOTE(review): presumably searches from the end of the [pos, pos+count) range — confirm.
258  [[nodiscard]] __device__ inline size_type rfind(string_view const& str,
259  size_type pos = 0,
260  size_type count = -1) const;
272  __device__ inline size_type rfind(char const* str,
273  size_type bytes,
274  size_type pos = 0,
275  size_type count = -1) const;
286  [[nodiscard]] __device__ inline size_type rfind(char_utf8 character,
287  size_type pos = 0,
288  size_type count = -1) const;
289 
    // Returns a new string_view by value.
    // NOTE(review): `start`/`length` are presumably in characters, not bytes — confirm.
298  [[nodiscard]] __device__ inline string_view substr(size_type start, size_type length) const;
299 
    // Static factories usable on host and device.
    // NOTE(review): presumably the lowest / highest string_view values for comparisons
    // (e.g. as reduction identities); definitions are not in this listing — confirm.
308  CUDF_HOST_DEVICE inline static string_view min();
309 
319  CUDF_HOST_DEVICE inline static string_view max();
320 
    // Default constructor represents an empty string: _data points at the literal "" while
    // _bytes and _length keep their value-initialized defaults (0).
324  CUDF_HOST_DEVICE inline string_view() : _data("") {}
325 
    // Create instance from existing device char array. The character count is not computed
    // here; _length is set to the UNKNOWN_STRING_LENGTH sentinel.
332  CUDF_HOST_DEVICE inline string_view(char const* data, size_type bytes)
333  : _data(data), _bytes(bytes), _length(UNKNOWN_STRING_LENGTH)
334  {
335  }
336 
    // Copy constructor, move constructor and destructor are compiler-generated.
337  string_view(string_view const&) = default;
338  string_view(string_view&&) = default;
339  ~string_view() = default;
    // Copy assignment operator.
    // NOTE(review): listing lines 346-351 are not shown in this extract; the page's member
    // index also lists a defaulted move assignment operator, which presumably sits there.
345  string_view& operator=(string_view const&) = default;
352 
    // Constant equal to -1.
    // NOTE(review): presumably the "not found" result of find/rfind — confirm.
358  static inline cudf::size_type const npos{-1};
359 
360  private:
    // Pointer to the viewed characters and their size in bytes.
361  char const* _data{};
362  size_type _bytes{};
    // Mutable so it can be written from const member functions.
    // NOTE(review): looks like a lazily computed character-count cache — confirm.
363  mutable size_type _length{};
364 
    // Sentinel (-1) stored in _length by the (data, bytes) constructor.
370  static inline cudf::size_type const UNKNOWN_STRING_LENGTH{-1};
371 
    // NOTE(review): presumably maps byte position `bytepos` to a character index — confirm.
378  [[nodiscard]] __device__ inline size_type character_offset(size_type bytepos) const;
379 
    // Private search helper templated on a compile-time `forward` flag.
    // NOTE(review): presumably the shared implementation behind find (forward = true) and
    // rfind (forward = false) — confirm against the definitions.
391  template <bool forward>
392  __device__ inline size_type find_impl(char const* str,
393  size_type bytes,
394  size_type pos,
395  size_type count) const;
396 };
397 
398 } // namespace CUDF_EXPORT cudf
Handy iterator for navigating through encoded characters.
Definition: string_view.hpp:64
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8 string.
Definition: string_view.hpp:33
CUDF_HOST_DEVICE size_type size_bytes() const
Return the number of bytes in this string.
Definition: string_view.hpp:40
string_view & operator=(string_view const &)=default
Copy assignment operator.
CUDF_HOST_DEVICE string_view()
Default constructor represents an empty string.
CUDF_HOST_DEVICE bool empty() const
Return true if string has no characters.
Definition: string_view.hpp:59
string_view(string_view &&)=default
Move constructor.
const_iterator end() const
Return new iterator pointing past the end of this string.
CUDF_HOST_DEVICE string_view(char const *data, size_type bytes)
Create instance from existing device char array.
string_view & operator=(string_view &&)=default
Move assignment operator.
CUDF_HOST_DEVICE char const * data() const
Return a pointer to the internal device array.
Definition: string_view.hpp:52
const_iterator begin() const
Return new iterator pointing to the beginning of this string.
string_view(string_view const &)=default
Copy constructor.
bool operator==(polymorphic_allocator< T > const &lhs, polymorphic_allocator< U > const &rhs)
bool operator!=(polymorphic_allocator< T > const &lhs, polymorphic_allocator< U > const &rhs)
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator-(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE bool operator>=(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE bool operator<=(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator*(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE bool operator>(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator+(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE bool operator<(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
std::unique_ptr< column > rfind(strings_column_view const &input, string_scalar const &target, size_type start=0, size_type stop=-1, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a column of character position values where the target string is first found searching from the end of each string.
std::unique_ptr< column > find(strings_column_view const &input, string_scalar const &target, size_type start=0, size_type stop=-1, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a column of character position values where the target string is first found in each string of the provided column.
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
uint32_t char_utf8
UTF-8 characters are 1-4 bytes.
Definition: string_view.hpp:20
cuDF interfaces
Definition: host_udf.hpp:26
Type declarations for libcudf.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:21