string_view.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
7 #include <cudf/types.hpp>
8 
9 #include <cuda/std/iterator>
10 
11 #ifndef __CUDACC_RTC__
12 #include <cuda_runtime.h>
13 #endif
14 
20 namespace CUDF_EXPORT cudf {
21 
22 using char_utf8 = uint32_t;
23 
35 class string_view {
36  public:
42  CUDF_HOST_DEVICE [[nodiscard]] inline size_type size_bytes() const { return _bytes; }
48  [[nodiscard]] __device__ inline size_type length() const;
54  CUDF_HOST_DEVICE [[nodiscard]] inline char const* data() const { return _data; }
55 
61  CUDF_HOST_DEVICE [[nodiscard]] inline bool empty() const { return size_bytes() == 0; }
62 
68  public:
69  using difference_type = ptrdiff_t;
70  using value_type = char_utf8;
71  using reference = char_utf8&;
72  using pointer = char_utf8*;
73  using iterator_category = cuda::std::input_iterator_tag;
74  __device__ inline const_iterator(string_view const& str, size_type pos);
75  const_iterator(const_iterator const& mit) = default;
76  const_iterator(const_iterator&& mit) = default;
77  const_iterator& operator=(const_iterator const&) = default;
78  const_iterator& operator=(const_iterator&&) = default;
79  __device__ inline const_iterator& operator++();
80  __device__ inline const_iterator operator++(int);
81  __device__ inline const_iterator& operator+=(difference_type);
82  __device__ inline const_iterator operator+(difference_type) const;
83  __device__ inline const_iterator& operator--();
84  __device__ inline const_iterator operator--(int);
85  __device__ inline const_iterator& operator-=(difference_type);
86  __device__ inline const_iterator operator-(difference_type) const;
87  __device__ inline const_iterator& move_to(size_type);
88  __device__ inline bool operator==(const_iterator const&) const;
89  __device__ inline bool operator!=(const_iterator const&) const;
90  __device__ inline bool operator<(const_iterator const&) const;
91  __device__ inline bool operator<=(const_iterator const&) const;
92  __device__ inline bool operator>(const_iterator const&) const;
93  __device__ inline bool operator>=(const_iterator const&) const;
94  __device__ inline char_utf8 operator*() const;
95  [[nodiscard]] __device__ inline size_type position() const;
96  [[nodiscard]] __device__ inline size_type byte_offset() const;
97 
98  private:
99  friend class string_view;
100  char const* p{};
101  size_type bytes{};
102  size_type char_pos{};
103  size_type byte_pos{};
104  __device__ inline const_iterator(string_view const& str, size_type pos, size_type offset);
106  };
107 
113  [[nodiscard]] __device__ inline const_iterator begin() const;
119  [[nodiscard]] __device__ inline const_iterator end() const;
120 
127  __device__ inline char_utf8 operator[](size_type pos) const;
134  [[nodiscard]] __device__ inline size_type byte_offset(size_type pos) const;
135 
149  [[nodiscard]] __device__ inline int compare(string_view const& str) const;
164  __device__ inline int compare(char const* str, size_type bytes) const;
165 
172  __device__ inline bool operator==(string_view const& rhs) const;
179  __device__ inline bool operator!=(string_view const& rhs) const;
186  __device__ inline bool operator<(string_view const& rhs) const;
193  __device__ inline bool operator>(string_view const& rhs) const;
200  __device__ inline bool operator<=(string_view const& rhs) const;
207  __device__ inline bool operator>=(string_view const& rhs) const;
208 
219  [[nodiscard]] __device__ inline size_type find(string_view const& str,
220  size_type pos = 0,
221  size_type count = -1) const;
233  __device__ inline size_type find(char const* str,
234  size_type bytes,
235  size_type pos = 0,
236  size_type count = -1) const;
247  [[nodiscard]] __device__ inline size_type find(char_utf8 character,
248  size_type pos = 0,
249  size_type count = -1) const;
260  [[nodiscard]] __device__ inline size_type rfind(string_view const& str,
261  size_type pos = 0,
262  size_type count = -1) const;
274  __device__ inline size_type rfind(char const* str,
275  size_type bytes,
276  size_type pos = 0,
277  size_type count = -1) const;
288  [[nodiscard]] __device__ inline size_type rfind(char_utf8 character,
289  size_type pos = 0,
290  size_type count = -1) const;
291 
300  [[nodiscard]] __device__ inline string_view substr(size_type start, size_type length) const;
301 
310  CUDF_HOST_DEVICE inline static string_view min();
311 
321  CUDF_HOST_DEVICE inline static string_view max();
322 
326  CUDF_HOST_DEVICE inline string_view() : _data("") {}
327 
334  CUDF_HOST_DEVICE inline string_view(char const* data, size_type bytes)
335  : _data(data), _bytes(bytes), _length(UNKNOWN_STRING_LENGTH)
336  {
337  }
338 
339  string_view(string_view const&) = default;
340  string_view(string_view&&) = default;
341  ~string_view() = default;
347  string_view& operator=(string_view const&) = default;
354 
360  static inline cudf::size_type const npos{-1};
361 
362  private:
363  char const* _data{};
364  size_type _bytes{};
365  mutable size_type _length{};
366 
372  static inline cudf::size_type const UNKNOWN_STRING_LENGTH{-1};
373 
380  [[nodiscard]] __device__ inline size_type character_offset(size_type bytepos) const;
381 
393  template <bool forward>
394  __device__ inline size_type find_impl(char const* str,
395  size_type bytes,
396  size_type pos,
397  size_type count) const;
398 };
399 
400 } // namespace CUDF_EXPORT cudf
Handy iterator for navigating through encoded characters.
Definition: string_view.hpp:66
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8 string.
Definition: string_view.hpp:35
CUDF_HOST_DEVICE size_type size_bytes() const
Return the number of bytes in this string.
Definition: string_view.hpp:42
string_view & operator=(string_view const &)=default
Copy assignment operator.
CUDF_HOST_DEVICE string_view()
Default constructor represents an empty string.
CUDF_HOST_DEVICE bool empty() const
Return true if string has no characters.
Definition: string_view.hpp:61
string_view(string_view &&)=default
Move constructor.
const_iterator end() const
Return new iterator pointing past the end of this string.
CUDF_HOST_DEVICE string_view(char const *data, size_type bytes)
Create instance from existing device char array.
string_view & operator=(string_view &&)=default
Move assignment operator.
CUDF_HOST_DEVICE char const * data() const
Return a pointer to the internal device array.
Definition: string_view.hpp:54
const_iterator begin() const
Return new iterator pointing to the beginning of this string.
string_view(string_view const &)=default
Copy constructor.
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator-(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE bool operator>=(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE bool operator<=(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator*(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE bool operator>(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator+(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE bool operator<(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
bool operator==(polymorphic_allocator< T > const &lhs, polymorphic_allocator< U > const &rhs)
bool operator!=(polymorphic_allocator< T > const &lhs, polymorphic_allocator< U > const &rhs)
std::unique_ptr< column > count(strings_column_view const &input, string_scalar const &target, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns the number of times the given target string matches in each string.
std::unique_ptr< column > rfind(strings_column_view const &input, string_scalar const &target, size_type start=0, size_type stop=-1, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a column of character position values where the target string is first found searching from the end of each string.
std::unique_ptr< column > find(strings_column_view const &input, string_scalar const &target, size_type start=0, size_type stop=-1, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a column of character position values where the target string is first found in each string of the provided column.
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:85
uint32_t char_utf8
UTF-8 characters are 1-4 bytes.
Definition: string_view.hpp:22
cuDF interfaces
Definition: host_udf.hpp:26
Type declarations for libcudf.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:21