string_view.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/types.hpp>
19 
20 #include <cuda_runtime.h>
21 
22 #include <iterator>
23 
29 namespace cudf {
   // cuDF interfaces.
30 
   // UTF-8 characters are 1-4 bytes; a single character is carried in a 32-bit unsigned value.
31 using char_utf8 = uint32_t;
32 
   // A non-owning, immutable view of device data: a variable-length char array holding
   // UTF-8 encoded characters. The view stores only a pointer, a byte count, and a
   // character count (see the private members at the bottom of the class).
44 class string_view {
45  public:
    // Return the number of bytes in this string. Usable on host and device.
51  CUDF_HOST_DEVICE [[nodiscard]] inline size_type size_bytes() const { return _bytes; }
    // Return the number of characters in this string. Device-only; defined outside this listing.
57  __device__ [[nodiscard]] inline size_type length() const;
    // Return a pointer to the internal device array. Usable on host and device.
63  CUDF_HOST_DEVICE [[nodiscard]] inline char const* data() const { return _data; }
64 
    // Return true if the string has no characters, i.e. its byte count is zero.
70  CUDF_HOST_DEVICE [[nodiscard]] inline bool empty() const { return size_bytes() == 0; }
71 
    // Handy iterator for navigating through encoded characters.
    // NOTE(review): this listing jumps from source line 71 to 77, so the opening line of the
    // nested const_iterator class is not shown here. The members from this point down to the
    // closing `};` belong to const_iterator, not directly to string_view.
77  public:
    // Iterator traits: dereferencing yields char_utf8 values; tagged as an input iterator.
78  using difference_type = ptrdiff_t;
79  using value_type = char_utf8;
80  using reference = char_utf8&;
81  using pointer = char_utf8*;
82  using iterator_category = std::input_iterator_tag;
    // Construct an iterator over `str` at `pos`.
    // presumably `pos` is a character position (not a byte offset) -- TODO confirm in the definition.
83  __device__ inline const_iterator(string_view const& str, size_type pos);
    // Copy/move construction and copy assignment are compiler-generated.
84  const_iterator(const_iterator const& mit) = default;
85  const_iterator(const_iterator&& mit) = default;
86  const_iterator& operator=(const_iterator const&) = default;
    // Increment / decrement and arithmetic by a signed distance (difference_type).
    // All are device-only and defined outside this listing.
88  __device__ inline const_iterator& operator++();
89  __device__ inline const_iterator operator++(int);
90  __device__ inline const_iterator& operator+=(difference_type);
91  __device__ inline const_iterator operator+(difference_type) const;
92  __device__ inline const_iterator& operator--();
93  __device__ inline const_iterator operator--(int);
94  __device__ inline const_iterator& operator-=(difference_type);
95  __device__ inline const_iterator operator-(difference_type) const;
    // presumably repositions this iterator to the given character position -- TODO confirm.
96  __device__ inline const_iterator& move_to(size_type);
    // Equality and ordering comparisons between two iterators.
97  __device__ inline bool operator==(const_iterator const&) const;
98  __device__ inline bool operator!=(const_iterator const&) const;
99  __device__ inline bool operator<(const_iterator const&) const;
100  __device__ inline bool operator<=(const_iterator const&) const;
101  __device__ inline bool operator>(const_iterator const&) const;
102  __device__ inline bool operator>=(const_iterator const&) const;
    // Dereference returns the character by value (char_utf8), not a reference.
103  __device__ inline char_utf8 operator*() const;
    // presumably these report char_pos and byte_pos respectively -- definitions not visible here.
104  [[nodiscard]] __device__ inline size_type position() const;
105  [[nodiscard]] __device__ inline size_type byte_offset() const;
106 
107  private:
    // string_view is a friend so it can reach the private three-argument constructor below.
108  friend class string_view;
    // Iterator state; every member is value-initialized (null pointer / zero) by default.
109  char const* p{};
110  size_type bytes{};
111  size_type char_pos{};
112  size_type byte_pos{};
    // Private constructor taking an additional `offset`.
    // presumably `offset` is the byte offset that corresponds to `pos` -- TODO confirm.
113  __device__ inline const_iterator(string_view const& str, size_type pos, size_type offset);
115  };
116 
    // Return new iterator pointing to the beginning of this string.
122  __device__ [[nodiscard]] inline const_iterator begin() const;
    // Return new iterator pointing past the end of this string.
128  __device__ [[nodiscard]] inline const_iterator end() const;
129 
    // Return single UTF-8 character at the given character position.
136  __device__ inline char_utf8 operator[](size_type pos) const;
    // Return the byte offset from data() for a given character position.
143  __device__ [[nodiscard]] inline size_type byte_offset(size_type pos) const;
144 
    // Compare the target string with this string. Each character is compared as a UTF-8
    // code-point value.
158  __device__ [[nodiscard]] inline int compare(string_view const& str) const;
    // Overload of compare() that takes the target as a raw char array plus its size in bytes.
173  __device__ inline int compare(char const* str, size_type bytes) const;
174 
    // Returns true if rhs matches this string exactly.
181  __device__ inline bool operator==(string_view const& rhs) const;
    // Returns true if rhs does not match this string.
188  __device__ inline bool operator!=(string_view const& rhs) const;
    // Returns true if this string is ordered before rhs.
195  __device__ inline bool operator<(string_view const& rhs) const;
    // Returns true if rhs is ordered before this string.
202  __device__ inline bool operator>(string_view const& rhs) const;
    // Returns true if this string matches or is ordered before rhs.
209  __device__ inline bool operator<=(string_view const& rhs) const;
    // Returns true if rhs matches or is ordered before this string.
216  __device__ inline bool operator>=(string_view const& rhs) const;
217 
    // Returns the character position of the first occurrence where the argument str is found
    // in this string. `pos` defaults to 0 and `count` defaults to -1 (the same value as npos).
    // presumably count == -1 means "search through the end of the string" -- TODO confirm.
228  __device__ [[nodiscard]] inline size_type find(string_view const& str,
229  size_type pos = 0,
230  size_type count = -1) const;
    // Overload of find() that takes the target as a raw char array plus its size in bytes.
242  __device__ inline size_type find(char const* str,
243  size_type bytes,
244  size_type pos = 0,
245  size_type count = -1) const;
    // Overload of find() that takes a single UTF-8 character as the target.
256  __device__ [[nodiscard]] inline size_type find(char_utf8 character,
257  size_type pos = 0,
258  size_type count = -1) const;
    // Returns the character position of the last occurrence where the argument str is found
    // in this string. Same `pos` / `count` defaults as find().
269  __device__ [[nodiscard]] inline size_type rfind(string_view const& str,
270  size_type pos = 0,
271  size_type count = -1) const;
    // Overload of rfind() that takes the target as a raw char array plus its size in bytes.
283  __device__ inline size_type rfind(char const* str,
284  size_type bytes,
285  size_type pos = 0,
286  size_type count = -1) const;
    // Overload of rfind() that takes a single UTF-8 character as the target.
297  __device__ [[nodiscard]] inline size_type rfind(char_utf8 character,
298  size_type pos = 0,
299  size_type count = -1) const;
300 
    // Return a sub-string of this string. The result is another non-owning view, so the
    // original string and device memory must still be maintained while it is in use.
309  __device__ [[nodiscard]] inline string_view substr(size_type start, size_type length) const;
310 
    // Return minimum value associated with the string type. Usable on host and device.
319  CUDF_HOST_DEVICE inline static string_view min();
320 
    // Return maximum value associated with the string type. Usable on host and device.
330  CUDF_HOST_DEVICE inline static string_view max();
331 
    // Default constructor represents an empty string: _data points at the "" literal, and
    // _bytes / _length keep their zero default member initializers.
335  CUDF_HOST_DEVICE inline string_view() : _data("") {}
336 
    // Create instance from existing device char array. The character count is not computed
    // here; _length is set to the UNKNOWN_STRING_LENGTH sentinel instead.
343  CUDF_HOST_DEVICE inline string_view(char const* data, size_type bytes)
344  : _data(data), _bytes(bytes), _length(UNKNOWN_STRING_LENGTH)
345  {
346  }
347 
    // Copy constructor, move constructor, and destructor are compiler-generated.
348  string_view(string_view const&) = default;
349  string_view(string_view&&) = default;
350  ~string_view() = default;
    // Copy assignment operator.
356  string_view& operator=(string_view const&) = default;
    // NOTE(review): the member index on this page also lists a defaulted move assignment
    // operator, but its source line is not present in this listing.
363 
    // No-position value (-1).
369  static inline cudf::size_type const npos{-1};
370 
371  private:
    // Pointer to the viewed character data (not owned by this object).
372  char const* _data{};
    // Size of the viewed data in bytes.
373  size_type _bytes{};
    // Character count. Declared mutable, so const member functions may update it.
    // presumably length() fills it in lazily when it holds UNKNOWN_STRING_LENGTH -- TODO confirm.
374  mutable size_type _length{};
375 
    // Sentinel (-1) stored in _length by the (data, bytes) constructor.
381  static inline cudf::size_type const UNKNOWN_STRING_LENGTH{-1};
382 
    // presumably the inverse of byte_offset(): the character position for a given byte
    // position -- definition not visible here, TODO confirm.
389  __device__ [[nodiscard]] inline size_type character_offset(size_type bytepos) const;
390 
    // Private search helper templated on a compile-time `forward` flag.
    // presumably the shared implementation behind find() (forward == true) and rfind()
    // (forward == false) -- TODO confirm against the definition.
402  template <bool forward>
403  __device__ inline size_type find_impl(char const* str,
404  size_type bytes,
405  size_type pos,
406  size_type count) const;
407 };
408 
409 } // namespace cudf
Handy iterator for navigating through encoded characters.
Definition: string_view.hpp:75
A non-owning, immutable view of device data that is a variable-length char array representing a UTF-8 string.
Definition: string_view.hpp:44
CUDF_HOST_DEVICE size_type size_bytes() const
Return the number of bytes in this string.
Definition: string_view.hpp:51
size_type rfind(string_view const &str, size_type pos=0, size_type count=-1) const
Returns the character position of the last occurrence where the argument str is found in this string ...
string_view & operator=(string_view const &)=default
Copy assignment operator.
CUDF_HOST_DEVICE string_view()
Default constructor represents an empty string.
size_type length() const
Return the number of characters in this string.
string_view substr(size_type start, size_type length) const
Return a sub-string of this string. The original string and device memory must still be maintained for the lifetime of the returned instance.
CUDF_HOST_DEVICE bool empty() const
Return true if string has no characters.
Definition: string_view.hpp:70
bool operator==(string_view const &rhs) const
Returns true if rhs matches this string exactly.
string_view(string_view &&)=default
Move constructor.
const_iterator end() const
Return new iterator pointing past the end of this string.
CUDF_HOST_DEVICE string_view(char const *data, size_type bytes)
Create instance from existing device char array.
int compare(string_view const &str) const
Compares the target string with this string. Each character is compared as a UTF-8 code-point value.
bool operator>=(string_view const &rhs) const
Returns true if rhs matches or is ordered before this string.
string_view & operator=(string_view &&)=default
Move assignment operator.
CUDF_HOST_DEVICE char const * data() const
Return a pointer to the internal device array.
Definition: string_view.hpp:63
size_type byte_offset(size_type pos) const
Return the byte offset from data() for a given character position.
const_iterator begin() const
Return new iterator pointing to the beginning of this string.
char_utf8 operator[](size_type pos) const
Return single UTF-8 character at the given character position.
string_view(string_view const &)=default
Copy constructor.
bool operator!=(string_view const &rhs) const
Returns true if rhs does not match this string.
size_type find(string_view const &str, size_type pos=0, size_type count=-1) const
Returns the character position of the first occurrence where the argument str is found in this string...
static CUDF_HOST_DEVICE string_view max()
Return maximum value associated with the string type.
bool operator<=(string_view const &rhs) const
Returns true if this string matches or is ordered before rhs.
bool operator<(string_view const &rhs) const
Returns true if this string is ordered before rhs.
bool operator>(string_view const &rhs) const
Returns true if rhs is ordered before this string.
static cudf::size_type const npos
No-position value.
static CUDF_HOST_DEVICE string_view min()
Return minimum value associated with the string type.
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator-(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator*(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator+(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
uint32_t char_utf8
UTF-8 characters are 1-4 bytes.
Definition: string_view.hpp:31
cuDF interfaces
Definition: aggregation.hpp:34
Type declarations for libcudf.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32