string_view.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/types.hpp>
19 
20 #include <iterator>
21 
27 namespace cudf {
28 
/// A single UTF-8 character held in a 32-bit unsigned value; UTF-8 characters are 1-4 bytes.
29 using char_utf8 = uint32_t;
30 
/// The string length is initialized to this value (-1) as a place-holder.
36 constexpr cudf::size_type UNKNOWN_STRING_LENGTH{-1};
37 
/**
 * A non-owning, immutable view of device data: a variable-length char array
 * holding UTF-8 encoded text.
 *
 * The view stores only a data pointer, a byte count and a character count;
 * copy/move construction and assignment are all defaulted. Members marked
 * CUDF_HOST_DEVICE are defined inline here; the __device__ members are only
 * declared here (their definitions are listed as living in string_view.cuh).
 */
49 class string_view {
50  public:
  /// Return the number of bytes in this string.
54  CUDF_HOST_DEVICE [[nodiscard]] inline size_type size_bytes() const { return _bytes; }
  /// Return the number of characters in this string.
58  __device__ [[nodiscard]] inline size_type length() const;
  /// Return a pointer to the internal device array.
62  CUDF_HOST_DEVICE [[nodiscard]] inline const char* data() const { return _data; }
63 
  /// Return true if string has no characters (i.e. size_bytes() == 0).
67  CUDF_HOST_DEVICE [[nodiscard]] inline bool empty() const { return size_bytes() == 0; }
68 
  // NOTE(review): the opening `class const_iterator {` line (original line 72)
  // is absent from this listing. Everything from here down to the first `};`
  // belongs to the nested const_iterator class: a handy iterator for
  // navigating through encoded characters.
73  public:
  // Iterator trait aliases. The category is declared as input iterator.
  // NOTE(review): `reference` is char_utf8& although operator*() below
  // returns char_utf8 by value.
74  using difference_type = ptrdiff_t;
75  using value_type = char_utf8;
76  using reference = char_utf8&;
77  using pointer = char_utf8*;
78  using iterator_category = std::input_iterator_tag;
  /// Construct an iterator over `str` for position `pos`
  /// (presumably a character position, not a byte offset -- TODO confirm in string_view.cuh).
79  __device__ inline const_iterator(const string_view& str, size_type pos);
  // Copy/move construction and assignment are compiler-generated.
80  const_iterator(const const_iterator& mit) = default;
81  const_iterator(const_iterator&& mit) = default;
82  const_iterator& operator=(const const_iterator&) = default;
83  const_iterator& operator=(const_iterator&&) = default;
  // Increment / advance operators (pre, post, compound, binary).
  // NOTE(review): binary operator+ is declared non-const.
84  __device__ inline const_iterator& operator++();
85  __device__ inline const_iterator operator++(int);
86  __device__ inline const_iterator& operator+=(difference_type);
87  __device__ inline const_iterator operator+(difference_type);
  // Decrement / retreat operators (pre, post, compound, binary).
  // NOTE(review): binary operator- is declared non-const.
88  __device__ inline const_iterator& operator--();
89  __device__ inline const_iterator operator--(int);
90  __device__ inline const_iterator& operator-=(difference_type);
91  __device__ inline const_iterator operator-(difference_type);
  // Equality and ordering comparisons between two iterators.
92  __device__ inline bool operator==(const const_iterator&) const;
93  __device__ inline bool operator!=(const const_iterator&) const;
94  __device__ inline bool operator<(const const_iterator&) const;
95  __device__ inline bool operator<=(const const_iterator&) const;
96  __device__ inline bool operator>(const const_iterator&) const;
97  __device__ inline bool operator>=(const const_iterator&) const;
  /// Dereference: yields a char_utf8 by value.
98  __device__ inline char_utf8 operator*() const;
  // Accessors for the iterator's current location
  // (presumably the character index and the byte offset respectively -- TODO confirm).
  // NOTE(review): attribute order here ([[nodiscard]] first) differs from the
  // other declarations in this class, which put the execution-space specifier first.
99  [[nodiscard]] __device__ inline size_type position() const;
100  [[nodiscard]] __device__ inline size_type byte_offset() const;
101 
102  private:
  // Iterator state, all value-initialized. Presumably: `p` is the string's
  // data pointer, `bytes` its size in bytes, and `char_pos`/`byte_pos` the
  // current character index and byte offset -- TODO confirm in string_view.cuh.
103  const char* p{};
104  size_type bytes{};
105  size_type char_pos{};
106  size_type byte_pos{};
107  };
108 
  /// Return new iterator pointing to the beginning of this string.
112  __device__ [[nodiscard]] inline const_iterator begin() const;
  /// Return new iterator pointing past the end of this string.
116  __device__ [[nodiscard]] inline const_iterator end() const;
117 
  /// Return single UTF-8 character at the given character position.
123  __device__ inline char_utf8 operator[](size_type pos) const;
  /// Return the byte offset from data() for a given character position.
129  __device__ [[nodiscard]] inline size_type byte_offset(size_type pos) const;
130 
  /// Compares the target string with this string. Each character is compared
  /// as a UTF-8 code-point value. (Meaning of the int result is not shown in
  /// this listing -- see string_view.cuh.)
144  __device__ [[nodiscard]] inline int compare(const string_view& str) const;
  /// Overload of compare() taking a raw character pointer plus `bytes`
  /// (presumably the length of `str` in bytes -- TODO confirm).
159  __device__ inline int compare(const char* str, size_type bytes) const;
160 
  /// Returns true if rhs matches this string exactly.
164  __device__ inline bool operator==(const string_view& rhs) const;
  /// Returns true if rhs does not match this string.
168  __device__ inline bool operator!=(const string_view& rhs) const;
  /// Returns true if this string is ordered before rhs.
172  __device__ inline bool operator<(const string_view& rhs) const;
  /// Returns true if rhs is ordered before this string.
176  __device__ inline bool operator>(const string_view& rhs) const;
  /// Returns true if this string matches or is ordered before rhs.
180  __device__ inline bool operator<=(const string_view& rhs) const;
  /// Returns true if rhs matches or is ordered before this string.
184  __device__ inline bool operator>=(const string_view& rhs) const;
185 
  /// Returns the character position of the first occurrence where the argument
  /// str is found in this string. `pos` defaults to 0 and `count` to -1
  /// (presumably "start at the first character" and "search to the end" -- TODO confirm).
196  __device__ [[nodiscard]] inline size_type find(const string_view& str,
197  size_type pos = 0,
198  size_type count = -1) const;
  /// Overload of find() taking a raw character pointer plus `bytes`
  /// (presumably the length of `str` in bytes -- TODO confirm).
210  __device__ inline size_type find(const char* str,
211  size_type bytes,
212  size_type pos = 0,
213  size_type count = -1) const;
  /// Overload of find() that searches for a single char_utf8 character.
224  __device__ [[nodiscard]] inline size_type find(char_utf8 character,
225  size_type pos = 0,
226  size_type count = -1) const;
  /// Returns the character position of the last occurrence where the argument
  /// str is found in this string. Same `pos`/`count` defaults as find().
  /// NOTE(review): `pos` defaults to 0 here as well, unlike std::string::rfind
  /// whose default is npos -- confirm the intended search window in string_view.cuh.
237  __device__ [[nodiscard]] inline size_type rfind(const string_view& str,
238  size_type pos = 0,
239  size_type count = -1) const;
  /// Overload of rfind() taking a raw character pointer plus `bytes`
  /// (presumably the length of `str` in bytes -- TODO confirm).
251  __device__ inline size_type rfind(const char* str,
252  size_type bytes,
253  size_type pos = 0,
254  size_type count = -1) const;
  /// Overload of rfind() that searches for a single char_utf8 character.
265  __device__ [[nodiscard]] inline size_type rfind(char_utf8 character,
266  size_type pos = 0,
267  size_type count = -1) const;
268 
  /// Return a sub-string of this string. The returned view is non-owning, so
  /// the original string and device memory must still be maintained while it is used.
  /// (`start`/`length` are presumably in characters, not bytes -- TODO confirm.)
277  __device__ [[nodiscard]] inline string_view substr(size_type start, size_type length) const;
278 
  /// Return minimum value associated with the string type.
287  CUDF_HOST_DEVICE inline static string_view min();
288 
  /// Return maximum value associated with the string type.
298  CUDF_HOST_DEVICE inline static string_view max();
299 
  /// Default constructor represents an empty string: _data points at the ""
  /// literal, while _bytes and _length take their in-class zero initializers.
303  CUDF_HOST_DEVICE inline string_view() : _data("") {}
304 
  /// Create a view over `bytes` bytes starting at `data`. The character count
  /// is not computed here; _length is set to the UNKNOWN_STRING_LENGTH place-holder.
311  CUDF_HOST_DEVICE inline string_view(const char* data, size_type bytes)
312  : _data(data), _bytes(bytes), _length(UNKNOWN_STRING_LENGTH)
313  {
314  }
315 
  // All copy/move special members and the destructor are compiler-generated.
316  string_view(const string_view&) = default;
317  string_view(string_view&&) = default;
318  ~string_view() = default;
319  string_view& operator=(const string_view&) = default;
320  string_view& operator=(string_view&&) = default;
321 
322  private:
  // Pointer returned by data().
323  const char* _data{};
  // Size in bytes returned by size_bytes().
324  size_type _bytes{};
  // Character count; declared mutable (presumably so the const length() can
  // fill it in lazily when it is still UNKNOWN_STRING_LENGTH -- TODO confirm).
325  mutable size_type _length{};
326 
  /// Private helper; presumably the inverse of byte_offset(pos), mapping a byte
  /// position to a character position -- TODO confirm in string_view.cuh.
333  __device__ [[nodiscard]] inline size_type character_offset(size_type bytepos) const;
334 };
335 
336 } // namespace cudf
cudf::string_view::substr
string_view substr(size_type start, size_type length) const
Return a sub-string of this string. The original string and device memory must still be maintained for the lifetime of the returned instance.
Definition: string_view.cuh:408
cudf::char_utf8
uint32_t char_utf8
UTF-8 characters are 1-4 bytes.
Definition: string_view.hpp:29
cudf::string_view::byte_offset
size_type byte_offset(size_type pos) const
Return the byte offset from data() for a given character position.
Definition: string_view.cuh:254
types.hpp
Type declarations for libcudf.
cudf::string_view
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8 string.
Definition: string_view.hpp:49
cudf::string_view::size_bytes
CUDF_HOST_DEVICE size_type size_bytes() const
Return the number of bytes in this string.
Definition: string_view.hpp:54
cudf::string_view::operator<=
bool operator<=(const string_view &rhs) const
Returns true if this string matches or is ordered before rhs.
Definition: string_view.cuh:310
cudf::string_view::operator[]
char_utf8 operator[](size_type pos) const
Return single UTF-8 character at the given character position.
Definition: string_view.cuh:245
cudf::string_view::operator<
bool operator<(const string_view &rhs) const
Returns true if this string is ordered before rhs.
Definition: string_view.cuh:300
cudf::UNKNOWN_STRING_LENGTH
constexpr cudf::size_type UNKNOWN_STRING_LENGTH
The string length is initialized to this value as a place-holder.
Definition: string_view.hpp:36
cudf::string_view::operator>
bool operator>(const string_view &rhs) const
Returns true if rhs is ordered before this string.
Definition: string_view.cuh:305
cudf::string_view::rfind
size_type rfind(const string_view &str, size_type pos=0, size_type count=-1) const
Returns the character position of the last occurrence where the argument str is found in this string within the character range [pos, pos + count).
Definition: string_view.cuh:365
cudf::string_view::string_view
CUDF_HOST_DEVICE string_view()
Default constructor represents an empty string.
Definition: string_view.hpp:303
cudf::string_view::compare
int compare(const string_view &str) const
Compares the target string with this string. Each character is compared as a UTF-8 code-point value.
Definition: string_view.cuh:268
cudf::string_view::operator!=
bool operator!=(const string_view &rhs) const
Returns true if rhs does not match this string.
Definition: string_view.cuh:295
cudf
cuDF interfaces
Definition: aggregation.hpp:34
cudf::string_view::const_iterator
Handy iterator for navigating through encoded characters.
Definition: string_view.hpp:72
cudf::string_view::begin
const_iterator begin() const
Return new iterator pointing to the beginning of this string.
Definition: string_view.cuh:235
cudf::string_view::length
size_type length() const
Return the number of characters in this string.
Definition: string_view.cuh:106
cudf::string_view::end
const_iterator end() const
Return new iterator pointing past the end of this string.
Definition: string_view.cuh:240
cudf::string_view::max
static CUDF_HOST_DEVICE string_view max()
Return maximum value associated with the string type.
Definition: string_view.cuh:95
cudf::string_view::operator==
bool operator==(const string_view &rhs) const
Returns true if rhs matches this string exactly.
Definition: string_view.cuh:290
cudf::string_view::find
size_type find(const string_view &str, size_type pos=0, size_type count=-1) const
Returns the character position of the first occurrence where the argument str is found in this string within the character range [pos, pos + count).
Definition: string_view.cuh:322
cudf::string_view::empty
CUDF_HOST_DEVICE bool empty() const
Return true if string has no characters.
Definition: string_view.hpp:67
cudf::string_view::data
CUDF_HOST_DEVICE const char * data() const
Return a pointer to the internal device array.
Definition: string_view.hpp:62
cudf::string_view::min
static CUDF_HOST_DEVICE string_view min()
Return minimum value associated with the string type.
Definition: string_view.cuh:84
cudf::string_view::operator>=
bool operator>=(const string_view &rhs) const
Returns true if rhs matches or is ordered before this string.
Definition: string_view.cuh:316