string_view.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2022, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/types.hpp>
19 
20 #include <cuda_runtime.h>
21 
22 #include <iterator>
23 
29 namespace cudf {
30 
/// A single UTF-8 encoded character; UTF-8 characters are 1-4 bytes, so one fits in 32 bits.
31 using char_utf8 = uint32_t;
32 
44 class string_view {
45  public:
51  CUDF_HOST_DEVICE [[nodiscard]] inline size_type size_bytes() const { return _bytes; }
57  __device__ [[nodiscard]] inline size_type length() const;
63  CUDF_HOST_DEVICE [[nodiscard]] inline const char* data() const { return _data; }
64 
70  CUDF_HOST_DEVICE [[nodiscard]] inline bool empty() const { return size_bytes() == 0; }
71 
77  public:
78  using difference_type = ptrdiff_t;
79  using value_type = char_utf8;
80  using reference = char_utf8&;
81  using pointer = char_utf8*;
82  using iterator_category = std::input_iterator_tag;
83  __device__ inline const_iterator(const string_view& str, size_type pos);
84  const_iterator(const const_iterator& mit) = default;
85  const_iterator(const_iterator&& mit) = default;
86  const_iterator& operator=(const const_iterator&) = default;
88  __device__ inline const_iterator& operator++();
89  __device__ inline const_iterator operator++(int);
90  __device__ inline const_iterator& operator+=(difference_type);
91  __device__ inline const_iterator operator+(difference_type);
92  __device__ inline const_iterator& operator--();
93  __device__ inline const_iterator operator--(int);
94  __device__ inline const_iterator& operator-=(difference_type);
95  __device__ inline const_iterator operator-(difference_type);
96  __device__ inline bool operator==(const const_iterator&) const;
97  __device__ inline bool operator!=(const const_iterator&) const;
98  __device__ inline bool operator<(const const_iterator&) const;
99  __device__ inline bool operator<=(const const_iterator&) const;
100  __device__ inline bool operator>(const const_iterator&) const;
101  __device__ inline bool operator>=(const const_iterator&) const;
102  __device__ inline char_utf8 operator*() const;
103  [[nodiscard]] __device__ inline size_type position() const;
104  [[nodiscard]] __device__ inline size_type byte_offset() const;
105 
106  private:
107  const char* p{};
108  size_type bytes{};
109  size_type char_pos{};
110  size_type byte_pos{};
112  };
113 
119  __device__ [[nodiscard]] inline const_iterator begin() const;
125  __device__ [[nodiscard]] inline const_iterator end() const;
126 
133  __device__ inline char_utf8 operator[](size_type pos) const;
140  __device__ [[nodiscard]] inline size_type byte_offset(size_type pos) const;
141 
155  __device__ [[nodiscard]] inline int compare(const string_view& str) const;
170  __device__ inline int compare(const char* str, size_type bytes) const;
171 
178  __device__ inline bool operator==(const string_view& rhs) const;
185  __device__ inline bool operator!=(const string_view& rhs) const;
192  __device__ inline bool operator<(const string_view& rhs) const;
199  __device__ inline bool operator>(const string_view& rhs) const;
206  __device__ inline bool operator<=(const string_view& rhs) const;
213  __device__ inline bool operator>=(const string_view& rhs) const;
214 
225  __device__ [[nodiscard]] inline size_type find(const string_view& str,
226  size_type pos = 0,
227  size_type count = -1) const;
239  __device__ inline size_type find(const char* str,
240  size_type bytes,
241  size_type pos = 0,
242  size_type count = -1) const;
253  __device__ [[nodiscard]] inline size_type find(char_utf8 character,
254  size_type pos = 0,
255  size_type count = -1) const;
266  __device__ [[nodiscard]] inline size_type rfind(const string_view& str,
267  size_type pos = 0,
268  size_type count = -1) const;
280  __device__ inline size_type rfind(const char* str,
281  size_type bytes,
282  size_type pos = 0,
283  size_type count = -1) const;
294  __device__ [[nodiscard]] inline size_type rfind(char_utf8 character,
295  size_type pos = 0,
296  size_type count = -1) const;
297 
306  __device__ [[nodiscard]] inline string_view substr(size_type start, size_type length) const;
307 
316  CUDF_HOST_DEVICE inline static string_view min();
317 
327  CUDF_HOST_DEVICE inline static string_view max();
328 
332  CUDF_HOST_DEVICE inline string_view() : _data("") {}
333 
340  CUDF_HOST_DEVICE inline string_view(const char* data, size_type bytes)
341  : _data(data), _bytes(bytes), _length(UNKNOWN_STRING_LENGTH)
342  {
343  }
344 
345  string_view(const string_view&) = default;
346  string_view(string_view&&) = default;
347  ~string_view() = default;
353  string_view& operator=(const string_view&) = default;
360 
366  static inline cudf::size_type const npos{-1};
367 
368  private:
369  const char* _data{};
370  size_type _bytes{};
371  mutable size_type _length{};
372 
378  static inline cudf::size_type const UNKNOWN_STRING_LENGTH{-1};
379 
386  __device__ [[nodiscard]] inline size_type character_offset(size_type bytepos) const;
387 
399  template <bool forward>
400  __device__ inline size_type find_impl(const char* str,
401  size_type bytes,
402  size_type pos,
403  size_type count) const;
404 };
405 
406 } // namespace cudf
cudf::string_view::operator=
string_view & operator=(string_view &&)=default
Move assignment operator.
cudf::string_view::substr
string_view substr(size_type start, size_type length) const
Return a sub-string of this string. The original string and device memory must still be maintained for the lifetime of the returned instance.
Definition: string_view.cuh:399
cudf::string_view::end
const_iterator end() const
Return new iterator pointing past the end of this string.
cudf::char_utf8
uint32_t char_utf8
UTF-8 characters are 1-4 bytes.
Definition: string_view.hpp:31
numeric::operator+
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator+(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
Definition: fixed_point.hpp:703
cudf::size_type
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:80
cudf::string_view::byte_offset
size_type byte_offset(size_type pos) const
Return the byte offset from data() for a given character position.
Definition: string_view.cuh:260
types.hpp
Type declarations for libcudf.
cudf::string_view::operator=
string_view & operator=(const string_view &)=default
Copy assignment operator.
cudf::string_view
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8 string.
Definition: string_view.hpp:44
cudf::string_view::size_bytes
CUDF_HOST_DEVICE size_type size_bytes() const
Return the number of bytes in this string.
Definition: string_view.hpp:51
cudf::string_view::operator<=
bool operator<=(const string_view &rhs) const
Returns true if this string matches or is ordered before rhs.
Definition: string_view.cuh:316
cudf::string_view::operator[]
char_utf8 operator[](size_type pos) const
Return single UTF-8 character at the given character position.
Definition: string_view.cuh:251
cudf::string_view::operator<
bool operator<(const string_view &rhs) const
Returns true if this string is ordered before rhs.
Definition: string_view.cuh:306
cudf::string_view::operator>
bool operator>(const string_view &rhs) const
Returns true if rhs is ordered before this string.
Definition: string_view.cuh:311
cudf::string_view::rfind
size_type rfind(const string_view &str, size_type pos=0, size_type count=-1) const
Returns the character position of the last occurrence where the argument str is found in this string within the character range [pos, pos + count).
Definition: string_view.cuh:376
cudf::string_view::string_view
CUDF_HOST_DEVICE string_view()
Default constructor represents an empty string.
Definition: string_view.hpp:332
cudf::string_view::npos
static cudf::size_type const npos
No-position value.
Definition: string_view.hpp:366
cudf::string_view::string_view
string_view(string_view &&)=default
Move constructor.
cudf::string_view::compare
int compare(const string_view &str) const
Comparing target string with this string. Each character is compared as a UTF-8 code-point value.
Definition: string_view.cuh:274
cudf::string_view::operator!=
bool operator!=(const string_view &rhs) const
Returns true if rhs does not match this string.
Definition: string_view.cuh:301
numeric::operator*
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator*(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
Definition: fixed_point.hpp:739
cudf::string_view::begin
const_iterator begin() const
Return new iterator pointing to the beginning of this string.
cudf
cuDF interfaces
Definition: aggregation.hpp:34
numeric::operator-
CUDF_HOST_DEVICE fixed_point< Rep1, Rad1 > operator-(fixed_point< Rep1, Rad1 > const &lhs, fixed_point< Rep1, Rad1 > const &rhs)
Definition: fixed_point.hpp:721
cudf::string_view::const_iterator
Handy iterator for navigating through encoded characters.
Definition: string_view.hpp:75
cudf::string_view::string_view
string_view(const string_view &)=default
Copy constructor.
cudf::string_view::length
size_type length() const
Return the number of characters in this string.
Definition: string_view.cuh:110
cudf::string_view::max
static CUDF_HOST_DEVICE string_view max()
Return maximum value associated with the string type.
Definition: string_view.cuh:98
cudf::string_view::operator==
bool operator==(const string_view &rhs) const
Returns true if rhs matches this string exactly.
Definition: string_view.cuh:296
cudf::string_view::find
size_type find(const string_view &str, size_type pos=0, size_type count=-1) const
Returns the character position of the first occurrence where the argument str is found in this string within the character range [pos, pos + count).
Definition: string_view.cuh:328
cudf::string_view::empty
CUDF_HOST_DEVICE bool empty() const
Return true if string has no characters.
Definition: string_view.hpp:70
cudf::string_view::data
CUDF_HOST_DEVICE const char * data() const
Return a pointer to the internal device array.
Definition: string_view.hpp:63
cudf::string_view::min
static CUDF_HOST_DEVICE string_view min()
Return minimum value associated with the string type.
Definition: string_view.cuh:87
cudf::string_view::operator>=
bool operator>=(const string_view &rhs) const
Returns true if rhs matches or is ordered before this string.
Definition: string_view.cuh:322