Strings Contains#
- group strings_contains
Functions
-
std::unique_ptr<column> contains_re(strings_column_view const &input, regex_program const &prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Returns a boolean column identifying rows which match the given regex_program object.
Example: s = ["abc", "123", "def456"] p = regex_program::create("\\d+") r = contains_re(s, p) r is now [false, true, true]
Any null string entries return corresponding null output column entries.
See the Regex Features page for details on patterns supported by this API.
- Parameters:
input – Strings instance for this operation
prog – Regex program instance
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
New column of boolean results for each string
-
std::unique_ptr<column> matches_re(strings_column_view const &input, regex_program const &prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Returns a boolean column identifying rows which match the given regex_program object, but only at the beginning of the string.
Example: s = ["abc", "123", "def456"] p = regex_program::create("\\d+") r = matches_re(s, p) r is now [false, true, false]
Any null string entries return corresponding null output column entries.
See the Regex Features page for details on patterns supported by this API.
- Parameters:
input – Strings instance for this operation
prog – Regex program instance
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
New column of boolean results for each string
-
std::unique_ptr<column> count_re(strings_column_view const &input, regex_program const &prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Returns the number of times the given regex_program’s pattern matches in each string.
Example: s = ["abc", "123", "def45"] p = regex_program::create("\\d") r = count_re(s, p) r is now [0, 3, 2]
Any null string entries return corresponding null output column entries.
See the Regex Features page for details on patterns supported by this API.
- Parameters:
input – Strings instance for this operation
prog – Regex program instance
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
New column of match counts for each string
-
std::unique_ptr<column> like(strings_column_view const &input, string_scalar const &pattern, string_scalar const &escape_character = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Returns a boolean column identifying rows which match the given like pattern.
The like pattern expects only 2 wildcard special characters:
% – zero or more of any character
_ – any single character
Example: s = ["azaa", "ababaabba", "aaxa"] r = like(s, "%a_aa%") r is now [1, 1, 0] r = like(s, "a__a") r is now [1, 0, 1]
Specify an escape character to include either % or _ in the search. The escape_character is expected to be either 0 or 1 characters. If more than one character is specified only the first character is used.
Example: s = ["abc_def", "abc1def", "abc_"] r = like(s, "abc/_d%", "/") r is now [1, 0, 0]
Any null string entries return corresponding null output column entries.
- Throws:
cudf::logic_error – if pattern or escape_character is invalid
- Parameters:
input – Strings instance for this operation
pattern – Like pattern to match within each string
escape_character – Optional character that specifies the escape prefix. Default is no escape character.
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
New boolean column
-
std::unique_ptr<column> like(strings_column_view const &input, strings_column_view const &patterns, string_scalar const &escape_character = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Returns a boolean column identifying rows which match the corresponding like pattern in the given patterns.
The like pattern expects only 2 wildcard special characters:
% – zero or more of any character
_ – any single character
Example: s = ["azaa", "ababaabba", "aaxa"] p = ["%a", "b%", "__x_"] r = like(s, p) r is now [1, 0, 1]
Specify an escape character to include either % or _ in the search. The escape_character is expected to be either 0 or 1 characters. If more than one character is specified only the first character is used. The escape character is applied to all patterns.
Any null string entries return corresponding null output column entries.
- Throws:
cudf::logic_error – if patterns contains nulls or escape_character is invalid
cudf::logic_error – if patterns.size() != input.size()
- Parameters:
input – Strings instance for this operation
patterns – Like patterns to match within each corresponding string
escape_character – Optional character that specifies the escape prefix. Default is no escape character.
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
New boolean column
-
std::unique_ptr<column> findall(strings_column_view const &input, regex_program const &prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Returns a lists column of strings for each matching occurrence using the regex_program pattern within each string.
Each output row includes all the substrings within the corresponding input row that match the given pattern. If no matches are found, the output row is empty.
Example: s = ["bunny", "rabbit", "hare", "dog"] p = regex_program::create("[ab]") r = findall(s, p) r is now a lists column like: [ ["b"] ["a","b","b"] ["a"] [] ]
A null output row occurs if the corresponding input row is null.
See the Regex Features page for details on patterns supported by this API.
- Parameters:
input – Strings instance for this operation
prog – Regex program instance
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
New lists column of strings
-
std::unique_ptr<column> find_re(strings_column_view const &input, regex_program const &prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Returns the starting character index of the first match for the given pattern in each row of the input column.
Example: s = ["bunny", "rabbit", "hare", "dog"] p = regex_program::create("[be]") r = find_re(s, p) r is now [0, 2, 3, -1]
A null output row occurs if the corresponding input row is null. A -1 is returned for rows that do not contain a match.
See the Regex Features page for details on patterns supported by this API.
- Parameters:
input – Strings instance for this operation
prog – Regex program instance
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
New column of integers
-
std::unique_ptr<column> contains_re(strings_column_view const &input, regex_program const &prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#