APIs for tokenizing and counting tokens in strings columns. More...

#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/export.hpp>
#include <cudf/utilities/memory_resource.hpp>

Classes
struct	nvtext::tokenize_vocabulary
	Vocabulary object to be used with nvtext::tokenize_with_vocabulary. More...

Namespaces
	nvtext
	NVText APIs.

Functions
std::unique_ptr< cudf::column >	nvtext::tokenize (cudf::strings_column_view const &input, cudf::string_scalar const &delimiter=cudf::string_scalar{""}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
	Returns a single column of strings by tokenizing the input strings column using the provided characters as delimiters. More...

std::unique_ptr< cudf::column >	nvtext::tokenize (cudf::strings_column_view const &input, cudf::strings_column_view const &delimiters, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
	Returns a single column of strings by tokenizing the input strings column using multiple strings as delimiters. More...

std::unique_ptr< cudf::column >	nvtext::count_tokens (cudf::strings_column_view const &input, cudf::string_scalar const &delimiter=cudf::string_scalar{""}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
	Returns the number of tokens in each string of a strings column. More...

std::unique_ptr< cudf::column >	nvtext::count_tokens (cudf::strings_column_view const &input, cudf::strings_column_view const &delimiters, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
	Returns the number of tokens in each string of a strings column, using multiple delimiter strings to identify the tokens. More...

std::unique_ptr< cudf::column >	nvtext::character_tokenize (cudf::strings_column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
	Returns a single column of strings by converting each character to a string. More...

std::unique_ptr< cudf::column >	nvtext::detokenize (cudf::strings_column_view const &input, cudf::column_view const &row_indices, cudf::string_scalar const &separator=cudf::string_scalar(" "), rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
	Creates a strings column from a strings column of tokens and an associated column of row ids. More...

std::unique_ptr< tokenize_vocabulary >	nvtext::load_vocabulary (cudf::strings_column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
	Creates a tokenize_vocabulary object from a strings column. More...

std::unique_ptr< cudf::column >	nvtext::tokenize_with_vocabulary (cudf::strings_column_view const &input, tokenize_vocabulary const &vocabulary, cudf::string_scalar const &delimiter, cudf::size_type default_id=-1, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
	Returns the token ids for each string in the input column by looking up each delimited token in the given vocabulary. More...

Detailed Description

APIs for tokenizing and counting tokens in strings columns.

Definition in file tokenize.hpp.

Classes

Namespaces

Functions

Detailed Description