Column Search#
- group Searching
Functions
-
std::unique_ptr<column> lower_bound(table_view const &haystack, table_view const &needles, std::vector<order> const &column_order, std::vector<null_order> const &null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Find smallest indices in a sorted table where values should be inserted to maintain order.
For each row in needles, find the first index in haystack where inserting the row still maintains its sort order.
Example:
Single column:
  idx        0   1   2   3   4
  haystack = { 10, 20, 20, 30, 50 }
  needles  = { 20 }
  result   = { 1 }
Multi Column:
  idx           0    1    2    3    4
  haystack = {{  10,  20,  20,  20,  20 },
              { 5.0,  .5,  .5,  .7,  .7 },
              {  90,  77,  78,  61,  61 }}
  needles  = {{ 20 }, { .7 }, { 61 }}
  result   = { 3 }
- Parameters:
haystack – The table containing search space
needles – Values for which to find the insert locations in the search space
column_order – Vector of column sort order
null_precedence – Vector of null_order enums specifying the null precedence
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
A non-nullable column of elements containing the insertion points
-
std::unique_ptr<column> upper_bound(table_view const &haystack, table_view const &needles, std::vector<order> const &column_order, std::vector<null_order> const &null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Find largest indices in a sorted table where values should be inserted to maintain order.
For each row in needles, find the last index in haystack where inserting the row still maintains its sort order.
Example:
Single Column:
  idx        0   1   2   3   4
  haystack = { 10, 20, 20, 30, 50 }
  needles  = { 20 }
  result   = { 3 }
Multi Column:
  idx           0    1    2    3    4
  haystack = {{  10,  20,  20,  20,  20 },
              { 5.0,  .5,  .5,  .7,  .7 },
              {  90,  77,  78,  61,  61 }}
  needles  = {{ 20 }, { .7 }, { 61 }}
  result   = { 5 }
- Parameters:
haystack – The table containing search space
needles – Values for which to find the insert locations in the search space
column_order – Vector of column sort order
null_precedence – Vector of null_order enums specifying the null precedence
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
A non-nullable column of elements containing the insertion points
-
bool contains(column_view const &haystack, scalar const &needle, rmm::cuda_stream_view stream = cudf::get_default_stream())#
Check if the given needle value exists in the haystack column.
Single Column:
  idx        0   1   2   3   4
  haystack = { 10, 20, 20, 30, 50 }
  needle   = { 20 }
  result   = true
- Throws:
cudf::logic_error – If haystack.type() != needle.type().
- Parameters:
haystack – The column containing search space
needle – A scalar value to check for existence in the search space
stream – CUDA stream used for device memory operations and kernel launches
- Returns:
true if the given needle value exists in the haystack column
-
std::unique_ptr<column> contains(column_view const &haystack, column_view const &needles, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Check if the given needles values exist in the haystack column.
The new column will have type BOOL and have the same size and null mask as the input needles column. That is, any null row in the needles column will result in a null row in the output column.
  haystack = { 10, 20, 30, 40, 50 }
  needles  = { 20, 40, 60, 80 }
  result   = { true, true, false, false }
- Throws:
cudf::logic_error – If haystack.type() != needles.type()
- Parameters:
haystack – The column containing search space
needles – A column of values to check for existence in the search space
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
A BOOL column indicating if each element in needles exists in the search space
-
std::unique_ptr<column> lower_bound(table_view const &haystack, table_view const &needles, std::vector<order> const &column_order, std::vector<null_order> const &null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#