Column Search#
- group column_search
Functions
-
std::unique_ptr<column> lower_bound(table_view const &haystack, table_view const &needles, std::vector<order> const &column_order, std::vector<null_order> const &null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Find smallest indices in a sorted table where values should be inserted to maintain order.
For each row in needles, find the first index in haystack where inserting the row still maintains its sort order.
Example:
Single column:
      idx      0   1   2   3   4
 haystack = { 10, 20, 20, 30, 50 }
 needles  = { 20 }
 result   = { 1 }
Multi Column:
      idx        0    1    2    3    4
 haystack = {{  10,  20,  20,  20,  20 },
             { 5.0,  .5,  .5,  .7,  .7 },
             {  90,  77,  78,  61,  61 }}
 needles  = {{ 20 }, { .7 }, { 61 }}
 result   = { 3 }
- Parameters:
haystack – The table containing search space
needles – Values for which to find the insert locations in the search space
column_order – Vector of column sort order
null_precedence – Vector of null_order enums specifying how nulls are ordered relative to non-null values in haystack and needles
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
A non-nullable column of elements containing the insertion points
-
std::unique_ptr<column> upper_bound(table_view const &haystack, table_view const &needles, std::vector<order> const &column_order, std::vector<null_order> const &null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Find largest indices in a sorted table where values should be inserted to maintain order.
For each row in needles, find the last index in haystack where inserting the row still maintains its sort order.
Example:
Single Column:
      idx      0   1   2   3   4
 haystack = { 10, 20, 20, 30, 50 }
 needles  = { 20 }
 result   = { 3 }
Multi Column:
      idx        0    1    2    3    4
 haystack = {{  10,  20,  20,  20,  20 },
             { 5.0,  .5,  .5,  .7,  .7 },
             {  90,  77,  78,  61,  61 }}
 needles  = {{ 20 }, { .7 }, { 61 }}
 result   = { 5 }
- Parameters:
haystack – The table containing search space
needles – Values for which to find the insert locations in the search space
column_order – Vector of column sort order
null_precedence – Vector of null_order enums specifying how nulls are ordered relative to non-null values in haystack and needles
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
A non-nullable column of elements containing the insertion points
-
bool contains(column_view const &haystack, scalar const &needle, rmm::cuda_stream_view stream = cudf::get_default_stream())#
Check if the given needle value exists in the haystack column.
Example:
Single Column:
      idx      0   1   2   3   4
 haystack = { 10, 20, 20, 30, 50 }
 needle   = { 20 }
 result   = true
- Throws:
cudf::logic_error – If haystack.type() != needle.type().
- Parameters:
haystack – The column containing search space
needle – A scalar value to check for existence in the search space
stream – CUDA stream used for device memory operations and kernel launches
- Returns:
true if the given needle value exists in the haystack column
-
std::unique_ptr<column> contains(column_view const &haystack, column_view const &needles, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#
Check if the given needles values exist in the haystack column.
The new column will have type BOOL and have the same size and null mask as the input needles column. That is, any null row in the needles column will result in a null row in the output column.
Example:
 haystack = { 10, 20, 30, 40, 50 }
 needles  = { 20, 40, 60, 80 }
 result   = { true, true, false, false }
- Throws:
cudf::logic_error – If
haystack.type() != needles.type()
- Parameters:
haystack – The column containing search space
needles – A column of values to check for existence in the search space
stream – CUDA stream used for device memory operations and kernel launches
mr – Device memory resource used to allocate the returned column’s device memory
- Returns:
A BOOL column indicating if each element in
needles
exists in the search space
-
std::unique_ptr<column> lower_bound(table_view const &haystack, table_view const &needles, std::vector<order> const &column_order, std::vector<null_order> const &null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref())#