join.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
10 #include <cudf/types.hpp>
12 #include <cudf/utilities/export.hpp>
14 
15 #include <rmm/cuda_stream_view.hpp>
16 #include <rmm/device_uvector.hpp>
17 
18 #include <cuda/std/limits>
19 
20 #include <cstdint>
21 
22 namespace CUDF_EXPORT cudf {
23 
/**
 * @brief Specifies the type of join operation to perform.
 *
 * The underlying type is `int32_t`, and every enumerator has an explicitly assigned value.
 */
37 enum class join_kind : int32_t {
38  INNER_JOIN = 0,  ///< Inner join: only matching rows from both tables.
39  LEFT_JOIN = 1,  ///< Left join: all rows from left table plus matching rows from right.
40  FULL_JOIN = 2,  ///< Full outer join: all rows from both tables.
41  LEFT_SEMI_JOIN = 3,  ///< Left semi join: left rows that have matches in right table.
42  LEFT_ANTI_JOIN = 4  ///< Left anti join: left rows that have no matches in right table.
43 };
44 
/**
 * @brief Specifies whether a join implementation should apply an optional probe-side prefilter.
 *
 * Backed by `bool`: `NO` is `false` and `YES` is `true`.
 */
51 enum class join_prefilter : bool { NO = false, YES = true };
52 
/**
 * @brief Sentinel value used to indicate an unmatched row index in join operations.
 *
 * Defined as the smallest value representable by `size_type`.
 */
63 CUDF_HOST_DEVICE constexpr size_type JoinNoMatch = cuda::std::numeric_limits<size_type>::min();
64 
74  std::unique_ptr<rmm::device_uvector<size_type>>
77 
/**
 * @brief Construct a join_match_context.
 *
 * @param left_table View of the left table involved in the join operation; copied into
 * `_left_table`.
 * @param match_counts Owning pointer to a device vector of `size_type`; ownership is moved
 * into `_match_counts`.
 */
85  join_match_context(table_view const& left_table, // NOLINT(modernize-pass-by-value)
86  std::unique_ptr<rmm::device_uvector<size_type>> match_counts)
87  : _left_table{left_table}, _match_counts{std::move(match_counts)}
88  {
89  }
/// @brief Virtual destructor for proper polymorphic deletion.
90  virtual ~join_match_context() = default;
91 };
92 
104  std::unique_ptr<join_match_context>
108 };
109 
143 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
144  std::unique_ptr<rmm::device_uvector<size_type>>>
145 inner_join(cudf::table_view const& left_keys,
146  cudf::table_view const& right_keys,
147  null_equality compare_nulls = null_equality::EQUAL,
150 
184 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
185  std::unique_ptr<rmm::device_uvector<size_type>>>
186 left_join(cudf::table_view const& left_keys,
187  cudf::table_view const& right_keys,
188  null_equality compare_nulls = null_equality::EQUAL,
191 
225 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
226  std::unique_ptr<rmm::device_uvector<size_type>>>
227 full_join(cudf::table_view const& left_keys,
228  cudf::table_view const& right_keys,
229  null_equality compare_nulls = null_equality::EQUAL,
232 
256 std::unique_ptr<cudf::table> cross_join(
257  cudf::table_view const& left,
258  cudf::table_view const& right,
261 
330 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
331  std::unique_ptr<rmm::device_uvector<size_type>>>
333  cudf::table_view const& right,
336  cudf::ast::expression const& predicate,
340 
403 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
404  std::unique_ptr<rmm::device_uvector<size_type>>>
406  cudf::table_view const& left,
407  cudf::table_view const& right,
410  std::string const& predicate_code,
412  bool is_ptx = false,
415 
432 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
433  std::unique_ptr<rmm::device_uvector<size_type>>>
435  cudf::table_view const& left,
436  cudf::table_view const& right,
439  cudf::ast::expression const& predicate,
443  // end of group
445 
446 } // namespace CUDF_EXPORT cudf
A set of cudf::column_view objects of the same size.
Definition: table_view.hpp:189
join_prefilter
Specifies whether a join implementation should apply an optional probe-side prefilter.
Definition: join.hpp:51
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > left_join(cudf::table_view const &left_keys, cudf::table_view const &right_keys, null_equality compare_nulls=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a pair of row index vectors corresponding to a left join between the specified tables.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > full_join(cudf::table_view const &left_keys, cudf::table_view const &right_keys, null_equality compare_nulls=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a pair of row index vectors corresponding to a full join between the specified tables.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > filter_join_indices(cudf::table_view const &left, cudf::table_view const &right, cudf::device_span< size_type const > left_indices, cudf::device_span< size_type const > right_indices, cudf::ast::expression const &predicate, cudf::join_kind join_kind, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters join result indices based on a conditional predicate and join type.
constexpr CUDF_HOST_DEVICE size_type JoinNoMatch
Sentinel value used to indicate an unmatched row index in join operations.
Definition: join.hpp:63
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > filter_join_indices_jit(cudf::table_view const &left, cudf::table_view const &right, cudf::device_span< size_type const > left_indices, cudf::device_span< size_type const > right_indices, cudf::ast::expression const &predicate, cudf::join_kind join_kind, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters join indices using a JIT-compiled predicate from an AST expression.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > inner_join(cudf::table_view const &left_keys, cudf::table_view const &right_keys, null_equality compare_nulls=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a pair of row index vectors corresponding to an inner join between the specified tables.
join_kind
Specifies the type of join operation to perform.
Definition: join.hpp:37
std::unique_ptr< cudf::table > cross_join(cudf::table_view const &left, cudf::table_view const &right, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Performs a cross join on two tables (left, right).
@ LEFT_ANTI_JOIN
Left anti join: left rows that have no matches in right table.
@ LEFT_SEMI_JOIN
Left semi join: left rows that have matches in right table.
@ FULL_JOIN
Full outer join: all rows from both tables.
@ LEFT_JOIN
Left join: all rows from left table plus matching rows from right.
@ INNER_JOIN
Inner join: only matching rows from both tables.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
cuda::std::span< T, Extent > device_span
Device span is an alias of cuda::std::span.
Definition: span.hpp:320
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:140
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
cuDF interfaces
Definition: host_udf.hpp:26
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:62
Holds context information about matches between tables during a join operation.
Definition: join.hpp:72
std::unique_ptr< rmm::device_uvector< size_type > > _match_counts
Definition: join.hpp:75
table_view _left_table
View of the left table involved in the join operation.
Definition: join.hpp:73
join_match_context(table_view const &left_table, std::unique_ptr< rmm::device_uvector< size_type >> match_counts)
Construct a join_match_context.
Definition: join.hpp:85
virtual ~join_match_context()=default
Virtual destructor for proper polymorphic deletion.
Stores context information for partitioned join operations.
Definition: join.hpp:103
size_type left_start_idx
The starting row index of the current left table partition.
Definition: join.hpp:106
std::unique_ptr< join_match_context > left_table_context
The match context from a previous inner_join_match_context call.
Definition: join.hpp:105
size_type left_end_idx
The ending row index (exclusive) of the current left table partition.
Definition: join.hpp:107
Class definitions for table_view and mutable_table_view.
Type declarations for libcudf.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:21