join.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
10 #include <cudf/types.hpp>
12 #include <cudf/utilities/export.hpp>
14 
15 #include <rmm/cuda_stream_view.hpp>
16 #include <rmm/device_uvector.hpp>
17 
18 #include <cuda/std/limits>
19 
20 #include <cstdint>
21 
22 namespace CUDF_EXPORT cudf {
23 
/// Specifies the type of join operation to perform.
37 enum class join_kind : int32_t {
38  INNER_JOIN = 0,  ///< Inner join: only matching rows from both tables.
39  LEFT_JOIN = 1,  ///< Left join: all rows from left table plus matching rows from right.
40  FULL_JOIN = 2,  ///< Full outer join: all rows from both tables.
41  LEFT_SEMI_JOIN = 3,  ///< Left semi join: left rows that have matches in right table.
42  LEFT_ANTI_JOIN = 4  ///< Left anti join: left rows that have no matches in right table.
43 };
44 
/// Specifies whether a join implementation should apply an optional prefilter
/// (`YES`) or not (`NO`).
/// NOTE(review): the rendered summary for this enum is cut off after "reduces
/// candidate row..."; consult the header for the full description.
52 enum class join_prefilter : bool { NO = false, YES = true };
53 
/// Sentinel value used to indicate an unmatched row index in join operations.
/// It is the minimum value representable by `size_type`, and is declared
/// `CUDF_HOST_DEVICE` so it is usable on host and device.
64 CUDF_HOST_DEVICE constexpr size_type JoinNoMatch = cuda::std::numeric_limits<size_type>::min();
65 
// join_match_context: holds context information about matches between tables
// during a join operation.
// NOTE(review): this listing omits several lines of the struct, including its
// opening line, the `_left_table` member (view of the left table involved in
// the join operation) and the `_match_counts` member name that completes the
// type on the next line.
75  std::unique_ptr<rmm::device_uvector<size_type>>
78 
// Construct a join_match_context: stores `left_table` in `_left_table` and
// takes ownership of `match_counts` by moving it into `_match_counts`.
86  join_match_context(table_view const& left_table, // NOLINT(modernize-pass-by-value)
87  std::unique_ptr<rmm::device_uvector<size_type>> match_counts)
88  : _left_table{left_table}, _match_counts{std::move(match_counts)}
89  {
90  }
// Copying is disabled. The defaulted move constructor and move assignment
// operator are declared on lines this listing omits.
91  join_match_context(join_match_context const&) = delete;
92  join_match_context& operator=(join_match_context const&) = delete;
// Virtual destructor for proper polymorphic deletion.
99  virtual ~join_match_context() = default;
100 };
101 
// join_partition_context: stores context information for partitioned join
// operations.
// NOTE(review): this listing omits the struct's opening line and its member
// names. Per the generated index the members are:
//   left_table_context - the match context from a previous
//                        inner_join_match_context call (its type is the line below)
//   left_start_idx     - starting row index of the current left table partition
//   left_end_idx       - ending row index (exclusive) of the current left table partition
113  std::unique_ptr<join_match_context>
117 };
118 
// Returns a pair of row index vectors corresponding to an inner join between
// the specified tables.
//
// left_keys / right_keys: the two key tables being joined.
// compare_nulls: whether two nulls are considered equal or unequal
//                (defaults to null_equality::EQUAL).
// NOTE(review): the declaration continues on lines this listing omits; per the
// generated index the remaining parameters are
//   rmm::cuda_stream_view stream = cudf::get_default_stream(),
//   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()
152 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
153  std::unique_ptr<rmm::device_uvector<size_type>>>
154 inner_join(cudf::table_view const& left_keys,
155  cudf::table_view const& right_keys,
156  null_equality compare_nulls = null_equality::EQUAL,
159 
// Returns a pair of row index vectors corresponding to a left join between
// the specified tables.
//
// left_keys / right_keys: the two key tables being joined.
// compare_nulls: whether two nulls are considered equal or unequal
//                (defaults to null_equality::EQUAL).
// NOTE(review): the declaration continues on lines this listing omits; per the
// generated index the remaining parameters are
//   rmm::cuda_stream_view stream = cudf::get_default_stream(),
//   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()
193 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
194  std::unique_ptr<rmm::device_uvector<size_type>>>
195 left_join(cudf::table_view const& left_keys,
196  cudf::table_view const& right_keys,
197  null_equality compare_nulls = null_equality::EQUAL,
200 
// Returns a pair of row index vectors corresponding to a full join between
// the specified tables.
//
// left_keys / right_keys: the two key tables being joined.
// compare_nulls: whether two nulls are considered equal or unequal
//                (defaults to null_equality::EQUAL).
// NOTE(review): the declaration continues on lines this listing omits; per the
// generated index the remaining parameters are
//   rmm::cuda_stream_view stream = cudf::get_default_stream(),
//   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()
234 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
235  std::unique_ptr<rmm::device_uvector<size_type>>>
236 full_join(cudf::table_view const& left_keys,
237  cudf::table_view const& right_keys,
238  null_equality compare_nulls = null_equality::EQUAL,
241 
// Performs a cross join on two tables (left, right) and returns the result as
// an owned cudf::table.
// NOTE(review): the declaration continues on lines this listing omits; per the
// generated index the remaining parameters are
//   rmm::cuda_stream_view stream = cudf::get_default_stream(),
//   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()
265 std::unique_ptr<cudf::table> cross_join(
266  cudf::table_view const& left,
267  cudf::table_view const& right,
270 
// NOTE(review): the line carrying this function's name is omitted from the
// listing. It is presumably filter_join_indices, which "filters join result
// indices based on a conditional predicate and join type" -- confirm against
// the header.
// Per the generated index that function's full parameter list is: left, right,
// left_indices and right_indices (cudf::device_span<size_type const>),
// predicate, join_kind, then stream and mr with library defaults.
340 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
341  std::unique_ptr<rmm::device_uvector<size_type>>>
343  cudf::table_view const& right,
346  cudf::ast::expression const& predicate,
350 
// Returns the exact output size of filter_join_indices without materializing
// the filtered index vectors. The result is [[nodiscard]].
// NOTE(review): several parameter lines are omitted from this listing; per the
// generated index the full list is: left, right, left_indices and
// right_indices (cudf::device_span<size_type const>), predicate, join_kind,
// and stream = cudf::get_default_stream(). No memory-resource parameter is
// listed for this function.
380 [[nodiscard]] std::size_t filter_join_indices_output_size(
381  cudf::table_view const& left,
382  cudf::table_view const& right,
385  cudf::ast::expression const& predicate,
388 
// NOTE(review): the line carrying this function's name and several parameter
// lines are omitted from the listing, and the generated index shows no entry
// with this signature. From the visible parameters it takes the predicate as a
// string (`predicate_code`) plus an `is_ptx` flag defaulting to false --
// presumably a string-predicate variant of the JIT join-index filter, with
// `is_ptx` marking the string as PTX; confirm against the header.
451 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
452  std::unique_ptr<rmm::device_uvector<size_type>>>
454  cudf::table_view const& left,
455  cudf::table_view const& right,
458  std::string const& predicate_code,
460  bool is_ptx = false,
463 
// NOTE(review): the line carrying this function's name is omitted from the
// listing. It is presumably filter_join_indices_jit, which "filters join
// indices using a JIT-compiled predicate from an AST expression" -- confirm
// against the header.
// Per the generated index that function's full parameter list is: left, right,
// left_indices and right_indices (cudf::device_span<size_type const>),
// predicate, join_kind, then stream and mr with library defaults.
480 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
481  std::unique_ptr<rmm::device_uvector<size_type>>>
483  cudf::table_view const& left,
484  cudf::table_view const& right,
487  cudf::ast::expression const& predicate,
491  // end of group
493 
494 } // namespace CUDF_EXPORT cudf
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
join_prefilter
Specifies whether a join implementation should apply an optional prefilter that reduces candidate row...
Definition: join.hpp:52
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > left_join(cudf::table_view const &left_keys, cudf::table_view const &right_keys, null_equality compare_nulls=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a pair of row index vectors corresponding to a left join between the specified tables.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > full_join(cudf::table_view const &left_keys, cudf::table_view const &right_keys, null_equality compare_nulls=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a pair of row index vectors corresponding to a full join between the specified tables.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > filter_join_indices(cudf::table_view const &left, cudf::table_view const &right, cudf::device_span< size_type const > left_indices, cudf::device_span< size_type const > right_indices, cudf::ast::expression const &predicate, cudf::join_kind join_kind, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters join result indices based on a conditional predicate and join type.
constexpr CUDF_HOST_DEVICE size_type JoinNoMatch
Sentinel value used to indicate an unmatched row index in join operations.
Definition: join.hpp:64
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > filter_join_indices_jit(cudf::table_view const &left, cudf::table_view const &right, cudf::device_span< size_type const > left_indices, cudf::device_span< size_type const > right_indices, cudf::ast::expression const &predicate, cudf::join_kind join_kind, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters join indices using a JIT-compiled predicate from an AST expression.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > inner_join(cudf::table_view const &left_keys, cudf::table_view const &right_keys, null_equality compare_nulls=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a pair of row index vectors corresponding to an inner join between the specified tables.
join_kind
Specifies the type of join operation to perform.
Definition: join.hpp:37
std::size_t filter_join_indices_output_size(cudf::table_view const &left, cudf::table_view const &right, cudf::device_span< size_type const > left_indices, cudf::device_span< size_type const > right_indices, cudf::ast::expression const &predicate, cudf::join_kind join_kind, rmm::cuda_stream_view stream=cudf::get_default_stream())
Returns the exact output size of filter_join_indices without materializing the filtered index vectors...
std::unique_ptr< cudf::table > cross_join(cudf::table_view const &left, cudf::table_view const &right, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Performs a cross join on two tables (left, right).
@ LEFT_ANTI_JOIN
Left anti join: left rows that have no matches in right table.
@ LEFT_SEMI_JOIN
Left semi join: left rows that have matches in right table.
@ FULL_JOIN
Full outer join: all rows from both tables.
@ LEFT_JOIN
Left join: all rows from left table plus matching rows from right.
@ INNER_JOIN
Inner join: only matching rows from both tables.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::resource_ref< cuda::mr::device_accessible > device_async_resource_ref
cuda::std::span< T, Extent > device_span
Device span is an alias of cuda::std::span.
Definition: span.hpp:320
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:141
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:85
cuDF interfaces
Definition: host_udf.hpp:26
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:62
Holds context information about matches between tables during a join operation.
Definition: join.hpp:73
join_match_context(join_match_context &&)=default
Move constructor.
join_match_context & operator=(join_match_context &&)=default
Move assignment operator.
std::unique_ptr< rmm::device_uvector< size_type > > _match_counts
Definition: join.hpp:76
table_view _left_table
View of the left table involved in the join operation.
Definition: join.hpp:74
join_match_context(table_view const &left_table, std::unique_ptr< rmm::device_uvector< size_type >> match_counts)
Construct a join_match_context.
Definition: join.hpp:86
virtual ~join_match_context()=default
Virtual destructor for proper polymorphic deletion.
Stores context information for partitioned join operations.
Definition: join.hpp:112
size_type left_start_idx
The starting row index of the current left table partition.
Definition: join.hpp:115
std::unique_ptr< join_match_context > left_table_context
The match context from a previous inner_join_match_context call.
Definition: join.hpp:114
size_type left_end_idx
The ending row index (exclusive) of the current left table partition.
Definition: join.hpp:116
Class definitions for (mutable)_table_view
Type declarations for libcudf.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:21