join.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
10 #include <cudf/types.hpp>
12 #include <cudf/utilities/export.hpp>
14 
15 #include <rmm/cuda_stream_view.hpp>
16 #include <rmm/device_uvector.hpp>
17 
18 #include <cuda/std/limits>
19 
20 #include <cstdint>
21 
22 namespace CUDF_EXPORT cudf {
23 
37 enum class join_kind : int32_t {  // Specifies the type of join operation to perform.
38  INNER_JOIN = 0,  ///< Inner join: only matching rows from both tables.
39  LEFT_JOIN = 1,  ///< Left join: all rows from left table plus matching rows from right.
40  FULL_JOIN = 2,  ///< Full outer join: all rows from both tables.
41  LEFT_SEMI_JOIN = 3,  ///< Left semi join: left rows that have matches in right table.
42  LEFT_ANTI_JOIN = 4  ///< Left anti join: left rows that have no matches in right table.
43 };
44 
55 CUDF_HOST_DEVICE constexpr size_type JoinNoMatch = cuda::std::numeric_limits<size_type>::min();  ///< Sentinel value used to indicate an unmatched row index in join operations; equals the minimum representable size_type.
56 
66  std::unique_ptr<rmm::device_uvector<size_type>>
69 
78  std::unique_ptr<rmm::device_uvector<size_type>> match_counts)
79  : _left_table{left_table}, _match_counts{std::move(match_counts)}
80  {
81  }
82  virtual ~join_match_context() = default;
83 };
84 
96  std::unique_ptr<join_match_context>
100 };
101 
135 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
136  std::unique_ptr<rmm::device_uvector<size_type>>>
137 inner_join(cudf::table_view const& left_keys,
138  cudf::table_view const& right_keys,
139  null_equality compare_nulls = null_equality::EQUAL,
142 
176 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
177  std::unique_ptr<rmm::device_uvector<size_type>>>
178 left_join(cudf::table_view const& left_keys,
179  cudf::table_view const& right_keys,
180  null_equality compare_nulls = null_equality::EQUAL,
183 
217 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
218  std::unique_ptr<rmm::device_uvector<size_type>>>
219 full_join(cudf::table_view const& left_keys,
220  cudf::table_view const& right_keys,
221  null_equality compare_nulls = null_equality::EQUAL,
224 
248 std::unique_ptr<cudf::table> cross_join(
249  cudf::table_view const& left,
250  cudf::table_view const& right,
253 
322 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
323  std::unique_ptr<rmm::device_uvector<size_type>>>
325  cudf::table_view const& right,
328  cudf::ast::expression const& predicate,
332  // end of group
334 
335 } // namespace CUDF_EXPORT cudf
cudf::table_view
A set of cudf::column_view objects of the same size.
Definition: table_view.hpp:189
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > left_join(cudf::table_view const &left_keys, cudf::table_view const &right_keys, null_equality compare_nulls=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a pair of row index vectors corresponding to a left join between the specified tables.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > full_join(cudf::table_view const &left_keys, cudf::table_view const &right_keys, null_equality compare_nulls=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a pair of row index vectors corresponding to a full join between the specified tables.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > filter_join_indices(cudf::table_view const &left, cudf::table_view const &right, cudf::device_span< size_type const > left_indices, cudf::device_span< size_type const > right_indices, cudf::ast::expression const &predicate, cudf::join_kind join_kind, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Filters join result indices based on a conditional predicate and join type.
constexpr CUDF_HOST_DEVICE size_type JoinNoMatch
Sentinel value used to indicate an unmatched row index in join operations.
Definition: join.hpp:55
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > inner_join(cudf::table_view const &left_keys, cudf::table_view const &right_keys, null_equality compare_nulls=null_equality::EQUAL, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Returns a pair of row index vectors corresponding to an inner join between the specified tables.
join_kind
Specifies the type of join operation to perform.
Definition: join.hpp:37
std::unique_ptr< cudf::table > cross_join(cudf::table_view const &left, cudf::table_view const &right, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Performs a cross join on two tables (left, right).
@ LEFT_ANTI_JOIN
Left anti join: left rows that have no matches in right table.
@ LEFT_SEMI_JOIN
Left semi join: left rows that have matches in right table.
@ FULL_JOIN
Full outer join: all rows from both tables.
@ LEFT_JOIN
Left join: all rows from left table plus matching rows from right.
@ INNER_JOIN
Inner join: only matching rows from both tables.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
cuda::std::span< T, Extent > device_span
Device span is an alias of cuda::std::span.
Definition: span.hpp:320
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:140
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
cuDF interfaces
Definition: host_udf.hpp:26
cudf::ast::expression
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:61
join_match_context
Holds context information about matches between tables during a join operation.
Definition: join.hpp:64
std::unique_ptr< rmm::device_uvector< size_type > > _match_counts
Definition: join.hpp:67
table_view _left_table
View of the left table involved in the join operation.
Definition: join.hpp:65
join_match_context(table_view left_table, std::unique_ptr< rmm::device_uvector< size_type >> match_counts)
Construct a join_match_context.
Definition: join.hpp:77
virtual ~join_match_context()=default
Virtual destructor for proper polymorphic deletion.
Stores context information for partitioned join operations.
Definition: join.hpp:95
size_type left_start_idx
The starting row index of the current left table partition.
Definition: join.hpp:98
std::unique_ptr< join_match_context > left_table_context
The match context from a previous inner_join_match_context call.
Definition: join.hpp:97
size_type left_end_idx
The ending row index (exclusive) of the current left table partition.
Definition: join.hpp:99
Class definitions for (mutable)_table_view
Type declarations for libcudf.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:21