hash_join.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/hashing.hpp>
20 #include <cudf/join/join.hpp>
22 #include <cudf/types.hpp>
24 #include <cudf/utilities/export.hpp>
26 
27 #include <rmm/cuda_stream_view.hpp>
28 #include <rmm/device_uvector.hpp>
29 
30 #include <optional>
31 #include <utility>
32 
33 namespace CUDF_EXPORT cudf {
34 
41 // forward declaration
// Forward declaration for the Murmur Hash 3 implementation (wording from this listing's
// symbol index, which places the definition at hash_join.hpp:47).
// NOTE(review): listing line 47, the class name introduced by `template <typename T>`, is
// absent from this extract, so the declaration is incomplete as shown. The `impl_type`
// entry in the symbol index names the class `cudf::hashing::detail::MurmurHash3_x86_32`;
// restore the missing line from the real header before using this text as code.
42 namespace hashing::detail {
46 template <typename T>
48 } // namespace hashing::detail
49 
// Forward declaration of the class template `cudf::detail::hash_join<T>` ("Forward
// declaration for our hash join" in this listing's symbol index). Only the declaration is
// visible in this file; the public `cudf::hash_join` class below refers to it through its
// `impl_type` alias and owns it via `std::unique_ptr`.
// NOTE(review): presumably T is the hash function type, since the symbol index
// instantiates it with `MurmurHash3_x86_32<cudf::hash_value_type>` -- confirm.
50 namespace detail {
54 template <typename T>
55 class hash_join;
56 } // namespace detail
57 
// Specifies whether any of the input join tables (the build table and any later probe
// table) has nulls. The symbol-index description is truncated after "has"; "nulls" is
// inferred from the `nullable_join has_nulls` constructor parameter -- TODO confirm.
// The underlying type is bool and no enumerator has an explicit value, so YES is 0 (false)
// and NO is 1 (true); compare against the enumerators instead of casting to bool.
66 enum class nullable_join : bool { YES, NO };
67 
// Hash join that builds its hash table on construction and probes it in the subsequent
// *_join member functions (description from this listing's symbol index, where it is
// truncated).
//
// NOTE(review): this is a rendered listing with a line-number gutter, and every listing
// line that held a hyperlinked identifier was dropped by the extraction. Several
// declarations below are therefore fragments. The comments map each fragment to the full
// signature given in the symbol index at the end of this file; restore the missing lines
// from the real header before treating this text as compilable code.
75 class hash_join {
76  public:
// Listing lines 77-78 are missing. Per the symbol index they define the "Implementation
// type" alias:
//   impl_type = typename cudf::detail::hash_join<
//     cudf::hashing::detail::MurmurHash3_x86_32<cudf::hash_value_type>>
79 
// Not default-constructible, copyable or movable: all five operations are deleted.
80  hash_join() = delete;
// Declared here without a body. Presumably defined out of line because `impl_type` is only
// forward-declared in this header and `std::unique_ptr` needs the complete type to destroy
// it -- confirm against the implementation file.
81  ~hash_join();
82  hash_join(hash_join const&) = delete;
83  hash_join(hash_join&&) = delete;
84  hash_join& operator=(hash_join const&) = delete;
85  hash_join& operator=(hash_join&&) = delete;
86 
// Fragment of the first constructor ("Construct a hash join object for subsequent probe
// calls"). Listing lines 99 and 101 are missing. Symbol-index signature:
//   hash_join(cudf::table_view const& build,
//             null_equality compare_nulls,
//             rmm::cuda_stream_view stream = cudf::get_default_stream())
100  null_equality compare_nulls,
102 
// Fragment of the second constructor, which additionally takes the nullability hint and a
// load factor. Listing lines 113, 114 and 117 are missing. Symbol-index signature:
//   hash_join(cudf::table_view const& build,
//             nullable_join has_nulls,
//             null_equality compare_nulls,
//             double load_factor,
//             rmm::cuda_stream_view stream = cudf::get_default_stream())
// NOTE(review): the valid range and meaning of `load_factor` are not shown here.
115  null_equality compare_nulls,
116  double load_factor,
118 
// The next three fragments are the probe functions inner_join, left_join and full_join.
// The symbol index gives all three the same parameter list and return type:
//   (cudf::table_view const& probe,
//    std::optional<std::size_t> output_size = {},
//    rmm::cuda_stream_view stream = cudf::get_default_stream(),
//    rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const
// Each returns a pair of owning pointers to device vectors of size_type.
// NOTE(review): the line carrying the function name is missing from each fragment. The
// *_join_size declarations further down appear in inner, left, full order, so the same
// order is presumed here -- confirm. What the two vectors hold is not stated in this
// listing.
//
// Presumably inner_join (listing lines 139, 141 and 142 are missing).
137  [[nodiscard]] std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
138  std::unique_ptr<rmm::device_uvector<size_type>>>
140  std::optional<std::size_t> output_size = {},
143 
// Presumably left_join (listing lines 164, 166 and 167 are missing).
162  [[nodiscard]] std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
163  std::unique_ptr<rmm::device_uvector<size_type>>>
165  std::optional<std::size_t> output_size = {},
168 
// Presumably full_join (listing lines 189, 191 and 192 are missing).
187  [[nodiscard]] std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
188  std::unique_ptr<rmm::device_uvector<size_type>>>
190  std::optional<std::size_t> output_size = {},
193 
// Size query for an inner join against `probe`; returns a std::size_t. The value is
// presumably what inner_join's `output_size` argument expects -- confirm.
207  [[nodiscard]] std::size_t inner_join_size(
208  cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
209 
// Size query for a left join against `probe`; same parameters as inner_join_size.
223  [[nodiscard]] std::size_t left_join_size(
224  cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
225 
// Size query for a full join against `probe`. Listing lines 243-244 are missing. Per the
// symbol index this overload also takes a memory resource:
//   rmm::cuda_stream_view stream = cudf::get_default_stream(),
//   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const
241  [[nodiscard]] std::size_t full_join_size(
242  cudf::table_view const& probe,
245 
// The next three fragments are inner_join_match_context, left_join_match_context and
// full_join_match_context. Each "returns context information about matches between the
// probe and build tables" (symbol index) as a cudf::join_match_context and takes
//   (cudf::table_view const& probe,
//    rmm::cuda_stream_view stream = cudf::get_default_stream(),
//    rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const
// NOTE(review): the lines naming each function are missing; inner, left, full order is
// presumed from the *_join_size declarations above -- confirm.
268  cudf::table_view const& probe,
271 
293  cudf::table_view const& probe,
296 
318  cudf::table_view const& probe,
321 
322  private:
// Owning pointer to the implementation object. The pointee type is const-qualified, so
// only const member functions of impl_type can be called through it.
323  std::unique_ptr<impl_type const> _impl;
324 };
325  // end of group
327 
328 } // namespace CUDF_EXPORT cudf
Forward declaration for our hash join.
Definition: hash_join.hpp:55
Hash join that builds a hash table on creation and probes it in subsequent *_join member functions.
Definition: hash_join.hpp:75
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > left_join(cudf::table_view const &probe, std::optional< std::size_t > output_size={}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
typename cudf::detail::hash_join< cudf::hashing::detail::MurmurHash3_x86_32< cudf::hash_value_type > > impl_type
Implementation type.
Definition: hash_join.hpp:78
std::size_t left_join_size(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream()) const
std::size_t inner_join_size(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream()) const
hash_join(cudf::table_view const &build, null_equality compare_nulls, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct a hash join object for subsequent probe calls.
hash_join(cudf::table_view const &build, nullable_join has_nulls, null_equality compare_nulls, double load_factor, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct a hash join object for subsequent probe calls.
cudf::join_match_context full_join_match_context(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Returns context information about matches between the probe and build tables.
cudf::join_match_context inner_join_match_context(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Returns context information about matches between the probe and build tables.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > inner_join(cudf::table_view const &probe, std::optional< std::size_t > output_size={}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > full_join(cudf::table_view const &probe, std::optional< std::size_t > output_size={}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
cudf::join_match_context left_join_match_context(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Returns context information about matches between the probe and build tables.
std::size_t full_join_size(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Forward declaration for our Murmur Hash 3 implementation.
Definition: hash_join.hpp:47
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
nullable_join
The enum class to specify whether any of the input join tables (the build table and any later probe table) has nulls.
Definition: hash_join.hpp:66
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:151
cuDF interfaces
Definition: host_udf.hpp:37
bool has_nulls(table_view const &view)
Returns true if the table has nulls in any of its columns.
Holds context information about matches between tables during a join operation.
Definition: join.hpp:43
Class definitions for (mutable)_table_view
Type declarations for libcudf.