hash_join.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
#include <cudf/hashing.hpp>
#include <cudf/join/join.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/export.hpp>
#include <cudf/utilities/memory_resource.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <cstddef>
#include <memory>
#include <optional>
#include <utility>
21 
22 namespace CUDF_EXPORT cudf {
23 
30 // forward declaration
namespace hashing::detail {
/// Forward declaration for our Murmur Hash 3 implementation.
/// Only the name is needed in this header (it parameterizes `hash_join::impl_type`).
template <typename T>
class MurmurHash3_x86_32;
}  // namespace hashing::detail
38 
namespace detail {
/// Forward declaration for our hash join.
/// The public `hash_join` class refers to this template only through a pointer, so an
/// incomplete type is sufficient here.
template <typename T>
class hash_join;
}  // namespace detail
46 
/**
 * @brief Specifies whether any of the input join tables (the build table and any later probe
 * table) has nulls.
 *
 * The underlying type is `bool` and the enumerators take their values from declaration order,
 * so `YES` is `false` and `NO` is `true`. Always compare against the named enumerators; do not
 * build a value by casting a "has nulls" boolean.
 */
enum class nullable_join : bool { YES, NO };
56 
64 class hash_join {
65  public:
68 
69  hash_join() = delete;
70  ~hash_join();
71  hash_join(hash_join const&) = delete;
72  hash_join(hash_join&&) = delete;
73  hash_join& operator=(hash_join const&) = delete;
74  hash_join& operator=(hash_join&&) = delete;
75 
89  null_equality compare_nulls,
91 
104  null_equality compare_nulls,
105  double load_factor,
107 
126  [[nodiscard]] std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
127  std::unique_ptr<rmm::device_uvector<size_type>>>
129  std::optional<std::size_t> output_size = {},
132 
151  [[nodiscard]] std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
152  std::unique_ptr<rmm::device_uvector<size_type>>>
154  std::optional<std::size_t> output_size = {},
157 
176  [[nodiscard]] std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
177  std::unique_ptr<rmm::device_uvector<size_type>>>
179  std::optional<std::size_t> output_size = {},
182 
196  [[nodiscard]] std::size_t inner_join_size(
197  cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
198 
212  [[nodiscard]] std::size_t left_join_size(
213  cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
214 
230  [[nodiscard]] std::size_t full_join_size(
231  cudf::table_view const& probe,
234 
257  cudf::table_view const& probe,
260 
282  cudf::table_view const& probe,
285 
307  cudf::table_view const& probe,
310 
311  private:
312  std::unique_ptr<impl_type const> _impl;
313 };
314  // end of group
316 
317 } // namespace CUDF_EXPORT cudf
Forward declaration for our hash join.
Definition: hash_join.hpp:44
Hash join that builds its hash table on construction and probes it in subsequent *_join member functions.
Definition: hash_join.hpp:64
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > left_join(cudf::table_view const &probe, std::optional< std::size_t > output_size={}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
typename cudf::detail::hash_join< cudf::hashing::detail::MurmurHash3_x86_32< cudf::hash_value_type > > impl_type
Implementation type.
Definition: hash_join.hpp:67
std::size_t left_join_size(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream()) const
std::size_t inner_join_size(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream()) const
hash_join(cudf::table_view const &build, null_equality compare_nulls, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct a hash join object for subsequent probe calls.
hash_join(cudf::table_view const &build, nullable_join has_nulls, null_equality compare_nulls, double load_factor, rmm::cuda_stream_view stream=cudf::get_default_stream())
Construct a hash join object for subsequent probe calls.
cudf::join_match_context full_join_match_context(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Returns context information about matches between the probe and build tables.
cudf::join_match_context inner_join_match_context(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Returns context information about matches between the probe and build tables.
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > inner_join(cudf::table_view const &probe, std::optional< std::size_t > output_size={}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
std::pair< std::unique_ptr< rmm::device_uvector< size_type > >, std::unique_ptr< rmm::device_uvector< size_type > > > full_join(cudf::table_view const &probe, std::optional< std::size_t > output_size={}, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
cudf::join_match_context left_join_match_context(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Returns context information about matches between the probe and build tables.
std::size_t full_join_size(cudf::table_view const &probe, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Forward declaration for our Murmur Hash 3 implementation.
Definition: hash_join.hpp:36
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
nullable_join
The enum class that specifies whether any of the input join tables (the build table and any later probe table) has nulls.
Definition: hash_join.hpp:55
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:140
cuDF interfaces
Definition: host_udf.hpp:26
bool has_nulls(table_view const &view)
Returns true if the table has nulls in any of its columns.
Holds context information about matches between tables during a join operation.
Definition: join.hpp:46
Class definitions for (mutable)_table_view
Type declarations for libcudf.