cuda_stream.hpp
1 
5 #pragma once
6 
7 #include <memory>
8 #include <ranges>
9 
10 #include <rapidsmpf/cuda_event.hpp>
11 
12 namespace rapidsmpf {
13 
14 
33 template <typename Range1, typename Range2>
35  Range1 const& downstreams, Range2 const& upstreams, CudaEvent* event = nullptr
36 ) {
37  // Quick exit if all streams are identical.
38  if ([&] {
39  for (rmm::cuda_stream_view const& upstream : upstreams) {
40  for (rmm::cuda_stream_view const& downstream : downstreams) {
41  if (upstream.value() != downstream.value()) {
42  return false;
43  }
44  }
45  }
46  return true;
47  }())
48  {
49  return;
50  }
51 
52  // Create a temporary CUDA event if none was provided. Note, once the event
53  // has been used to record synchronization between streams, it can be safely
54  // destroyed without affecting the synchronization.
55  std::unique_ptr<CudaEvent> tmp_event;
56  if (event == nullptr) {
57  tmp_event = std::make_unique<CudaEvent>();
58  event = tmp_event.get();
59  }
60 
61  // Let all downstreams wait on all upstreams.
62  for (rmm::cuda_stream_view const& upstream : upstreams) {
63  event->record(upstream);
64  for (rmm::cuda_stream_view const& downstream : downstreams) {
65  if (upstream.value() != downstream.value()) {
66  event->stream_wait(downstream);
67  }
68  }
69  }
70 }
71 
91 inline void cuda_stream_join(
92  rmm::cuda_stream_view downstream,
93  rmm::cuda_stream_view upstream,
94  CudaEvent* event = nullptr
95 ) {
96  return cuda_stream_join(
97  std::views::single(downstream), std::views::single(upstream), event
98  );
99 }
100 
101 } // namespace rapidsmpf
RAII wrapper for a CUDA event with convenience methods.
Definition: cuda_event.hpp:35
RAPIDS Multi-Processor interfaces.
Definition: backend.hpp:13
void cuda_stream_join(Range1 const &downstreams, Range2 const &upstreams, CudaEvent *event=nullptr)
Make downstream CUDA streams wait on upstream CUDA streams.
Definition: cuda_stream.hpp:34