rapidsmpf: /__w/rapidsmpf/rapidsmpf/cpp/include/rapidsmpf/cuda_stream.hpp Source File

 #pragma once


 #include <memory>

 #include <ranges>


 #include <rapidsmpf/cuda_event.hpp>


 namespace rapidsmpf {


 template <typename Range1, typename Range2>

 void cuda_stream_join(

     Range1 const& downstreams, Range2 const& upstreams, CudaEvent* event = nullptr

 ) {

     // Quick exit if all streams are identical.

     if ([&] {

             for (rmm::cuda_stream_view const& upstream : upstreams) {

                 for (rmm::cuda_stream_view const& downstream : downstreams) {

                     if (upstream.value() != downstream.value()) {

                         return false;

                     }

                 }

             }

             return true;

         }())

     {

         return;

     }


     // Create a temporary CUDA event if none was provided. Note, once the event

     // has been used to record synchronization between streams, it can be safely

     // destroyed without affecting the synchronization.

     std::unique_ptr<CudaEvent> tmp_event;

     if (event == nullptr) {

         tmp_event = std::make_unique<CudaEvent>();

         event = tmp_event.get();

     }


     // Let all downstreams wait on all upstreams.

     for (rmm::cuda_stream_view const& upstream : upstreams) {

         event->record(upstream);

         for (rmm::cuda_stream_view const& downstream : downstreams) {

             if (upstream.value() != downstream.value()) {

                 event->stream_wait(downstream);

             }

         }

     }

 }


 /**
  * @brief Make a single downstream CUDA stream wait on a single upstream CUDA stream.
  *
  * Convenience overload that wraps each stream in a one-element view and
  * forwards to the range-based `cuda_stream_join`.
  *
  * @param downstream Stream that is made to wait.
  * @param upstream Stream that is waited on.
  * @param event Optional event forwarded unchanged to the range-based overload.
  */
 inline void cuda_stream_join(
     rmm::cuda_stream_view downstream,
     rmm::cuda_stream_view upstream,
     CudaEvent* event = nullptr
 ) {
     auto const waiting_streams = std::views::single(downstream);
     auto const awaited_streams = std::views::single(upstream);
     cuda_stream_join(waiting_streams, awaited_streams, event);
 }


 }  // namespace rapidsmpf

rapidsmpf::CudaEvent
RAII wrapper for a CUDA event with convenience methods.
Definition: cuda_event.hpp:35

rmm::cuda_stream_view

rapidsmpf
RAPIDS Multi-Processor interfaces.
Definition: backend.hpp:14

rapidsmpf::cuda_stream_join
void cuda_stream_join(Range1 const &downstreams, Range2 const &upstreams, CudaEvent *event=nullptr)
Make downstream CUDA streams wait on upstream CUDA streams.
Definition: cuda_stream.hpp:34