prefetch.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <rmm/cuda_device.hpp>
20 #include <rmm/cuda_stream_view.hpp>
21 #include <rmm/detail/error.hpp>
22 #include <rmm/detail/export.hpp>
23 #include <rmm/error.hpp>
24 
25 #include <cuda/std/span>
26 
27 namespace RMM_NAMESPACE {
28 
47 void prefetch(void const* ptr,
48  std::size_t size,
49  rmm::cuda_device_id device,
50  rmm::cuda_stream_view stream)
51 {
52  auto result = cudaMemPrefetchAsync(ptr, size, device.value(), stream.value());
53  // InvalidValue error is raised when non-managed memory is passed to cudaMemPrefetchAsync
54  // We should treat this as a no-op
55  if (result != cudaErrorInvalidValue && result != cudaSuccess) { RMM_CUDA_TRY(result); }
56 }
57 
69 template <typename T>
70 void prefetch(cuda::std::span<T const> data,
71  rmm::cuda_device_id device,
72  rmm::cuda_stream_view stream)
73 {
74  prefetch(data.data(), data.size_bytes(), device, stream);
75 }
76  // end of group
78 
79 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:39
cudaStream_t value() const noexcept
Get the wrapped stream.
void prefetch(cuda::std::span< T const > data, rmm::cuda_device_id device, rmm::cuda_stream_view stream)
Prefetch a span of memory to the specified device on the specified stream.
Definition: prefetch.hpp:70
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:38
constexpr value_type value() const noexcept
The wrapped integer value.
Definition: cuda_device.hpp:54