24 #include <kvikio/bounce_buffer.hpp>
25 #include <kvikio/error.hpp>
26 #include <kvikio/nvtx.hpp>
27 #include <kvikio/shim/cuda.hpp>
28 #include <kvikio/utils.hpp>
30 namespace kvikio::detail {
35 enum class IOOperationType : uint8_t {
43 enum class PartialIO : uint8_t {
// Map from a (CUDA context, thread id) pair to a CUDA stream.
56 std::map<std::pair<CUcontext, std::thread::id>, CUstream> _streams;
// Returns the CUDA stream associated with the given context and thread id.
// NOTE(review): presumably looks the pair up in `_streams` and creates a stream on a miss;
// the definition is not visible in this listing -- confirm.
69 KVIKIO_EXPORT
static CUstream get(CUcontext ctx, std::thread::id thd_id);
// Overload taking no arguments.
// NOTE(review): presumably resolves the current CUDA context and the calling thread before
// delegating to the two-argument overload -- confirm against the definition.
71 static CUstream get();
/**
 * @brief Read from or write to a file descriptor with POSIX `::pread` / `::pwrite`, using a host
 * buffer.
 *
 * NOTE(review): this listing omits several original lines (the embedded line numbers have gaps,
 * e.g. 98, 101, 103-104, 108, 110-111, 113, 116), so the branch structure described here is
 * partly inferred -- confirm against the full source.
 *
 * @tparam Operation Selects the system call: `::pread` for `IOOperationType::READ`, `::pwrite`
 * for the other case.
 * @tparam PartialIOStatus With `PartialIO::YES` the function returns right after one system call,
 * yielding the byte count that call reported. Otherwise it keeps looping until `byte_remaining`
 * reaches zero.
 * @param fd File descriptor handed to `::pread` / `::pwrite`.
 * @param buf Host buffer. Declared `void const*` so the same signature serves reads and writes.
 * @param count Number of bytes requested.
 * @param offset File offset at which the transfer starts.
 * @return The byte count of the single system call when `PartialIOStatus == PartialIO::YES`,
 * otherwise `count` converted through `convert_size2ssize`.
 */
91 template <IOOperationType Operation, PartialIO PartialIOStatus>
92 ssize_t posix_host_io(
int fd,
void const* buf,
size_t count, off_t offset)
// Running file position and outstanding byte count, updated after every system call.
94 off_t cur_offset = offset;
95 size_t byte_remaining = count;
// Constness is cast away because `::pread` writes into the buffer.
96 char* buffer =
const_cast<char*
>(
static_cast<char const*
>(buf));
97 while (byte_remaining > 0) {
// Compile-time selection of the system call.
99 if constexpr (Operation == IOOperationType::READ) {
100 nbytes = ::pread(fd, buffer, byte_remaining, cur_offset);
102 nbytes = ::pwrite(fd, buffer, byte_remaining, cur_offset);
// Error reporting. The guarding condition (original line 104) is not shown in this listing;
// presumably it tests `nbytes == -1` -- confirm.
105 std::string
const name = (Operation == IOOperationType::READ) ?
"pread" :
"pwrite";
// EBADF gets its own fixed message instead of the strerror() text used for other errno values.
106 KVIKIO_EXPECT(errno != EBADF,
"POSIX error: Operation not permitted");
107 KVIKIO_FAIL(
"POSIX error on " + name +
": " + strerror(errno));
// READ-only check; its body (original line 110) is not shown in this listing.
109 if constexpr (Operation == IOOperationType::READ) {
// Partial mode: hand the caller whatever this single call transferred.
112 if constexpr (PartialIOStatus == PartialIO::YES) {
return nbytes; }
// NOTE(review): no `buffer += nbytes` is visible here (original line 113 is omitted). Confirm the
// full source advances `buffer` together with `cur_offset`; otherwise a short transfer would make
// the next iteration reuse the start of the buffer.
114 cur_offset += nbytes;
115 byte_remaining -= nbytes;
// Full mode: the loop only ends once everything was transferred, so report `count`.
117 return convert_size2ssize(count);
/**
 * @brief Transfer data between a file and device memory in chunks, staging each chunk through a
 * host buffer obtained from `AllocRetain`.
 *
 * NOTE(review): this listing omits some original lines (e.g. 134, 154-156, 158, 161, 165+). In
 * particular the `size` parameter used below and the return statement are not visible -- confirm
 * against the full source.
 *
 * @tparam Operation `IOOperationType::READ` copies file -> host buffer -> device; the other case
 * copies device -> host buffer -> file.
 * @param fd File descriptor forwarded to `posix_host_io`.
 * @param devPtr_base Base device pointer; converted with `convert_void2deviceptr`.
 * @param file_offset Byte offset into the file where the transfer starts.
 * @param devPtr_offset Byte offset added to `devPtr_base`.
 * @return `std::size_t`; the returned expression is not visible in this listing.
 */
131 template <IOOperationType Operation>
132 std::size_t posix_device_io(
int fd,
133 void const* devPtr_base,
135 std::size_t file_offset,
136 std::size_t devPtr_offset)
// Host-side staging buffer: it is the host operand of both async memcpy calls below.
138 auto alloc = AllocRetain::instance().get();
139 CUdeviceptr devPtr = convert_void2deviceptr(devPtr_base) + devPtr_offset;
140 off_t cur_file_offset = convert_size2off(file_offset);
141 off_t byte_remaining = convert_size2off(size);
// One loop iteration moves at most one staging buffer's worth of bytes.
142 off_t
const chunk_size2 = convert_size2off(alloc.size());
// Stream used for the host<->device copies of this call.
145 CUstream stream = StreamsByThread::get();
147 while (byte_remaining > 0) {
148 off_t
const nbytes_requested = std::min(chunk_size2, byte_remaining);
// Defaults to the full request; only the READ branch overwrites it with the actual count.
149 ssize_t nbytes_got = nbytes_requested;
150 if constexpr (Operation == IOOperationType::READ) {
// Partial read: the file may deliver fewer bytes than requested, so copy only `nbytes_got`.
151 nbytes_got = posix_host_io<IOOperationType::READ, PartialIO::YES>(
152 fd, alloc.get(), nbytes_requested, cur_file_offset);
153 CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyHtoDAsync(devPtr, alloc.get(), nbytes_got, stream));
// NOTE(review): the copies are asynchronous and no stream synchronization is visible in this
// listing (original lines 154 and 158 are omitted). Confirm the full source synchronizes `stream`
// before the staging buffer is reused or written to the file.
157 cudaAPI::instance().MemcpyDtoHAsync(alloc.get(), devPtr, nbytes_requested, stream));
// Write path uses PartialIO::NO, i.e. posix_host_io loops until the whole chunk is written.
159 posix_host_io<IOOperationType::WRITE, PartialIO::NO>(
160 fd, alloc.get(), nbytes_requested, cur_file_offset);
// Advance file position and device pointer by the bytes actually handled this iteration.
162 cur_file_offset += nbytes_got;
163 devPtr += nbytes_got;
164 byte_remaining -= nbytes_got;
/**
 * @brief Read from a file into a host buffer by forwarding to
 * `detail::posix_host_io<IOOperationType::READ, PartialIOStatus>`.
 *
 * @tparam PartialIOStatus Forwarded unchanged to `detail::posix_host_io`.
 * @param fd File descriptor to read from.
 * @param buf Destination host buffer.
 * @param size Number of bytes requested.
 * @param file_offset Byte offset into the file; converted with `convert_size2off`.
 * @return The `ssize_t` result of `detail::posix_host_io`, converted to `std::size_t` on return.
 */
183 template <PartialIO PartialIOStatus>
184 std::size_t posix_host_read(
int fd,
void* buf, std::size_t size, std::size_t file_offset)
// NVTX range labelled with the function name, with `size` passed as its second argument.
186 KVIKIO_NVTX_SCOPED_RANGE(
"posix_host_read()", size);
187 return detail::posix_host_io<IOOperationType::READ, PartialIOStatus>(
188 fd, buf, size, convert_size2off(file_offset));
/**
 * @brief Write a host buffer to a file by forwarding to
 * `detail::posix_host_io<IOOperationType::WRITE, PartialIOStatus>`.
 *
 * @tparam PartialIOStatus Forwarded unchanged to `detail::posix_host_io`.
 * @param fd File descriptor to write to.
 * @param buf Source host buffer.
 * @param size Number of bytes requested.
 * @param file_offset Byte offset into the file; converted with `convert_size2off`.
 * @return The `ssize_t` result of `detail::posix_host_io`, converted to `std::size_t` on return.
 */
205 template <PartialIO PartialIOStatus>
206 std::size_t posix_host_write(
int fd,
void const* buf, std::size_t size, std::size_t file_offset)
// NVTX range labelled with the function name, with `size` passed as its second argument.
208 KVIKIO_NVTX_SCOPED_RANGE(
"posix_host_write()", size);
209 return detail::posix_host_io<IOOperationType::WRITE, PartialIOStatus>(
210 fd, buf, size, convert_size2off(file_offset));
/**
 * @brief Declaration of the device-memory read entry point.
 *
 * NOTE(review): presumably reads from `fd` into device memory via
 * `detail::posix_device_io<IOOperationType::READ>` -- the definition is not visible here; confirm.
 * One parameter (original line 228) is omitted from this listing.
 *
 * @param fd File descriptor.
 * @param devPtr_base Base device pointer.
 * @param file_offset Byte offset into the file.
 * @param devPtr_offset Byte offset relative to `devPtr_base`.
 * @return `std::size_t`; presumably the number of bytes read -- confirm.
 */
226 std::size_t posix_device_read(
int fd,
227 void const* devPtr_base,
229 std::size_t file_offset,
230 std::size_t devPtr_offset);
/**
 * @brief Declaration of the device-memory write entry point.
 *
 * NOTE(review): presumably writes device memory to `fd` via
 * `detail::posix_device_io<IOOperationType::WRITE>` -- the definition is not visible here; confirm.
 * One parameter (original line 247) is omitted from this listing.
 *
 * @param fd File descriptor.
 * @param devPtr_base Base device pointer.
 * @param file_offset Byte offset into the file.
 * @param devPtr_offset Byte offset relative to `devPtr_base`.
 * @return `std::size_t`; presumably the number of bytes written -- confirm.
 */
245 std::size_t posix_device_write(
int fd,
246 void const* devPtr_base,
248 std::size_t file_offset,
249 std::size_t devPtr_offset);
Singleton class to retrieve a CUDA stream for device-host copying.
#define KVIKIO_EXPECT(...)
Macro for checking pre-conditions or conditions that throws an exception when a condition is violated...
#define CUDA_DRIVER_TRY(...)
Error checking macro for CUDA driver API functions.
#define KVIKIO_FAIL(...)
Indicates that an erroneous code path has been taken.