24 #include <kvikio/bounce_buffer.hpp>
25 #include <kvikio/error.hpp>
26 #include <kvikio/shim/cuda.hpp>
27 #include <kvikio/utils.hpp>
29 namespace kvikio::detail {
// Selects the direction of a POSIX transfer. The enumerator list is not visible in this
// listing; READ and WRITE are the values referenced by the I/O templates in this file.
34 enum class IOOperationType : uint8_t {
// Selects whether a host I/O call may return after a single (possibly short) transfer.
// The enumerator list is not visible in this listing; YES and NO are the values referenced
// by the I/O templates in this file.
42 enum class PartialIO : uint8_t {
// Map from a (CUDA context, thread id) pair to a CUDA stream.
55 std::map<std::pair<CUcontext, std::thread::id>, CUstream> _streams;
// Static accessor taking an explicit context and thread id and returning a CUstream.
// NOTE(review): the definition is not visible here; presumably it looks the pair up in
// `_streams` and creates a stream on first use - confirm.
68 KVIKIO_EXPORT
static CUstream get(CUcontext ctx, std::thread::id thd_id);
// Argument-free overload; posix_device_io() calls it to obtain the stream used for its
// host/device copies.
// NOTE(review): presumably resolves the current context and the calling thread - confirm
// against the definition.
70 static CUstream get();
/**
 * @brief Read from or write to a file descriptor at an explicit offset using pread/pwrite.
 *
 * NOTE(review): several lines of this function (braces, the declaration of `nbytes`, the
 * `else` header and the conditions guarding the throw statements) are not visible in this
 * listing; comments that depend on them are marked as assumptions.
 *
 * @tparam Operation IOOperationType::READ selects `::pread`; `::pwrite` is used in the
 * other branch.
 * @tparam PartialIOStatus When PartialIO::YES, the byte count of the first transfer is
 * returned instead of looping until `count` bytes have been transferred.
 * @param fd File descriptor passed to pread/pwrite.
 * @param buf Host buffer; filled by pread for reads, consumed by pwrite for writes.
 * @param count Number of bytes requested.
 * @param offset File offset of the first byte.
 * @return The byte count of a single transfer when partial I/O is enabled, otherwise
 * `convert_size2ssize(count)`.
 * @throws CUfileException on the pread/pwrite error paths and on the EOF path for reads.
 */
90 template <IOOperationType Operation, PartialIO PartialIOStatus>
91 ssize_t posix_host_io(
int fd,
const void* buf,
size_t count, off_t offset)
93 off_t cur_offset = offset;
94 size_t byte_remaining = count;
// A single non-const pointer serves both pread (which fills the buffer) and pwrite,
// hence the const_cast away from the const-qualified parameter.
95 char* buffer =
const_cast<char*
>(
static_cast<const char*
>(buf));
// Keep issuing transfers until the whole request has been satisfied.
96 while (byte_remaining > 0) {
98 if constexpr (Operation == IOOperationType::READ) {
99 nbytes = ::pread(fd, buffer, byte_remaining, cur_offset);
101 nbytes = ::pwrite(fd, buffer, byte_remaining, cur_offset);
// Error path. NOTE(review): the guarding condition is not visible here; presumably it
// tests for a -1 return from pread/pwrite - confirm.
104 const std::string name = Operation == IOOperationType::READ ?
"pread" :
"pwrite";
// EBADF is reported with a fixed text rather than with strerror(errno).
105 if (errno == EBADF) {
106 throw CUfileException{std::string{
"POSIX error on " + name +
" at: "} + __FILE__ +
":" +
107 KVIKIO_STRINGIFY(__LINE__) +
": Operation not permitted"};
// Every other errno value is reported through strerror().
109 throw CUfileException{std::string{
"POSIX error on " + name +
" at: "} + __FILE__ +
":" +
110 KVIKIO_STRINGIFY(__LINE__) +
": " + strerror(errno)};
// Reads additionally raise an EOF error. NOTE(review): the guarding condition is elided;
// presumably it tests for a zero-byte read - confirm.
112 if constexpr (Operation == IOOperationType::READ) {
114 throw CUfileException{std::string{
"POSIX error on pread at: "} + __FILE__ +
":" +
115 KVIKIO_STRINGIFY(__LINE__) +
": EOF"};
// With partial I/O enabled a single transfer is enough: hand its byte count to the caller.
118 if constexpr (PartialIOStatus == PartialIO::YES) {
return nbytes; }
// Advance the file position and shrink the outstanding byte count for the next iteration.
// NOTE(review): no line visible in this listing advances `buffer` by nbytes. If the elided
// line between 118 and 120 does not do so, a retry after a short transfer would reuse the
// start of the buffer - verify against the full file.
120 cur_offset += nbytes;
121 byte_remaining -= nbytes;
// The loop only ends once every requested byte has been transferred.
123 return convert_size2ssize(count);
/**
 * @brief Transfer data between a file and device memory in chunks, staging each chunk
 * through a host allocation obtained from AllocRetain.
 *
 * NOTE(review): this listing omits some lines of the function, among them one parameter
 * (`size`, which the body reads), the `else` header separating the read and write paths,
 * and the return statement; presumably the function returns `size` - confirm.
 *
 * @tparam Operation IOOperationType::READ copies file -> host -> device; the other path
 * copies device -> host -> file.
 * @param fd File descriptor forwarded to posix_host_io().
 * @param devPtr_base Base device address, converted with convert_void2deviceptr().
 * @param file_offset Offset into the file at which the transfer starts.
 * @param devPtr_offset Byte offset added to the converted base device address.
 */
137 template <IOOperationType Operation>
138 std::size_t posix_device_io(
int fd,
139 const void* devPtr_base,
141 std::size_t file_offset,
142 std::size_t devPtr_offset)
// Host-side staging allocation; its size bounds how many bytes one iteration can move.
144 auto alloc = AllocRetain::instance().get();
145 CUdeviceptr devPtr = convert_void2deviceptr(devPtr_base) + devPtr_offset;
146 off_t cur_file_offset = convert_size2off(file_offset);
147 off_t byte_remaining = convert_size2off(size);
148 const off_t chunk_size2 = convert_size2off(alloc.size());
// Stream on which the asynchronous host/device copies below are issued.
151 CUstream stream = StreamsByThread::get();
153 while (byte_remaining > 0) {
// Never request more than fits into the staging allocation.
154 const off_t nbytes_requested = std::min(chunk_size2, byte_remaining);
// Defaults to the requested size; only the read path overwrites it with the actual count.
155 ssize_t nbytes_got = nbytes_requested;
156 if constexpr (Operation == IOOperationType::READ) {
// Read path: a partial read is accepted, and only the bytes actually read are copied to
// the device. The stream is synchronized before the staging allocation is reused.
157 nbytes_got = posix_host_io<IOOperationType::READ, PartialIO::YES>(
158 fd, alloc.get(), nbytes_requested, cur_file_offset);
159 CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyHtoDAsync(devPtr, alloc.get(), nbytes_got, stream));
160 CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream));
// Write path: copy the chunk from the device into the staging allocation, wait for the
// copy to finish, then write the whole chunk (PartialIO::NO) to the file.
163 cudaAPI::instance().MemcpyDtoHAsync(alloc.get(), devPtr, nbytes_requested, stream));
164 CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(stream));
165 posix_host_io<IOOperationType::WRITE, PartialIO::NO>(
166 fd, alloc.get(), nbytes_requested, cur_file_offset);
// Advance file position and device pointer by the bytes handled in this iteration.
168 cur_file_offset += nbytes_got;
169 devPtr += nbytes_got;
170 byte_remaining -= nbytes_got;
/**
 * @brief Read from a file descriptor into a host buffer via detail::posix_host_io().
 *
 * Opens an NVTX range labelled "posix_host_read()" with `size` as its payload, then
 * forwards to the READ instantiation of detail::posix_host_io().
 *
 * @tparam PartialIOStatus Forwarded unchanged to detail::posix_host_io().
 * @param fd File descriptor to read from.
 * @param buf Destination host buffer.
 * @param size Number of bytes requested.
 * @param file_offset Offset into the file, converted with convert_size2off().
 * @return The value returned by detail::posix_host_io(), implicitly converted from
 * ssize_t to std::size_t.
 */
189 template <PartialIO PartialIOStatus>
190 std::size_t posix_host_read(
int fd,
void* buf, std::size_t size, std::size_t file_offset)
192 KVIKIO_NVTX_SCOPED_RANGE(
"posix_host_read()", size);
193 return detail::posix_host_io<IOOperationType::READ, PartialIOStatus>(
194 fd, buf, size, convert_size2off(file_offset));
/**
 * @brief Write a host buffer to a file descriptor via detail::posix_host_io().
 *
 * Opens an NVTX range labelled "posix_host_write()" with `size` as its payload, then
 * forwards to the WRITE instantiation of detail::posix_host_io().
 *
 * @tparam PartialIOStatus Forwarded unchanged to detail::posix_host_io().
 * @param fd File descriptor to write to.
 * @param buf Source host buffer.
 * @param size Number of bytes requested.
 * @param file_offset Offset into the file, converted with convert_size2off().
 * @return The value returned by detail::posix_host_io(), implicitly converted from
 * ssize_t to std::size_t.
 */
211 template <PartialIO PartialIOStatus>
212 std::size_t posix_host_write(
int fd,
const void* buf, std::size_t size, std::size_t file_offset)
214 KVIKIO_NVTX_SCOPED_RANGE(
"posix_host_write()", size);
215 return detail::posix_host_io<IOOperationType::WRITE, PartialIOStatus>(
216 fd, buf, size, convert_size2off(file_offset));
/**
 * @brief Declaration of the POSIX-based device read; the definition is not part of this
 * listing.
 *
 * NOTE(review): presumably implemented on top of
 * detail::posix_device_io<IOOperationType::READ> - confirm. One parameter between
 * `devPtr_base` and `file_offset` is not visible here (presumably the byte count).
 *
 * @param fd File descriptor.
 * @param devPtr_base Base address of the device buffer.
 * @param file_offset Offset into the file.
 * @param devPtr_offset Offset relative to `devPtr_base`.
 * @return A std::size_t; presumably the number of bytes read - confirm.
 */
232 std::size_t posix_device_read(
int fd,
233 const void* devPtr_base,
235 std::size_t file_offset,
236 std::size_t devPtr_offset);
/**
 * @brief Declaration of the POSIX-based device write; the definition is not part of this
 * listing.
 *
 * NOTE(review): presumably implemented on top of
 * detail::posix_device_io<IOOperationType::WRITE> - confirm. One parameter between
 * `devPtr_base` and `file_offset` is not visible here (presumably the byte count).
 *
 * @param fd File descriptor.
 * @param devPtr_base Base address of the device buffer.
 * @param file_offset Offset into the file.
 * @param devPtr_offset Offset relative to `devPtr_base`.
 * @return A std::size_t; presumably the number of bytes written - confirm.
 */
251 std::size_t posix_device_write(
int fd,
252 const void* devPtr_base,
254 std::size_t file_offset,
255 std::size_t devPtr_offset);
Singleton class to retrieve a CUDA stream for device-host copying.