10 #include <cudf/utilities/export.hpp>
19 namespace CUDF_EXPORT
cudf {
34 template <
typename Container>
/**
 * @brief Returns the buffer size in bytes.
 *
 * Pure virtual: concrete buffer implementations must override it.
 *
 * @return Size of the buffer in bytes
 */
48 [[nodiscard]]
virtual size_t size()
const = 0;
/**
 * @brief Returns the address of the data in the buffer.
 *
 * Pure virtual: concrete buffer implementations must override it.
 *
 * @return Read-only pointer to the first byte of the buffer's data
 */
55 [[nodiscard]]
virtual uint8_t
const*
data()
const = 0;
82 template <
typename Container>
83 static std::unique_ptr<buffer>
create(Container&& data_owner)
85 return std::make_unique<owning_buffer<Container>>(std::forward<Container>(data_owner));
/**
 * @brief Creates a source from a file path.
 *
 * NOTE(review): the reference signature listed for this overload also has a
 * `size_t offset = 0` parameter between `filepath` and `max_size_estimate`; it does not
 * appear in this listing -- confirm against the original header.
 *
 * @param filepath Path to the file to use
 * @param max_size_estimate Defaults to 0; meaning not shown here (presumably an upper
 * bound on the number of bytes to read -- confirm)
 * @param known_size Optional size, defaults to `std::nullopt` (presumably the file size
 * when it is already known -- confirm)
 * @return Owning pointer to the constructed datasource
 */
106 static std::unique_ptr<datasource>
create(std::string
const& filepath,
108 size_t max_size_estimate = 0,
109 std::optional<std::size_t> known_size = std::nullopt);
141 template <
typename T>
142 static std::vector<std::unique_ptr<datasource>>
create(std::vector<T>
const& args)
144 std::vector<std::unique_ptr<datasource>> sources;
145 sources.reserve(args.size());
146 std::transform(args.cbegin(), args.cend(), std::back_inserter(sources), [](
auto const& arg) {
147 return datasource::create(arg);
/**
 * @brief Returns a buffer with a subset of data from the source.
 *
 * Pure virtual: every concrete datasource must implement it.
 *
 * @param offset Start of the requested range (presumably a byte offset -- confirm)
 * @param size Length of the requested range (presumably in bytes -- confirm)
 * @return Owning pointer to a `datasource::buffer` with the requested data
 */
165 virtual std::unique_ptr<datasource::buffer>
host_read(
size_t offset,
size_t size) = 0;
179 virtual std::future<std::unique_ptr<datasource::buffer>>
host_read_async(
size_t offset,
/**
 * @brief Reads a selected range into a preallocated buffer.
 *
 * Pure virtual: every concrete datasource must implement it.
 *
 * @param offset Start of the requested range (presumably a byte offset -- confirm)
 * @param size Length of the requested range (presumably in bytes -- confirm)
 * @param dst Destination the data is read into; described as preallocated, so the
 * caller is expected to provide the storage
 * @return A `size_t` whose meaning is not shown here (presumably the number of bytes
 * read -- confirm)
 */
191 virtual size_t host_read(
size_t offset,
size_t size, uint8_t* dst) = 0;
/**
 * @brief Asynchronously reads data from the source into the provided host memory buffer.
 *
 * Unlike the synchronous `host_read` overloads this is not declared `= 0`, so a
 * definition is expected elsewhere (not visible in this listing).
 *
 * @param offset Start of the requested range (presumably a byte offset -- confirm)
 * @param size Length of the requested range (presumably in bytes -- confirm)
 * @param dst Host memory destination provided by the caller
 * @return Future yielding a `size_t` (presumably the number of bytes read -- confirm)
 */
207 virtual std::future<size_t>
host_read_async(
size_t offset,
size_t size, uint8_t* dst);
231 return supports_device_read();
250 virtual std::unique_ptr<datasource::buffer>
device_read(
size_t offset,
254 CUDF_FAIL(
"datasource classes that support device_read must override it.");
276 CUDF_FAIL(
"datasource classes that support device_read must override it.");
307 CUDF_FAIL(
"datasource classes that support device_read_async must override it.");
/**
 * @brief Returns the size of the data in the source.
 *
 * Pure virtual: every concrete datasource must implement it.
 *
 * @return Size of the data in the source
 */
315 [[nodiscard]]
virtual size_t size()
const = 0;
/**
 * @brief Reports whether the source is empty.
 *
 * Implemented in terms of size(); virtual, so derived classes may override it.
 *
 * @return `true` when size() is zero, `false` otherwise
 */
322 [[nodiscard]]
virtual bool is_empty()
const {
return size() == 0; }
/**
 * @brief Returns the size of the buffer.
 *
 * @return The stored `_size` value
 */
344 [[nodiscard]]
size_t size()
const override {
return _size; }
/**
 * @brief Returns the pointer to the buffer.
 *
 * @return The stored `_data` pointer (read-only view of the bytes)
 */
351 [[nodiscard]] uint8_t
const*
data()
const override {
return _data; }
// Pointer handed out by data(); defaults to nullptr when not otherwise initialized.
354 uint8_t
const* _data{
nullptr};
365 template <
typename Container>
// Compile-time guard: if Container is an lvalue reference type, Container&& collapses to
// an lvalue reference and this assertion fails, so only rvalue containers are accepted.
370 static_assert(std::is_rvalue_reference_v<Container&&>,
371 "The container argument passed to the constructor must be an rvalue.");
380 : _data(std::move(moved_data_owner)), _data_ptr(_data.data()), _size(_data.size())
393 owning_buffer(Container&& moved_data_owner, uint8_t
const* data_ptr,
size_t size)
394 : _data(std::move(moved_data_owner)), _data_ptr(data_ptr), _size(size)
/**
 * @brief Returns the size of the buffer.
 *
 * @return The stored `_size` value, which the constructors set either from the
 * container's size() or from an explicitly supplied size
 */
403 [[nodiscard]]
size_t size()
const override {
return _size; }
410 [[nodiscard]] uint8_t
const*
data()
const override
412 return static_cast<uint8_t const*
>(_data_ptr);
// Untyped pointer that data() exposes after casting to uint8_t const*; the constructors
// set it from either the owned container's data() or a caller-supplied pointer.
417 void const* _data_ptr;
435 size_t max_size_estimate = 0);
Interface class for buffers that the datasource returns to the caller.
virtual ~buffer()=default
Base class destructor.
static std::unique_ptr< buffer > create(Container &&data_owner)
Factory to construct a datasource buffer object from a container.
virtual size_t size() const =0
Returns the buffer size in bytes.
virtual uint8_t const * data() const =0
Returns the address of the data in the buffer.
Implementation of a non-owning buffer, where the datasource holds the buffer until destruction.
size_t size() const override
Returns the size of the buffer.
uint8_t const * data() const override
Returns the pointer to the buffer.
non_owning_buffer(uint8_t const *data, size_t size)
Constructs a new non-owning buffer object.
Derived implementation of buffer that owns the data.
owning_buffer(Container &&moved_data_owner)
Moves the input container into the newly created object.
owning_buffer(Container &&moved_data_owner, uint8_t const *data_ptr, size_t size)
Moves the input container into the newly created object, and exposes a subspan of the buffer.
size_t size() const override
Returns the size of the buffer.
uint8_t const * data() const override
Returns the pointer to the data in the buffer.
Interface class for providing input data to the readers.
static std::unique_ptr< datasource > create(std::string const &filepath, size_t offset=0, size_t max_size_estimate=0, std::optional< std::size_t > known_size=std::nullopt)
Creates a source from a file path.
virtual ~datasource()=default
Base class destructor.
static std::vector< std::unique_ptr< datasource > > create(std::vector< T > const &args)
Creates a vector of datasources, one per element in the input vector.
virtual bool supports_device_read() const
Whether or not this source supports reading directly into device memory.
static std::unique_ptr< datasource > create(datasource *source)
Creates a source from a user-implemented datasource object.
virtual std::future< std::unique_ptr< datasource::buffer > > host_read_async(size_t offset, size_t size)
Asynchronously reads a specified portion of data from the datasource.
virtual size_t device_read(size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream)
Reads a selected range into a preallocated device buffer.
virtual bool is_device_read_preferred(size_t size) const
Estimates whether a direct device read would be more efficient for the given size.
static std::unique_ptr< datasource > create(cudf::device_span< std::byte const > buffer)
Creates a source from a device memory buffer.
virtual std::future< size_t > device_read_async(size_t offset, size_t size, uint8_t *dst, rmm::cuda_stream_view stream)
Asynchronously reads a selected range into a preallocated device buffer.
virtual bool is_empty() const
Returns whether the source is empty (contains no data).
virtual std::future< size_t > host_read_async(size_t offset, size_t size, uint8_t *dst)
Asynchronously reads data from the source into the provided host memory buffer.
virtual size_t host_read(size_t offset, size_t size, uint8_t *dst)=0
Reads a selected range into a preallocated buffer.
virtual std::unique_ptr< datasource::buffer > device_read(size_t offset, size_t size, rmm::cuda_stream_view stream)
Returns a device buffer with a subset of data from the source.
virtual size_t size() const =0
Returns the size of the data in the source.
virtual std::unique_ptr< datasource::buffer > host_read(size_t offset, size_t size)=0
Returns a buffer with a subset of data from the source.
static std::unique_ptr< datasource > create(cudf::host_span< std::byte const > buffer)
Creates a source from a host memory buffer.
std::vector< std::unique_ptr< cudf::io::datasource > > make_datasources(source_info const &info, size_t offset=0, size_t max_size_estimate=0)
Constructs datasources from dataset source information.
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
cuda::std::span< T, Extent > device_span
Device span is an alias of cuda::std::span.
cuDF-IO API type definitions
C++20 std::span with reduced feature set.
Source information for read interfaces.