data_sink.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/types.hpp>
10 
11 #include <rmm/cuda_stream_view.hpp>
12 
13 #include <algorithm>
14 #include <future>
15 #include <memory>
16 #include <string>
17 #include <vector>
18 
19 namespace CUDF_EXPORT cudf {
21 namespace io {
22 
32 class data_sink {
33  public:
40  static std::unique_ptr<data_sink> create(std::string const& filepath);
41 
48  static std::unique_ptr<data_sink> create(std::vector<char>* buffer);
49 
58  static std::unique_ptr<data_sink> create();
59 
71  static std::unique_ptr<data_sink> create(cudf::io::data_sink* const user_sink);
72 
79  template <typename T>
80  static std::vector<std::unique_ptr<data_sink>> create(std::vector<T> const& args)
81  {
82  std::vector<std::unique_ptr<data_sink>> sinks;
83  sinks.reserve(args.size());
84  std::transform(args.cbegin(), args.cend(), std::back_inserter(sinks), [](auto const& arg) {
85  return data_sink::create(arg);
86  });
87  return sinks;
88  }
89 
93  virtual ~data_sink() {};
94 
101  virtual void host_write(void const* data, size_t size) = 0;
102 
125  [[nodiscard]] virtual bool supports_device_write() const { return false; }
126 
133  [[nodiscard]] virtual bool is_device_write_preferred(size_t size) const
134  {
135  return supports_device_write();
136  }
137 
152  virtual void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream)
153  {
154  CUDF_FAIL("data_sink classes that support device_write must override it.");
155  }
156 
179  virtual std::future<void> device_write_async(void const* gpu_data,
180  size_t size,
181  rmm::cuda_stream_view stream)
182  {
183  CUDF_FAIL("data_sink classes that support device_write_async must override it.");
184  }
185 
189  virtual void flush() = 0;
190 
196  virtual size_t bytes_written() = 0;
197 };
198  // end of group
200 } // namespace io
201 } // namespace CUDF_EXPORT cudf
Interface class for storing the output data from the writers.
Definition: data_sink.hpp:32
virtual void flush()=0
Flush the data written into the sink.
static std::vector< std::unique_ptr< data_sink > > create(std::vector< T > const &args)
Creates a vector of data sinks, one per element in the input vector.
Definition: data_sink.hpp:80
virtual void device_write(void const *gpu_data, size_t size, rmm::cuda_stream_view stream)
Append the buffer content to the sink from a gpu address.
Definition: data_sink.hpp:152
virtual bool supports_device_write() const
Whether or not this sink supports writing from gpu memory addresses.
Definition: data_sink.hpp:125
static std::unique_ptr< data_sink > create(cudf::io::data_sink *const user_sink)
Create a wrapped custom user data sink.
virtual bool is_device_write_preferred(size_t size) const
Estimates whether a direct device write would be more efficient for the given size.
Definition: data_sink.hpp:133
static std::unique_ptr< data_sink > create()
Create a void sink (one that does no actual I/O).
static std::unique_ptr< data_sink > create(std::string const &filepath)
Create a sink from a file path.
virtual ~data_sink()
Base class destructor.
Definition: data_sink.hpp:93
virtual size_t bytes_written()=0
Returns the total number of bytes written into this sink.
virtual void host_write(void const *data, size_t size)=0
Append the buffer content to the sink.
virtual std::future< void > device_write_async(void const *gpu_data, size_t size, rmm::cuda_stream_view stream)
Asynchronously append the buffer content to the sink from a gpu address.
Definition: data_sink.hpp:179
static std::unique_ptr< data_sink > create(std::vector< char > *buffer)
Create a sink from a std::vector.
std::unique_ptr< column > transform(std::vector< column_view > const &inputs, std::string const &transform_udf, data_type output_type, bool is_ptx, std::optional< void * > user_data=std::nullopt, null_aware is_null_aware=null_aware::NO, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a new column by applying a transform function against every element of the input columns.
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:182
cuDF interfaces
Definition: host_udf.hpp:26
Type declarations for libcudf.