data_sink.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/types.hpp>
20 #include <cudf/utilities/error.hpp>
21 
22 #include <rmm/cuda_stream_view.hpp>
23 
24 #include <algorithm>
25 #include <future>
26 #include <memory>
27 #include <string>
28 #include <vector>
29 
30 namespace CUDF_EXPORT cudf {
32 namespace io {
33 
43 class data_sink {
44  public:
51  static std::unique_ptr<data_sink> create(std::string const& filepath);
52 
59  static std::unique_ptr<data_sink> create(std::vector<char>* buffer);
60 
69  static std::unique_ptr<data_sink> create();
70 
82  static std::unique_ptr<data_sink> create(cudf::io::data_sink* const user_sink);
83 
90  template <typename T>
91  static std::vector<std::unique_ptr<data_sink>> create(std::vector<T> const& args)
92  {
93  std::vector<std::unique_ptr<data_sink>> sinks;
94  sinks.reserve(args.size());
95  std::transform(args.cbegin(), args.cend(), std::back_inserter(sinks), [](auto const& arg) {
96  return data_sink::create(arg);
97  });
98  return sinks;
99  }
100 
104  virtual ~data_sink(){};
105 
112  virtual void host_write(void const* data, size_t size) = 0;
113 
136  [[nodiscard]] virtual bool supports_device_write() const { return false; }
137 
144  [[nodiscard]] virtual bool is_device_write_preferred(size_t size) const
145  {
146  return supports_device_write();
147  }
148 
163  virtual void device_write(void const* gpu_data, size_t size, rmm::cuda_stream_view stream)
164  {
165  CUDF_FAIL("data_sink classes that support device_write must override it.");
166  }
167 
190  virtual std::future<void> device_write_async(void const* gpu_data,
191  size_t size,
192  rmm::cuda_stream_view stream)
193  {
194  CUDF_FAIL("data_sink classes that support device_write_async must override it.");
195  }
196 
200  virtual void flush() = 0;
201 
207  virtual size_t bytes_written() = 0;
208 };
209  // end of group
211 } // namespace io
212 } // namespace CUDF_EXPORT cudf
Interface class for storing the output data from the writers.
Definition: data_sink.hpp:43
virtual void flush()=0
Flush the data written into the sink.
static std::vector< std::unique_ptr< data_sink > > create(std::vector< T > const &args)
Creates a vector of data sinks, one per element in the input vector.
Definition: data_sink.hpp:91
virtual void device_write(void const *gpu_data, size_t size, rmm::cuda_stream_view stream)
Append the buffer content to the sink from a gpu address.
Definition: data_sink.hpp:163
virtual bool supports_device_write() const
Whether or not this sink supports writing from gpu memory addresses.
Definition: data_sink.hpp:136
static std::unique_ptr< data_sink > create(cudf::io::data_sink *const user_sink)
Create a wrapped custom user data sink.
virtual bool is_device_write_preferred(size_t size) const
Estimates whether a direct device write would be more optimal for the given size.
Definition: data_sink.hpp:144
static std::unique_ptr< data_sink > create()
Create a void sink (one that does no actual I/O).
static std::unique_ptr< data_sink > create(std::string const &filepath)
Create a sink from a file path.
virtual ~data_sink()
Base class destructor.
Definition: data_sink.hpp:104
virtual size_t bytes_written()=0
Returns the total number of bytes written into this sink.
virtual void host_write(void const *data, size_t size)=0
Append the buffer content to the sink.
virtual std::future< void > device_write_async(void const *gpu_data, size_t size, rmm::cuda_stream_view stream)
Asynchronously append the buffer content to the sink from a gpu address.
Definition: data_sink.hpp:190
static std::unique_ptr< data_sink > create(std::vector< char > *buffer)
Create a sink from a std::vector.
std::unique_ptr< column > transform(column_view const &input, std::string const &unary_udf, data_type output_type, bool is_ptx, rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Creates a new column by applying a unary function against every element of an input column.
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:217
cuDF interfaces
Definition: aggregation.hpp:35
Type declarations for libcudf.