interop.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/column/column.hpp>
9 #include <cudf/detail/transform.hpp>
11 #include <cudf/table/table.hpp>
13 #include <cudf/types.hpp>
14 #include <cudf/utilities/export.hpp>
16 #include <cudf/utilities/span.hpp>
17 
18 #include <rmm/resource_ref.hpp>
19 
20 #include <utility>
21 
22 struct DLManagedTensor;
23 
24 struct ArrowDeviceArray;
25 
26 struct ArrowSchema;
27 
28 struct ArrowArray;
29 
30 struct ArrowArrayStream;
31 
33 // These are types from arrow that we are forward declaring for our API to
34 // avoid needing to include nanoarrow headers.
35 typedef int32_t ArrowDeviceType; // NOLINT
36 
37 #define ARROW_DEVICE_CUDA 2 // NOLINT
39 
40 namespace CUDF_EXPORT cudf {
65 std::unique_ptr<table> from_dlpack(
66  DLManagedTensor const* managed_tensor,
69 
89 DLManagedTensor* to_dlpack(
90  table_view const& input,
93  // end of group
95 
108  std::string name;  ///< Name of the column.
109  std::string timezone;  ///< Timezone of the column.
110  std::optional<int32_t> precision;  ///< Resulting decimal precision of the column.
111  std::vector<column_metadata> children_meta;  ///< Metadata of children of the column.
112 
118  column_metadata(std::string _name) : name(std::move(_name)) {}  ///< Construct a new column metadata object; only `name` is set (moved from `_name`).
119  column_metadata() = default;  ///< Default constructor; no member is given an explicit value.
120 };
121 
126 using unique_schema_t = std::unique_ptr<ArrowSchema, void (*)(ArrowSchema*)>;  ///< unique_ptr to an ArrowSchema with a custom (function-pointer) deleter.
127 
132 using unique_device_array_t = std::unique_ptr<ArrowDeviceArray, void (*)(ArrowDeviceArray*)>;  ///< unique_ptr to an ArrowDeviceArray with a custom (function-pointer) deleter.
133 
138 using owned_columns_t = std::vector<std::unique_ptr<cudf::column>>;  ///< Vector of owning columns, used for conversion from ArrowDeviceArray.
139 
148 template <typename ViewType>
155  explicit custom_view_deleter(owned_columns_t&& owned) : owned_mem_{std::move(owned)} {}  ///< Construct a new custom view deleter object, moving `owned` into `owned_mem_`.
156 
162  void operator()(ViewType* ptr) const { delete ptr; }  ///< Operator to delete the unique_ptr's pointee: deletes the view object `ptr`.
163 
165 };
166 
172  std::unique_ptr<cudf::table_view, custom_view_deleter<cudf::table_view>>;
173 
179  std::unique_ptr<cudf::column_view, custom_view_deleter<cudf::column_view>>;
180 
181 namespace interop {
182 
183 struct arrow_array_container;
184 
199 
213 std::vector<cudf::column_metadata> get_table_metadata(cudf::table_view const& input);  ///< Helper to generate empty table metadata (all columns with no names) for arrow conversion.
214 
224  public:
237  column_metadata const& metadata,
240 
253  arrow_column(ArrowSchema&& schema,
254  ArrowDeviceArray&& input,
257 
270  arrow_column(ArrowSchema&& schema,
271  ArrowArray&& input,
274 
286  arrow_column(ArrowArrayStream&& input,
289 
301  ArrowSchema* output,
304 
313  void to_arrow(ArrowDeviceArray* output,
314  ArrowDeviceType device_type = ARROW_DEVICE_CUDA,
317 
325  [[nodiscard]] column_view view() const;  ///< Get a view of the column data.
326 
327  private:
328  std::shared_ptr<arrow_array_container>
329  container;
331  owned_columns_t view_columns;
332  column_view cached_view;
333 };
334 
343 class arrow_table {
344  public:
360 
373  arrow_table(ArrowSchema&& schema,
374  ArrowDeviceArray&& input,
377 
390  arrow_table(ArrowSchema&& schema,
391  ArrowArray&& input,
394 
406  arrow_table(ArrowArrayStream&& input,
409 
421  ArrowSchema* output,
424 
433  void to_arrow(ArrowDeviceArray* output,
434  ArrowDeviceType device_type = ARROW_DEVICE_CUDA,
437 
445  [[nodiscard]] table_view view() const;  ///< Get a view of the table data.
446 
447  private:
448  std::shared_ptr<arrow_array_container>
449  container;
451  owned_columns_t view_columns;
452  table_view cached_view;
453 };
454 
455 } // namespace interop
456 
474 
500  cudf::table&& table,
503 
529  cudf::column&& col,
532 
561  cudf::table_view const& table,
564 
593  cudf::column_view const& col,
596 
618  cudf::table_view const& table,
621 
643  cudf::column_view const& col,
646 
663  cudf::strings_column_view const& col,
666 
684 std::unique_ptr<cudf::table> from_arrow(
685  ArrowSchema const* schema,
686  ArrowArray const* input,
689 
703 std::unique_ptr<cudf::column> from_arrow_column(
704  ArrowSchema const* schema,
705  ArrowArray const* input,
708 
729 std::unique_ptr<table> from_arrow_host(
730  ArrowSchema const* schema,
731  ArrowDeviceArray const* input,
734 
748 std::unique_ptr<table> from_arrow_stream(
749  ArrowArrayStream* input,
752 
766 std::unique_ptr<column> from_arrow_stream_column(
767  ArrowArrayStream* input,
770 
790 std::unique_ptr<column> from_arrow_host_column(
791  ArrowSchema const* schema,
792  ArrowDeviceArray const* input,
795 
834  ArrowSchema const* schema,
835  ArrowDeviceArray const* input,
838 
872  ArrowSchema const* schema,
873  ArrowDeviceArray const* input,
876  // end of group
878 } // namespace CUDF_EXPORT cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A container of nullable device data as a column of elements.
Definition: column.hpp:36
A standard interchange medium for ArrowDeviceArray data in cudf.
Definition: interop.hpp:223
arrow_column(ArrowSchema &&schema, ArrowDeviceArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
arrow_column(ArrowSchema &&schema, ArrowArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
column_view view() const
Get a view of the column data.
void to_arrow(ArrowDeviceArray *output, ArrowDeviceType device_type=ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the column to an ArrowDeviceArray.
arrow_column(ArrowArrayStream &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
arrow_column(cudf::column &&input, column_metadata const &metadata, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
void to_arrow_schema(ArrowSchema *output, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the column to an ArrowSchema.
A standard interchange medium for ArrowDeviceArray data in cudf.
Definition: interop.hpp:343
arrow_table(ArrowSchema &&schema, ArrowDeviceArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
arrow_table(ArrowArrayStream &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
arrow_table(cudf::table &&input, cudf::host_span< column_metadata const > metadata, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
void to_arrow(ArrowDeviceArray *output, ArrowDeviceType device_type=ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the table to an ArrowDeviceArray.
arrow_table(ArrowSchema &&schema, ArrowArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
table_view view() const
Get a view of the table data.
void to_arrow_schema(ArrowSchema *output, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the table to an ArrowSchema.
Given a column-view of strings type, an instance of this class provides a wrapper on this compound column for strings operations.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
A set of cudf::column's of the same size.
Definition: table.hpp:29
Class definition for cudf::column.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< cudf::table_view, custom_view_deleter< cudf::table_view > > unique_table_view_t
typedef for a unique_ptr to a cudf::table_view with custom deleter
Definition: interop.hpp:172
std::vector< std::unique_ptr< cudf::column > > owned_columns_t
typedef for a vector of owning columns, used for conversion from ArrowDeviceArray
Definition: interop.hpp:138
std::unique_ptr< column > from_arrow_host_column(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from given ArrowDeviceArray input.
std::unique_ptr< cudf::column > from_arrow_column(ArrowSchema const *schema, ArrowArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from a given ArrowArray and ArrowSchema input.
unique_schema_t to_arrow_schema(cudf::table_view const &input, cudf::host_span< column_metadata const > metadata)
Create ArrowSchema from cudf table and metadata.
std::unique_ptr< table > from_arrow_stream(ArrowArrayStream *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowArrayStream input.
unique_table_view_t from_arrow_device(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table_view from given ArrowDeviceArray and ArrowSchema.
unique_device_array_t to_arrow_host_stringview(cudf::strings_column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Copy strings column data to host and create ArrowDeviceArray for it using the ArrowBinaryView format.
unique_column_view_t from_arrow_device_column(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column_view from given ArrowDeviceArray and ArrowSchema.
unique_device_array_t to_arrow_host(cudf::column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Copy column view data to host and create ArrowDeviceArray for it.
unique_device_array_t to_arrow_device(cudf::column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create ArrowDeviceArray from a column view.
std::unique_ptr< cudf::column_view, custom_view_deleter< cudf::column_view > > unique_column_view_t
typedef for a unique_ptr to a cudf::column_view with custom deleter
Definition: interop.hpp:179
std::unique_ptr< ArrowSchema, void(*)(ArrowSchema *)> unique_schema_t
typedef for a unique_ptr to an ArrowSchema with custom deleter
Definition: interop.hpp:126
std::unique_ptr< ArrowDeviceArray, void(*)(ArrowDeviceArray *)> unique_device_array_t
typedef for a unique_ptr to an ArrowDeviceArray with a custom deleter
Definition: interop.hpp:132
std::unique_ptr< table > from_arrow_host(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowDeviceArray input.
std::unique_ptr< column > from_arrow_stream_column(ArrowArrayStream *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from given ArrowArrayStream input.
std::unique_ptr< cudf::table > from_arrow(ArrowSchema const *schema, ArrowArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowArray and ArrowSchema input.
std::unique_ptr< table > from_dlpack(DLManagedTensor const *managed_tensor, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Convert a DLPack DLTensor into a cudf table.
DLManagedTensor * to_dlpack(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Convert a cudf table into a DLPack DLTensor.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
std::vector< cudf::column_metadata > get_table_metadata(cudf::table_view const &input)
Helper function to generate empty table metadata (all columns with no names) for arrow conversion.
cudf::column_metadata get_column_metadata(cudf::column_view const &input)
Helper function to generate empty column metadata (column with no name) for arrow conversion.
cuDF interfaces
Definition: host_udf.hpp:26
APIs for spans.
Class definition for cudf::strings_column_view.
Detailed metadata information for arrow array.
Definition: interop.hpp:107
std::optional< int32_t > precision
Resulting decimal precision of the column.
Definition: interop.hpp:110
column_metadata(std::string _name)
Construct a new column metadata object.
Definition: interop.hpp:118
std::vector< column_metadata > children_meta
Metadata of children of the column.
Definition: interop.hpp:111
std::string name
Name of the column.
Definition: interop.hpp:108
std::string timezone
Timezone of the column.
Definition: interop.hpp:109
Functor for a custom deleter to a unique_ptr of a view type (table_view or column_view).
Definition: interop.hpp:149
void operator()(ViewType *ptr) const
operator to delete the unique_ptr
Definition: interop.hpp:162
owned_columns_t owned_mem_
Owned columns that must be deleted.
Definition: interop.hpp:164
custom_view_deleter(owned_columns_t &&owned)
Construct a new custom view deleter object.
Definition: interop.hpp:155
C++20 std::span with reduced feature set.
Definition: span.hpp:182
Class definition for cudf::table.
Class definitions for (mutable)_table_view
Type declarations for libcudf.