interop.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/column/column.hpp>
20 #include <cudf/detail/transform.hpp>
22 #include <cudf/table/table.hpp>
24 #include <cudf/types.hpp>
25 #include <cudf/utilities/export.hpp>
27 #include <cudf/utilities/span.hpp>
28 
29 #include <rmm/resource_ref.hpp>
30 
31 #include <utility>
32 
33 struct DLManagedTensor;
34 
35 struct ArrowDeviceArray;
36 
37 struct ArrowSchema;
38 
39 struct ArrowArray;
40 
41 struct ArrowArrayStream;
42 
44 // These are types from arrow that we are forward declaring for our API to
45 // avoid needing to include nanoarrow headers.
46 typedef int32_t ArrowDeviceType; // NOLINT
47 
48 #define ARROW_DEVICE_CUDA 2 // NOLINT
50 
51 namespace CUDF_EXPORT cudf {
76 std::unique_ptr<table> from_dlpack(
77  DLManagedTensor const* managed_tensor,
80 
100 DLManagedTensor* to_dlpack(
101  table_view const& input,
104  // end of group
106 
120  std::string name;
121  std::vector<column_metadata> children_meta;
122 
128  column_metadata(std::string _name) : name(std::move(_name)) {}
129  column_metadata() = default;
130 };
131 
136 using unique_schema_t = std::unique_ptr<ArrowSchema, void (*)(ArrowSchema*)>;
137 
142 using unique_device_array_t = std::unique_ptr<ArrowDeviceArray, void (*)(ArrowDeviceArray*)>;
143 
148 using owned_columns_t = std::vector<std::unique_ptr<cudf::column>>;
149 
158 template <typename ViewType>
165  explicit custom_view_deleter(owned_columns_t&& owned) : owned_mem_{std::move(owned)} {}
166 
172  void operator()(ViewType* ptr) const { delete ptr; }
173 
175 };
176 
182  std::unique_ptr<cudf::table_view, custom_view_deleter<cudf::table_view>>;
183 
189  std::unique_ptr<cudf::column_view, custom_view_deleter<cudf::column_view>>;
190 
191 namespace interop {
192 
193 struct arrow_array_container;
194 
209 
223 std::vector<cudf::column_metadata> get_table_metadata(cudf::table_view const& input);
224 
234  public:
247  column_metadata const& metadata,
250 
263  arrow_column(ArrowSchema&& schema,
264  ArrowDeviceArray&& input,
267 
280  arrow_column(ArrowSchema&& schema,
281  ArrowArray&& input,
284 
296  arrow_column(ArrowArrayStream&& input,
299 
311  ArrowSchema* output,
314 
323  void to_arrow(ArrowDeviceArray* output,
324  ArrowDeviceType device_type = ARROW_DEVICE_CUDA,
327 
335  [[nodiscard]] column_view view() const;
336 
337  private:
338  std::shared_ptr<arrow_array_container>
339  container;
341  owned_columns_t view_columns;
342  column_view cached_view;
343 };
344 
353 class arrow_table {
354  public:
370 
383  arrow_table(ArrowSchema&& schema,
384  ArrowDeviceArray&& input,
387 
400  arrow_table(ArrowSchema&& schema,
401  ArrowArray&& input,
404 
416  arrow_table(ArrowArrayStream&& input,
419 
431  ArrowSchema* output,
434 
443  void to_arrow(ArrowDeviceArray* output,
444  ArrowDeviceType device_type = ARROW_DEVICE_CUDA,
447 
455  [[nodiscard]] table_view view() const;
456 
457  private:
458  std::shared_ptr<arrow_array_container>
459  container;
461  owned_columns_t view_columns;
462  table_view cached_view;
463 };
464 
465 } // namespace interop
466 
484 
510  cudf::table&& table,
513 
539  cudf::column&& col,
542 
571  cudf::table_view const& table,
574 
603  cudf::column_view const& col,
606 
628  cudf::table_view const& table,
631 
653  cudf::column_view const& col,
656 
673  cudf::strings_column_view const& col,
676 
694 std::unique_ptr<cudf::table> from_arrow(
695  ArrowSchema const* schema,
696  ArrowArray const* input,
699 
713 std::unique_ptr<cudf::column> from_arrow_column(
714  ArrowSchema const* schema,
715  ArrowArray const* input,
718 
739 std::unique_ptr<table> from_arrow_host(
740  ArrowSchema const* schema,
741  ArrowDeviceArray const* input,
744 
758 std::unique_ptr<table> from_arrow_stream(
759  ArrowArrayStream* input,
762 
776 std::unique_ptr<column> from_arrow_stream_column(
777  ArrowArrayStream* input,
780 
800 std::unique_ptr<column> from_arrow_host_column(
801  ArrowSchema const* schema,
802  ArrowDeviceArray const* input,
805 
844  ArrowSchema const* schema,
845  ArrowDeviceArray const* input,
848 
882  ArrowSchema const* schema,
883  ArrowDeviceArray const* input,
886  // end of group
888 } // namespace CUDF_EXPORT cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A container of nullable device data as a column of elements.
Definition: column.hpp:47
A standard interchange medium for ArrowDeviceArray data in cudf.
Definition: interop.hpp:233
arrow_column(ArrowSchema &&schema, ArrowDeviceArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
arrow_column(ArrowSchema &&schema, ArrowArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
column_view view() const
Get a view of the column data.
void to_arrow(ArrowDeviceArray *output, ArrowDeviceType device_type=ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the column to an ArrowDeviceArray.
arrow_column(ArrowArrayStream &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
arrow_column(cudf::column &&input, column_metadata const &metadata, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
void to_arrow_schema(ArrowSchema *output, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the column to an ArrowSchema.
A standard interchange medium for ArrowDeviceArray data in cudf.
Definition: interop.hpp:353
arrow_table(ArrowSchema &&schema, ArrowDeviceArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
arrow_table(ArrowArrayStream &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
arrow_table(cudf::table &&input, cudf::host_span< column_metadata const > metadata, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
void to_arrow(ArrowDeviceArray *output, ArrowDeviceType device_type=ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the table to an ArrowDeviceArray.
arrow_table(ArrowSchema &&schema, ArrowArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
table_view view() const
Get a view of the table data.
void to_arrow_schema(ArrowSchema *output, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the table to an ArrowSchema.
Given a column-view of strings type, an instance of this class provides a wrapper on this compound column for strings operations.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
Class definition for cudf::column.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< cudf::table_view, custom_view_deleter< cudf::table_view > > unique_table_view_t
typedef for a unique_ptr to a cudf::table_view with custom deleter
Definition: interop.hpp:182
std::vector< std::unique_ptr< cudf::column > > owned_columns_t
typedef for a vector of owning columns, used for conversion from ArrowDeviceArray
Definition: interop.hpp:148
std::unique_ptr< column > from_arrow_host_column(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from given ArrowDeviceArray input.
std::unique_ptr< cudf::column > from_arrow_column(ArrowSchema const *schema, ArrowArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from a given ArrowArray and ArrowSchema input.
unique_schema_t to_arrow_schema(cudf::table_view const &input, cudf::host_span< column_metadata const > metadata)
Create ArrowSchema from cudf table and metadata.
std::unique_ptr< table > from_arrow_stream(ArrowArrayStream *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowArrayStream input.
unique_table_view_t from_arrow_device(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table_view from given ArrowDeviceArray and ArrowSchema.
unique_device_array_t to_arrow_host_stringview(cudf::strings_column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Copy strings column data to host and create ArrowDeviceArray for it using the ArrowBinaryView format.
unique_column_view_t from_arrow_device_column(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column_view from given ArrowDeviceArray and ArrowSchema.
unique_device_array_t to_arrow_host(cudf::column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Copy column view data to host and create ArrowDeviceArray for it.
unique_device_array_t to_arrow_device(cudf::column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create ArrowDeviceArray from a column view.
std::unique_ptr< cudf::column_view, custom_view_deleter< cudf::column_view > > unique_column_view_t
typedef for a unique_ptr to a cudf::column_view with custom deleter
Definition: interop.hpp:189
std::unique_ptr< ArrowSchema, void(*)(ArrowSchema *)> unique_schema_t
typedef for a unique_ptr to an ArrowSchema with custom deleter
Definition: interop.hpp:136
std::unique_ptr< ArrowDeviceArray, void(*)(ArrowDeviceArray *)> unique_device_array_t
typedef for a unique_ptr to an ArrowDeviceArray with a custom deleter
Definition: interop.hpp:142
std::unique_ptr< table > from_arrow_host(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowDeviceArray input.
std::unique_ptr< column > from_arrow_stream_column(ArrowArrayStream *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from given ArrowArrayStream input.
std::unique_ptr< cudf::table > from_arrow(ArrowSchema const *schema, ArrowArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowArray and ArrowSchema input.
std::unique_ptr< table > from_dlpack(DLManagedTensor const *managed_tensor, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Convert a DLPack DLTensor into a cudf table.
DLManagedTensor * to_dlpack(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Convert a cudf table into a DLPack DLTensor.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
std::vector< cudf::column_metadata > get_table_metadata(cudf::table_view const &input)
Helper function to generate empty table metadata (all columns with no names) for arrow conversion.
cudf::column_metadata get_column_metadata(cudf::column_view const &input)
Helper function to generate empty column metadata (column with no name) for arrow conversion.
cuDF interfaces
Definition: host_udf.hpp:37
APIs for spans.
Class definition for cudf::strings_column_view.
Detailed metadata information for arrow array.
Definition: interop.hpp:119
column_metadata(std::string _name)
Construct a new column metadata object.
Definition: interop.hpp:128
std::vector< column_metadata > children_meta
Metadata of children of the column.
Definition: interop.hpp:121
std::string name
Name of the column.
Definition: interop.hpp:120
Functor for a custom deleter to a unique_ptr of a view type (cudf::table_view or cudf::column_view).
Definition: interop.hpp:159
void operator()(ViewType *ptr) const
Call operator that deletes the view object pointed to by ptr.
Definition: interop.hpp:172
owned_columns_t owned_mem_
Owned columns that must be deleted.
Definition: interop.hpp:174
custom_view_deleter(owned_columns_t &&owned)
Construct a new custom view deleter object.
Definition: interop.hpp:165
C++20 std::span with reduced feature set.
Definition: span.hpp:194
Class definition for cudf::table.
Class definitions for (mutable)_table_view
Type declarations for libcudf.