interop.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/column/column.hpp>
20 #include <cudf/detail/transform.hpp>
22 #include <cudf/table/table.hpp>
24 #include <cudf/types.hpp>
25 #include <cudf/utilities/export.hpp>
27 #include <cudf/utilities/span.hpp>
28 
29 #include <rmm/resource_ref.hpp>
30 
31 #include <utility>
32 
33 struct DLManagedTensor;  // forward declaration only; passed/returned by pointer in from_dlpack / to_dlpack
34 
35 struct ArrowDeviceArray;  // forward declaration only; used by the arrow_column / arrow_table classes and unique_device_array_t
36 
37 struct ArrowSchema;  // forward declaration only; used by unique_schema_t and the from_arrow* declarations
38 
39 struct ArrowArray;  // forward declaration only; used by from_arrow / from_arrow_column and the ArrowArray&& constructors
40 
41 struct ArrowArrayStream;  // forward declaration only; used by from_arrow_stream / from_arrow_stream_column
42 
44 // These are types from arrow that we are forward declaring for our API to
45 // avoid needing to include nanoarrow headers.
46 typedef int32_t ArrowDeviceType; // NOLINT
47 
48 #define ARROW_DEVICE_CUDA 2 // NOLINT
50 
51 namespace CUDF_EXPORT cudf {
76 std::unique_ptr<table> from_dlpack(
77  DLManagedTensor const* managed_tensor,
80 
100 DLManagedTensor* to_dlpack(
101  table_view const& input,
104  // end of group
106 
120  std::string name;
121  std::string timezone;
122  std::vector<column_metadata> children_meta;
123 
129  column_metadata(std::string _name) : name(std::move(_name)) {}
130  column_metadata() = default;
131 };
132 
137 using unique_schema_t = std::unique_ptr<ArrowSchema, void (*)(ArrowSchema*)>;  // unique_ptr to an ArrowSchema whose deleter is a function pointer taking ArrowSchema*
138 
143 using unique_device_array_t = std::unique_ptr<ArrowDeviceArray, void (*)(ArrowDeviceArray*)>;  // unique_ptr to an ArrowDeviceArray whose deleter is a function pointer taking ArrowDeviceArray*
144 
149 using owned_columns_t = std::vector<std::unique_ptr<cudf::column>>;  // vector of owning column pointers; held by custom_view_deleter and by arrow_column / arrow_table (view_columns)
150 
159 template <typename ViewType>
166  explicit custom_view_deleter(owned_columns_t&& owned) : owned_mem_{std::move(owned)} {}
167 
173  void operator()(ViewType* ptr) const { delete ptr; }
174 
176 };
177 
183  std::unique_ptr<cudf::table_view, custom_view_deleter<cudf::table_view>>;
184 
190  std::unique_ptr<cudf::column_view, custom_view_deleter<cudf::column_view>>;
191 
192 namespace interop {
193 
194 struct arrow_array_container;  // forward declaration only; arrow_column and arrow_table each hold one through a std::shared_ptr member named `container`
195 
210 
224 std::vector<cudf::column_metadata> get_table_metadata(cudf::table_view const& input);  // helper: generates empty table metadata (all columns with no names) for arrow conversion
225 
235  public:
248  column_metadata const& metadata,
251 
264  arrow_column(ArrowSchema&& schema,
265  ArrowDeviceArray&& input,
268 
281  arrow_column(ArrowSchema&& schema,
282  ArrowArray&& input,
285 
297  arrow_column(ArrowArrayStream&& input,
300 
312  ArrowSchema* output,
315 
324  void to_arrow(ArrowDeviceArray* output,
325  ArrowDeviceType device_type = ARROW_DEVICE_CUDA,
328 
336  [[nodiscard]] column_view view() const;
337 
338  private:
339  std::shared_ptr<arrow_array_container>
340  container;
342  owned_columns_t view_columns;
343  column_view cached_view;
344 };
345 
354 class arrow_table {
355  public:
371 
384  arrow_table(ArrowSchema&& schema,
385  ArrowDeviceArray&& input,
388 
401  arrow_table(ArrowSchema&& schema,
402  ArrowArray&& input,
405 
417  arrow_table(ArrowArrayStream&& input,
420 
432  ArrowSchema* output,
435 
444  void to_arrow(ArrowDeviceArray* output,
445  ArrowDeviceType device_type = ARROW_DEVICE_CUDA,
448 
456  [[nodiscard]] table_view view() const;
457 
458  private:
459  std::shared_ptr<arrow_array_container>
460  container;
462  owned_columns_t view_columns;
463  table_view cached_view;
464 };
465 
466 } // namespace interop
467 
485 
511  cudf::table&& table,
514 
540  cudf::column&& col,
543 
572  cudf::table_view const& table,
575 
604  cudf::column_view const& col,
607 
629  cudf::table_view const& table,
632 
654  cudf::column_view const& col,
657 
674  cudf::strings_column_view const& col,
677 
695 std::unique_ptr<cudf::table> from_arrow(
696  ArrowSchema const* schema,
697  ArrowArray const* input,
700 
714 std::unique_ptr<cudf::column> from_arrow_column(
715  ArrowSchema const* schema,
716  ArrowArray const* input,
719 
740 std::unique_ptr<table> from_arrow_host(
741  ArrowSchema const* schema,
742  ArrowDeviceArray const* input,
745 
759 std::unique_ptr<table> from_arrow_stream(
760  ArrowArrayStream* input,
763 
777 std::unique_ptr<column> from_arrow_stream_column(
778  ArrowArrayStream* input,
781 
801 std::unique_ptr<column> from_arrow_host_column(
802  ArrowSchema const* schema,
803  ArrowDeviceArray const* input,
806 
845  ArrowSchema const* schema,
846  ArrowDeviceArray const* input,
849 
883  ArrowSchema const* schema,
884  ArrowDeviceArray const* input,
887  // end of group
889 } // namespace CUDF_EXPORT cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A container of nullable device data as a column of elements.
Definition: column.hpp:47
A standard interchange medium for ArrowDeviceArray data in cudf.
Definition: interop.hpp:234
arrow_column(ArrowSchema &&schema, ArrowDeviceArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
arrow_column(ArrowSchema &&schema, ArrowArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
column_view view() const
Get a view of the column data.
void to_arrow(ArrowDeviceArray *output, ArrowDeviceType device_type=ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the column to an ArrowDeviceArray.
arrow_column(ArrowArrayStream &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
arrow_column(cudf::column &&input, column_metadata const &metadata, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
void to_arrow_schema(ArrowSchema *output, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the column to an ArrowSchema.
A standard interchange medium for ArrowDeviceArray data in cudf.
Definition: interop.hpp:354
arrow_table(ArrowSchema &&schema, ArrowDeviceArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
arrow_table(ArrowArrayStream &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
arrow_table(cudf::table &&input, cudf::host_span< column_metadata const > metadata, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
void to_arrow(ArrowDeviceArray *output, ArrowDeviceType device_type=ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the table to an ArrowDeviceArray.
arrow_table(ArrowSchema &&schema, ArrowArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
table_view view() const
Get a view of the table data.
void to_arrow_schema(ArrowSchema *output, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the table to an ArrowSchema.
Given a column-view of strings type, an instance of this class provides a wrapper on this compound column for strings operations.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
Class definition for cudf::column.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< cudf::table_view, custom_view_deleter< cudf::table_view > > unique_table_view_t
typedef for a unique_ptr to a cudf::table_view with custom deleter
Definition: interop.hpp:183
std::vector< std::unique_ptr< cudf::column > > owned_columns_t
typedef for a vector of owning columns, used for conversion from ArrowDeviceArray
Definition: interop.hpp:149
std::unique_ptr< column > from_arrow_host_column(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from given ArrowDeviceArray input.
std::unique_ptr< cudf::column > from_arrow_column(ArrowSchema const *schema, ArrowArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from a given ArrowArray and ArrowSchema input.
unique_schema_t to_arrow_schema(cudf::table_view const &input, cudf::host_span< column_metadata const > metadata)
Create ArrowSchema from cudf table and metadata.
std::unique_ptr< table > from_arrow_stream(ArrowArrayStream *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowArrayStream input.
unique_table_view_t from_arrow_device(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table_view from given ArrowDeviceArray and ArrowSchema
unique_device_array_t to_arrow_host_stringview(cudf::strings_column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Copy strings column data to host and create ArrowDeviceArray for it using the ArrowBinaryView format.
unique_column_view_t from_arrow_device_column(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column_view from given ArrowDeviceArray and ArrowSchema
unique_device_array_t to_arrow_host(cudf::column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Copy column view data to host and create ArrowDeviceArray for it.
unique_device_array_t to_arrow_device(cudf::column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create ArrowDeviceArray from a column view.
std::unique_ptr< cudf::column_view, custom_view_deleter< cudf::column_view > > unique_column_view_t
typedef for a unique_ptr to a cudf::column_view with custom deleter
Definition: interop.hpp:190
std::unique_ptr< ArrowSchema, void(*)(ArrowSchema *)> unique_schema_t
typedef for a unique_ptr to an ArrowSchema with custom deleter
Definition: interop.hpp:137
std::unique_ptr< ArrowDeviceArray, void(*)(ArrowDeviceArray *)> unique_device_array_t
typedef for a unique_ptr to an ArrowDeviceArray with a custom deleter
Definition: interop.hpp:143
std::unique_ptr< table > from_arrow_host(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowDeviceArray input.
std::unique_ptr< column > from_arrow_stream_column(ArrowArrayStream *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from given ArrowArrayStream input.
std::unique_ptr< cudf::table > from_arrow(ArrowSchema const *schema, ArrowArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowArray and ArrowSchema input.
std::unique_ptr< table > from_dlpack(DLManagedTensor const *managed_tensor, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Convert a DLPack DLTensor into a cudf table.
DLManagedTensor * to_dlpack(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Convert a cudf table into a DLPack DLTensor.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
std::vector< cudf::column_metadata > get_table_metadata(cudf::table_view const &input)
Helper function to generate empty table metadata (all columns with no names) for arrow conversion.
cudf::column_metadata get_column_metadata(cudf::column_view const &input)
Helper function to generate empty column metadata (column with no name) for arrow conversion.
cuDF interfaces
Definition: host_udf.hpp:37
APIs for spans.
Class definition for cudf::strings_column_view.
Detailed metadata information for arrow array.
Definition: interop.hpp:119
column_metadata(std::string _name)
Construct a new column metadata object.
Definition: interop.hpp:129
std::vector< column_metadata > children_meta
Metadata of children of the column.
Definition: interop.hpp:122
std::string name
Name of the column.
Definition: interop.hpp:120
std::string timezone
Timezone of the column.
Definition: interop.hpp:121
Functor used as the custom deleter for a unique_ptr to a view type (instantiated for both table_view and column_view)
Definition: interop.hpp:160
void operator()(ViewType *ptr) const
Call operator that deletes the view object held by the unique_ptr
Definition: interop.hpp:173
owned_columns_t owned_mem_
Owned columns that must be deleted.
Definition: interop.hpp:175
custom_view_deleter(owned_columns_t &&owned)
Construct a new custom view deleter object.
Definition: interop.hpp:166
C++20 std::span with reduced feature set.
Definition: span.hpp:194
Class definition for cudf::table.
Class definitions for (mutable)_table_view
Type declarations for libcudf.