host_udf.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/aggregation.hpp>
20 #include <cudf/types.hpp>
21 #include <cudf/utilities/export.hpp>
22 #include <cudf/utilities/span.hpp>
24 
25 #include <rmm/cuda_stream_view.hpp>
26 #include <rmm/resource_ref.hpp>
27 
28 #include <optional>
29 #include <unordered_map>
30 #include <unordered_set>
31 #include <variant>
32 
39 namespace CUDF_EXPORT cudf {
99 struct host_udf_base {
100  host_udf_base() = default;
101  virtual ~host_udf_base() = default;
102 
108  enum class groupby_data_attribute : int32_t {
109  INPUT_VALUES,
110  GROUPED_VALUES,
112  SORTED_GROUPED_VALUES,
114  NUM_GROUPS,
115  GROUP_OFFSETS,
116  GROUP_LABELS
117  };
118 
130  struct data_attribute {
134  using value_type = std::variant<groupby_data_attribute, std::unique_ptr<aggregation>>;
137 
138  data_attribute() = default;
140 
145  template <typename T, CUDF_ENABLE_IF(std::is_same_v<T, groupby_data_attribute>)>
146  data_attribute(T value_) : value{value_}
147  {
148  }
149 
154  template <typename T,
155  CUDF_ENABLE_IF(std::is_same_v<T, aggregation> ||
156  std::is_same_v<T, groupby_aggregation>)>
157  data_attribute(std::unique_ptr<T> value_) : value{std::move(value_)}
158  {
159  CUDF_EXPECTS(std::get<std::unique_ptr<aggregation>>(value) != nullptr,
160  "Invalid aggregation request.");
161  if constexpr (std::is_same_v<T, aggregation>) {
162  CUDF_EXPECTS(
163  dynamic_cast<groupby_aggregation*>(std::get<std::unique_ptr<T>>(value).get()) != nullptr,
164  "Requesting results from other aggregations is only supported in groupby "
165  "aggregations.");
166  }
167  }
168 
174 
178  struct hash {
184  std::size_t operator()(data_attribute const& attr) const;
185  }; // struct hash
186 
190  struct equal_to {
197  bool operator()(data_attribute const& lhs, data_attribute const& rhs) const;
198  }; // struct equal_to
199  }; // struct data_attribute
200 
205  std::unordered_set<data_attribute, data_attribute::hash, data_attribute::equal_to>;
206 
217  [[nodiscard]] virtual data_attribute_set_t get_required_data() const { return {}; }
218 
223  using input_data_t = std::variant<column_view, size_type, device_span<size_type const>>;
224 
228  using input_map_t = std::
229  unordered_map<data_attribute, input_data_t, data_attribute::hash, data_attribute::equal_to>;
230 
237  using output_t = std::variant<std::unique_ptr<column>>;
238 
250  [[nodiscard]] virtual output_t get_empty_output(std::optional<data_type> output_dtype,
251  rmm::cuda_stream_view stream,
252  rmm::device_async_resource_ref mr) const = 0;
253 
262  [[nodiscard]] virtual output_t operator()(input_map_t const& input,
263  rmm::cuda_stream_view stream,
264  rmm::device_async_resource_ref mr) const = 0;
265 
270  [[nodiscard]] virtual std::size_t do_hash() const
271  {
272  return std::hash<int>{}(static_cast<int>(aggregation::Kind::HOST_UDF));
273  }
274 
280  [[nodiscard]] virtual bool is_equal(host_udf_base const& other) const = 0;
281 
290  [[nodiscard]] virtual std::unique_ptr<host_udf_base> clone() const = 0;
291 };
292  // end of group
294 } // namespace CUDF_EXPORT cudf
Representation for specifying desired aggregations from aggregation-based APIs, e....
Derived class intended for groupby specific aggregation usage.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
#define CUDF_ENABLE_IF(...)
Convenience macro for SFINAE as an unnamed template parameter.
Definition: traits.hpp:48
cuDF interfaces
Definition: host_udf.hpp:39
APIs for spans.
Equality comparison functor for data_attribute.
Definition: host_udf.hpp:190
bool operator()(data_attribute const &lhs, data_attribute const &rhs) const
Check if two data attributes are equal.
Hash functor for data_attribute.
Definition: host_udf.hpp:178
std::size_t operator()(data_attribute const &attr) const
Compute the hash value of a data attribute.
Describe possible data that may be needed in the derived class for its operations.
Definition: host_udf.hpp:130
data_attribute(std::unique_ptr< T > value_)
Construct a new data attribute from another aggregation request.
Definition: host_udf.hpp:157
data_attribute()=default
Default constructor.
data_attribute(T value_)
Construct a new data attribute from an aggregation attribute.
Definition: host_udf.hpp:146
data_attribute(data_attribute &&)=default
Move constructor.
data_attribute(data_attribute const &other)
Copy constructor.
std::variant< groupby_data_attribute, std::unique_ptr< aggregation > > value_type
Hold all possible data types for the input of the aggregation in the derived class.
Definition: host_udf.hpp:134
The interface for host-based UDF implementation.
Definition: host_udf.hpp:99
virtual output_t operator()(input_map_t const &input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Perform the main computation for the host-based UDF.
std::unordered_set< data_attribute, data_attribute::hash, data_attribute::equal_to > data_attribute_set_t
Set of attributes for the input data that is needed for computing the aggregation.
Definition: host_udf.hpp:205
std::variant< column_view, size_type, device_span< size_type const > > input_data_t
Hold all possible types of the data that is passed to the derived class for executing the aggregation...
Definition: host_udf.hpp:223
virtual bool is_equal(host_udf_base const &other) const =0
Compares two instances of the derived class for equality.
std::unordered_map< data_attribute, input_data_t, data_attribute::hash, data_attribute::equal_to > input_map_t
Input to the aggregation, mapping from each data attribute to its actual data.
Definition: host_udf.hpp:229
virtual data_attribute_set_t get_required_data() const
Return a set of attributes for the data that is needed for computing the aggregation.
Definition: host_udf.hpp:217
std::variant< std::unique_ptr< column > > output_t
Output type of the aggregation.
Definition: host_udf.hpp:237
virtual output_t get_empty_output(std::optional< data_type > output_dtype, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Get the output when the input values column is empty.
groupby_data_attribute
Define the possible data needed for groupby aggregations.
Definition: host_udf.hpp:108
virtual std::unique_ptr< host_udf_base > clone() const =0
Clones the instance.
virtual std::size_t do_hash() const
Computes hash value of the class's instance.
Definition: host_udf.hpp:270
Type declarations for libcudf.