host_udf.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 
6 #pragma once
7 
8 #include <cudf/aggregation.hpp>
10 #include <cudf/types.hpp>
11 #include <cudf/utilities/export.hpp>
12 #include <cudf/utilities/span.hpp>
13 
14 #include <rmm/cuda_stream_view.hpp>
15 #include <rmm/resource_ref.hpp>
16 
17 #include <functional>
18 #include <optional>
19 
26 namespace CUDF_EXPORT cudf {
40  // Declare constructor private to prevent the users from deriving from this class.
41  private:
/**
 * @brief Default constructor.
 *
 * Being private, it can only be invoked from this class itself and from the structs that are
 * declared as friends right after it.
 */
42  host_udf_base() = default;
43 
44  // Only allow deriving from the structs below.
45  friend struct reduce_host_udf;
46  friend struct segmented_reduce_host_udf;
47  friend struct groupby_host_udf;
48 
49  public:
/**
 * @brief Default destructor.
 *
 * Declared virtual, so destroying a derived object through a `host_udf_base` pointer runs the
 * derived destructor.
 */
50  virtual ~host_udf_base() = default;
51 
/**
 * @brief Computes hash value of the instance.
 *
 * This default implementation hashes only the integer value of `aggregation::Kind::HOST_UDF`,
 * so it produces the same value for every instance that does not override it. The function is
 * virtual, so a derived class can provide its own hash.
 *
 * @return The hash value of the instance
 */
60  [[nodiscard]] virtual std::size_t do_hash() const
61  {
62  return std::hash<int>{}(static_cast<int>(aggregation::Kind::HOST_UDF));
63  }
64 
/**
 * @brief Compares two instances of the derived class for equality.
 *
 * Pure virtual: the derived class must supply the comparison.
 *
 * @param other The instance to compare with
 * @return True if the two instances are equal
 */
70  [[nodiscard]] virtual bool is_equal(host_udf_base const& other) const = 0;
71 
/**
 * @brief Clones the instance.
 *
 * Pure virtual: the derived class must supply the implementation.
 *
 * @return An owning pointer to the new instance
 */
79  [[nodiscard]] virtual std::unique_ptr<host_udf_base> clone() const = 0;
80 };
81 
/**
 * @brief Perform reduction operations.
 *
 * Pure virtual: the derived class must supply the implementation.
 *
 * @param input The input column view
 * @param output_dtype The data type given for the output (presumably the type of the returned
 *        scalar -- TODO confirm)
 * @param init Optional reference to a scalar (presumably the initial value of the reduction --
 *        TODO confirm)
 * @param stream The CUDA stream view passed to the implementation
 * @param mr The device memory resource reference passed to the implementation
 * @return An owning pointer to the resulting scalar
 */
131  [[nodiscard]] virtual std::unique_ptr<scalar> operator()(
132  column_view const& input,
133  data_type output_dtype,
134  std::optional<std::reference_wrapper<scalar const>> init,
135  rmm::cuda_stream_view stream,
136  rmm::device_async_resource_ref mr) const = 0;
137 };
138 
/**
 * @brief Perform segmented reduction operations.
 *
 * Pure virtual: the derived class must supply the implementation.
 *
 * @param input The input column view
 * @param offsets Device span of `size_type` values (presumably the boundaries of the segments
 *        within `input` -- TODO confirm)
 * @param output_dtype The data type given for the output (presumably the element type of the
 *        returned column -- TODO confirm)
 * @param null_handling Whether to include or exclude nulls
 * @param init Optional reference to a scalar (presumably the initial value of each reduction --
 *        TODO confirm)
 * @param stream The CUDA stream view passed to the implementation
 * @param mr The device memory resource reference passed to the implementation
 * @return An owning pointer to the resulting column
 */
193  [[nodiscard]] virtual std::unique_ptr<column> operator()(
194  column_view const& input,
195  device_span<size_type const> offsets,
196  data_type output_dtype,
197  null_policy null_handling,
198  std::optional<std::reference_wrapper<scalar const>> init,
199  rmm::cuda_stream_view stream,
200  rmm::device_async_resource_ref mr) const = 0;
201 };
202 
203 // Forward declaration, so that `groupby::detail::aggregate_result_functor` can be named in a
// friend declaration later in this file without including its definition here.
204 namespace groupby ::detail {
205 struct aggregate_result_functor;
206 }
207 
/**
 * @brief Get the output when the input values column is empty.
 *
 * Pure virtual: the derived class must supply the implementation.
 *
 * @param stream The CUDA stream view passed to the implementation
 * @param mr The device memory resource reference passed to the implementation
 * @return An owning pointer to the output column
 */
267  [[nodiscard]] virtual std::unique_ptr<column> get_empty_output(
268  rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const = 0;
269 
/**
 * @brief Perform the main groupby computation for the host-based UDF.
 *
 * Pure virtual: the derived class must supply the implementation.
 *
 * @param stream The CUDA stream view passed to the implementation
 * @param mr The device memory resource reference passed to the implementation
 * @return An owning pointer to the output column
 */
277  [[nodiscard]] virtual std::unique_ptr<column> operator()(
278  rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const = 0;
279 
280  private:
281  // Allow the struct `aggregate_result_functor` to set its private callback variables.
282  friend struct groupby::detail::aggregate_result_functor;
283 
/**
 * @brief Callback to access the input values column; invoked by `get_input_values()`.
 */
287  std::function<column_view(void)> callback_input_values;
288 
/**
 * @brief Callback to access the input values grouped according to the input keys; invoked by
 * `get_grouped_values()`.
 */
293  std::function<column_view(void)> callback_grouped_values;
294 
/**
 * @brief Callback to access the input values grouped according to the input keys and sorted
 * within each group; invoked by `get_sorted_grouped_values()`.
 */
299  std::function<column_view(void)> callback_sorted_grouped_values;
300 
/**
 * @brief Callback to access the number of groups (i.e., number of distinct keys); invoked by
 * `get_num_groups()`.
 */
304  std::function<size_type(void)> callback_num_groups;
305 
/**
 * @brief Callback to access the offsets separating groups; invoked by `get_group_offsets()`.
 */
309  std::function<device_span<size_type const>(void)> callback_group_offsets;
310 
/**
 * @brief Callback to access the group labels; invoked by `get_group_labels()`.
 */
314  std::function<device_span<size_type const>(void)> callback_group_labels;
315 
/**
 * @brief Callback to compute a built-in groupby aggregation and access its result; invoked by
 * `compute_aggregation()`, which hands over ownership of the requested aggregation.
 */
319  std::function<column_view(std::unique_ptr<aggregation>)> callback_compute_aggregation;
320 
321  protected:
/**
 * @brief Access the input values column.
 *
 * Forwards to `callback_input_values`. `CUDF_EXPECTS` raises an exception if that callback has
 * not been set.
 *
 * @return The column view returned by the callback
 */
327  [[nodiscard]] column_view get_input_values() const
328  {
329  CUDF_EXPECTS(callback_input_values, "Uninitialized callback_input_values.");
330  return callback_input_values();
331  }
332 
/**
 * @brief Access the input values grouped according to the input keys.
 *
 * Forwards to `callback_grouped_values`. `CUDF_EXPECTS` raises an exception if that callback
 * has not been set.
 *
 * @return The column view returned by the callback
 */
339  [[nodiscard]] column_view get_grouped_values() const
340  {
341  CUDF_EXPECTS(callback_grouped_values, "Uninitialized callback_grouped_values.");
342  return callback_grouped_values();
343  }
344 
/**
 * @brief Access the input values grouped according to the input keys and sorted within each
 * group.
 *
 * Forwards to `callback_sorted_grouped_values`. `CUDF_EXPECTS` raises an exception if that
 * callback has not been set.
 *
 * @return The column view returned by the callback
 */
351  [[nodiscard]] column_view get_sorted_grouped_values() const
352  {
353  CUDF_EXPECTS(callback_sorted_grouped_values, "Uninitialized callback_sorted_grouped_values.");
354  return callback_sorted_grouped_values();
355  }
356 
/**
 * @brief Access the number of groups (i.e., number of distinct keys).
 *
 * Forwards to `callback_num_groups`. `CUDF_EXPECTS` raises an exception if that callback has
 * not been set.
 *
 * @return The value returned by the callback
 */
362  [[nodiscard]] size_type get_num_groups() const
363  {
364  CUDF_EXPECTS(callback_num_groups, "Uninitialized callback_num_groups.");
365  return callback_num_groups();
366  }
367 
/**
 * @brief Access the offsets separating groups.
 *
 * Forwards to `callback_group_offsets`. `CUDF_EXPECTS` raises an exception if that callback has
 * not been set.
 *
 * @return The device span returned by the callback
 */
373  [[nodiscard]] device_span<size_type const> get_group_offsets() const
374  {
375  CUDF_EXPECTS(callback_group_offsets, "Uninitialized callback_group_offsets.");
376  return callback_group_offsets();
377  }
378 
/**
 * @brief Access the group labels (which is also the same as group indices).
 *
 * Forwards to `callback_group_labels`. `CUDF_EXPECTS` raises an exception if that callback has
 * not been set.
 *
 * @return The device span returned by the callback
 */
384  [[nodiscard]] device_span<size_type const> get_group_labels() const
385  {
386  CUDF_EXPECTS(callback_group_labels, "Uninitialized callback_group_labels.");
387  return callback_group_labels();
388  }
389 
/**
 * @brief Compute a built-in groupby aggregation and access its result.
 *
 * Forwards to `callback_compute_aggregation`. `CUDF_EXPECTS` raises an exception if that
 * callback has not been set.
 *
 * @param other_agg The aggregation to compute; ownership is moved into the callback
 * @return The column view returned by the callback
 */
399  [[nodiscard]] column_view compute_aggregation(std::unique_ptr<aggregation> other_agg) const
400  {
401  CUDF_EXPECTS(callback_compute_aggregation, "Uninitialized callback for computing aggregation.");
// `other_agg` is a move-only owning pointer, so it has to be moved into the callback.
402  return callback_compute_aggregation(std::move(other_agg));
403  }
404 };
405  // end of group
407 } // namespace CUDF_EXPORT cudf
Representation for specifying desired aggregations from aggregation-based APIs, e.g., groupby, reductions, rolling, etc.
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:238
The fundamental interface for host-based UDF implementation.
Definition: host_udf.hpp:39
virtual ~host_udf_base()=default
Default destructor.
virtual bool is_equal(host_udf_base const &other) const =0
Compares two instances of the derived class for equality.
virtual std::unique_ptr< host_udf_base > clone() const =0
Clones the instance.
virtual std::size_t do_hash() const
Computes hash value of the instance.
Definition: host_udf.hpp:60
column view class definitions
detail::cccl_async_resource_ref< cuda::mr::resource_ref< cuda::mr::device_accessible > > device_async_resource_ref
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:143
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:115
cuDF interfaces
Definition: host_udf.hpp:26
APIs for spans.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:323
The interface for host-based UDF implementation for groupby aggregation context.
Definition: host_udf.hpp:256
device_span< size_type const > get_group_labels() const
Access the group labels (which is also the same as group indices).
Definition: host_udf.hpp:384
size_type get_num_groups() const
Access the number of groups (i.e., number of distinct keys).
Definition: host_udf.hpp:362
column_view get_grouped_values() const
Access the input values grouped according to the input keys for which the values within each group maintain their original order.
Definition: host_udf.hpp:339
column_view get_sorted_grouped_values() const
Access the input values grouped according to the input keys and sorted within each group.
Definition: host_udf.hpp:351
column_view get_input_values() const
Access the input values column.
Definition: host_udf.hpp:327
column_view compute_aggregation(std::unique_ptr< aggregation > other_agg) const
Compute a built-in groupby aggregation and access its result.
Definition: host_udf.hpp:399
virtual std::unique_ptr< column > get_empty_output(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Get the output when the input values column is empty.
virtual std::unique_ptr< column > operator()(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Perform the main groupby computation for the host-based UDF.
device_span< size_type const > get_group_offsets() const
Access the offsets separating groups.
Definition: host_udf.hpp:373
The interface for host-based UDF implementation for reduction contexts.
Definition: host_udf.hpp:120
virtual std::unique_ptr< scalar > operator()(column_view const &input, data_type output_dtype, std::optional< std::reference_wrapper< scalar const >> init, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Perform reduction operations.
The interface for host-based UDF implementation for segmented reduction context.
Definition: host_udf.hpp:178
virtual std::unique_ptr< column > operator()(column_view const &input, device_span< size_type const > offsets, data_type output_dtype, null_policy null_handling, std::optional< std::reference_wrapper< scalar const >> init, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Perform segmented reduction operations.
Type declarations for libcudf.