host_udf.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2024-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/aggregation.hpp>
21 #include <cudf/types.hpp>
22 #include <cudf/utilities/export.hpp>
23 #include <cudf/utilities/span.hpp>
24 
25 #include <rmm/cuda_stream_view.hpp>
26 #include <rmm/resource_ref.hpp>
27 
28 #include <functional>
29 #include <optional>
30 
37 namespace CUDF_EXPORT cudf {
51  // The constructor is private so that user code cannot derive from this class directly.
52  private:
53  host_udf_base() = default;
54 
55  // Only the structs befriended below can reach the private constructor, so they are the
// only types that are allowed to derive from this class.
56  friend struct reduce_host_udf;
57  friend struct segmented_reduce_host_udf;
58  friend struct groupby_host_udf;
59 
60  public:
// Default destructor. It is virtual because instances are handled through
// `host_udf_base` pointers (see `clone()`, which returns `std::unique_ptr<host_udf_base>`).
61  virtual ~host_udf_base() = default;
62 
71  [[nodiscard]] virtual std::size_t do_hash() const
72  {
73  return std::hash<int>{}(static_cast<int>(aggregation::Kind::HOST_UDF));
74  }
75 
/**
 * @brief Compares two instances of the derived class for equality.
 *
 * Pure virtual: every concrete UDF type has to provide the comparison.
 *
 * @param other The instance to compare this one against
 * @return The result of the equality comparison
 */
81  [[nodiscard]] virtual bool is_equal(host_udf_base const& other) const = 0;
82 
/**
 * @brief Clones the instance.
 *
 * Pure virtual: every concrete UDF type has to provide its own cloning.
 *
 * @return An owning pointer (typed as the base class) to the clone
 */
90  [[nodiscard]] virtual std::unique_ptr<host_udf_base> clone() const = 0;
91 };
92 
/**
 * @brief Perform reduction operations.
 *
 * Pure virtual: each concrete reduction UDF supplies the implementation.
 *
 * @param input The input column view
 * @param output_dtype Data type requested for the output (presumably the type of the
 *        returned scalar -- confirm against the implementations)
 * @param init Optional reference to a scalar (presumably the initial value of the
 *        reduction -- confirm); may be empty
 * @param stream CUDA stream view handed to the implementation
 * @param mr Device memory resource reference handed to the implementation
 * @return An owning pointer to the resulting scalar
 */
142  [[nodiscard]] virtual std::unique_ptr<scalar> operator()(
143  column_view const& input,
144  data_type output_dtype,
145  std::optional<std::reference_wrapper<scalar const>> init,
146  rmm::cuda_stream_view stream,
147  rmm::device_async_resource_ref mr) const = 0;
148 };
149 
204  [[nodiscard]] virtual std::unique_ptr<column> operator()(
205  column_view const& input,
207  data_type output_dtype,
208  null_policy null_handling,
209  std::optional<std::reference_wrapper<scalar const>> init,
210  rmm::cuda_stream_view stream,
211  rmm::device_async_resource_ref mr) const = 0;
212 };
213 
214 // Forward declaration.
// The type is only named in a `friend` declaration further down in this header, so an
// incomplete type is enough here and its defining header does not have to be included.
215 namespace groupby ::detail {
216 struct aggregate_result_functor;
217 }
218 
278  [[nodiscard]] virtual std::unique_ptr<column> get_empty_output(
280 
288  [[nodiscard]] virtual std::unique_ptr<column> operator()(
290 
291  private:
292  // Allow the struct `aggregate_result_functor` to set its private callback variables.
// All callbacks below are default-constructed (empty) until they are assigned; the
// protected accessors check for that before invoking them.
293  friend struct groupby::detail::aggregate_result_functor;
294 
// Callback to access the input values column; invoked by `get_input_values()`.
298  std::function<column_view(void)> callback_input_values;
299 
// Callback to access the input values grouped according to the input keys; invoked by
// `get_grouped_values()`.
304  std::function<column_view(void)> callback_grouped_values;
305 
// Callback to access the input values grouped according to the input keys and sorted
// within each group; invoked by `get_sorted_grouped_values()`.
310  std::function<column_view(void)> callback_sorted_grouped_values;
311 
// Callback to access the number of groups; invoked by `get_num_groups()`.
315  std::function<size_type(void)> callback_num_groups;
316 
// Callback to access the offsets separating groups; invoked by `get_group_offsets()`.
320  std::function<device_span<size_type const>(void)> callback_group_offsets;
321 
// Callback to access the group labels; invoked by `get_group_labels()`.
325  std::function<device_span<size_type const>(void)> callback_group_labels;
326 
// Callback that takes ownership of an aggregation and returns a view of its result;
// invoked by `compute_aggregation()`.
330  std::function<column_view(std::unique_ptr<aggregation>)> callback_compute_aggregation;
331 
332  protected:
338  [[nodiscard]] column_view get_input_values() const
339  {
340  CUDF_EXPECTS(callback_input_values, "Uninitialized callback_input_values.");
341  return callback_input_values();
342  }
343 
350  [[nodiscard]] column_view get_grouped_values() const
351  {
352  CUDF_EXPECTS(callback_grouped_values, "Uninitialized callback_grouped_values.");
353  return callback_grouped_values();
354  }
355 
363  {
364  CUDF_EXPECTS(callback_sorted_grouped_values, "Uninitialized callback_sorted_grouped_values.");
365  return callback_sorted_grouped_values();
366  }
367 
373  [[nodiscard]] size_type get_num_groups() const
374  {
375  CUDF_EXPECTS(callback_num_groups, "Uninitialized callback_num_groups.");
376  return callback_num_groups();
377  }
378 
385  {
386  CUDF_EXPECTS(callback_group_offsets, "Uninitialized callback_group_offsets.");
387  return callback_group_offsets();
388  }
389 
396  {
397  CUDF_EXPECTS(callback_group_labels, "Uninitialized callback_group_labels.");
398  return callback_group_labels();
399  }
400 
410  [[nodiscard]] column_view compute_aggregation(std::unique_ptr<aggregation> other_agg) const
411  {
412  CUDF_EXPECTS(callback_compute_aggregation, "Uninitialized callback for computing aggregation.");
413  return callback_compute_aggregation(std::move(other_agg));
414  }
415 };
416  // end of group
418 } // namespace CUDF_EXPORT cudf
Representation for specifying desired aggregations from aggregation-based APIs, e.g., groupby, reductions, and scans.
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
The fundamental interface for host-based UDF implementation.
Definition: host_udf.hpp:50
virtual ~host_udf_base()=default
Default destructor.
virtual bool is_equal(host_udf_base const &other) const =0
Compares two instances of the derived class for equality.
virtual std::unique_ptr< host_udf_base > clone() const =0
Clones the instance.
virtual std::size_t do_hash() const
Computes hash value of the instance.
Definition: host_udf.hpp:71
column view class definitions
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:178
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
null_policy
Enum to specify whether to include nulls or exclude nulls.
Definition: types.hpp:126
cuDF interfaces
Definition: host_udf.hpp:37
APIs for spans.
Device version of C++20 std::span with reduced feature set.
Definition: span.hpp:351
The interface for host-based UDF implementation for groupby aggregation context.
Definition: host_udf.hpp:267
device_span< size_type const > get_group_labels() const
Access the group labels (which is also the same as group indices).
Definition: host_udf.hpp:395
size_type get_num_groups() const
Access the number of groups (i.e., number of distinct keys).
Definition: host_udf.hpp:373
column_view get_grouped_values() const
Access the input values grouped according to the input keys for which the values within each group maintain their original order.
Definition: host_udf.hpp:350
column_view get_sorted_grouped_values() const
Access the input values grouped according to the input keys and sorted within each group.
Definition: host_udf.hpp:362
column_view get_input_values() const
Access the input values column.
Definition: host_udf.hpp:338
column_view compute_aggregation(std::unique_ptr< aggregation > other_agg) const
Compute a built-in groupby aggregation and access its result.
Definition: host_udf.hpp:410
virtual std::unique_ptr< column > get_empty_output(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Get the output when the input values column is empty.
virtual std::unique_ptr< column > operator()(rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Perform the main groupby computation for the host-based UDF.
device_span< size_type const > get_group_offsets() const
Access the offsets separating groups.
Definition: host_udf.hpp:384
The interface for host-based UDF implementation for reduction contexts.
Definition: host_udf.hpp:131
virtual std::unique_ptr< scalar > operator()(column_view const &input, data_type output_dtype, std::optional< std::reference_wrapper< scalar const >> init, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Perform reduction operations.
The interface for host-based UDF implementation for segmented reduction context.
Definition: host_udf.hpp:189
virtual std::unique_ptr< column > operator()(column_view const &input, device_span< size_type const > offsets, data_type output_dtype, null_policy null_handling, std::optional< std::reference_wrapper< scalar const >> init, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const =0
Perform segmented reduction operations.
Type declarations for libcudf.