expressions.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
9 #include <cudf/scalar/scalar.hpp>
12 #include <cudf/types.hpp>
13 #include <cudf/utilities/error.hpp>
14 
15 #include <cstdint>
16 #include <memory>
17 #include <vector>
18 
19 namespace CUDF_EXPORT cudf {
20 
namespace detail::row_ir {

// Forward declaration only; not defined in this header. `ast::expression::accept` returns a
// `std::unique_ptr` to this type when visited by an `ast_converter`.
struct node;

// Forward declaration only; not defined in this header. Visitor type taken by the
// `ast_converter` overload of `ast::expression::accept`.
struct ast_converter;

}  // namespace detail::row_ir
42 
43 namespace ast {
50 // Forward declaration.
51 namespace detail {
52 class expression_parser;
53 class expression_transformer;
54 } // namespace detail
55 
62 struct expression {
69  virtual cudf::size_type accept(detail::expression_parser& visitor) const = 0;
70 
77  virtual std::reference_wrapper<expression const> accept(
78  detail::expression_transformer& visitor) const = 0;
79 
86  [[nodiscard]] virtual std::unique_ptr<cudf::detail::row_ir::node> accept(
87  cudf::detail::row_ir::ast_converter& visitor) const = 0;
88 
96  [[nodiscard]] bool may_evaluate_null(table_view const& left, rmm::cuda_stream_view stream) const
97  {
98  return may_evaluate_null(left, left, stream);
99  }
100 
109  [[nodiscard]] virtual bool may_evaluate_null(table_view const& left,
110  table_view const& right,
111  rmm::cuda_stream_view stream) const = 0;
112 
113  virtual ~expression() {}
114 };
115 
/**
 * @brief Enum of table references.
 */
enum class table_reference {
  /// Column index in the left table.
  LEFT,
  /// Column index in the right table.
  RIGHT,
  /// Column index in the output table.
  OUTPUT
};
126 
131  public:
/**
 * @brief Returns the stored value.
 *
 * Reinterprets the untyped `_data` pointer as `T`. Nothing in this function checks that `T`
 * agrees with the view's `type()`; the caller chooses `T`.
 *
 * @tparam T The type to read the value as
 * @return The value read through `_data`
 */
138  template <typename T>
139  __device__ T const value() const noexcept
140  {
// Strings: rebuild the view from the character pointer and the stored `_size`.
141  if constexpr (std::is_same_v<T, cudf::string_view>) {
142  return string_view(static_cast<char const*>(_data), _size);
143  }
// Fixed-point: `_data` points at the underlying representation; the scale comes from `type()`.
// `scaled_integer` is the helper for constructing a fixed_point from an already-shifted value.
144  if constexpr (cudf::is_fixed_point<T>()) {
145  using rep_type = typename T::rep;
146  auto const rep = *static_cast<rep_type const*>(_data);
147  auto const scale = numeric::scale_type{type().scale()};
148  return T{numeric::scaled_integer<rep_type>{rep, scale}};
149  }
// Every other type: read `T` directly from `_data`.
// NOTE(review): the branches above are sequential `if constexpr` blocks, not an else-chain, so
// this return is also compiled for the string and fixed-point instantiations. Presumably
// intentional for the device compiler; confirm before restructuring.
150  return *static_cast<T const*>(_data);
151  }
152 
157  template <typename T>
159  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
160  {
161  }
162 
167  template <typename T>
169  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
170  {
171  }
172 
177  template <typename T>
179  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
180  {
181  }
182 
188  : generic_scalar_device_view(s.type(), s.data(), s.validity_data(), s.size())
189  {
190  }
191 
196  template <typename T>
198  : generic_scalar_device_view{s.type(), s.data(), s.validity_data()}
199  {
200  }
201 
202  protected:
203  void const* _data{};
204  size_type const _size{};
205 
/**
 * @brief Construct a new fixed width scalar device view object.
 *
 * `_size` is not set by this overload and keeps its in-class default (`{}`).
 *
 * @param type Data type, forwarded to `scalar_device_view_base`
 * @param data Untyped pointer stored as `_data`; not owned by the view
 * @param is_valid Validity pointer, forwarded to `scalar_device_view_base`
 */
214  generic_scalar_device_view(data_type type, void const* data, bool* is_valid)
215  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data)
216  {
217  }
218 
/**
 * @brief Construct a new string scalar device view object.
 *
 * @param type Data type, forwarded to `scalar_device_view_base`
 * @param data Untyped pointer stored as `_data`; not owned by the view
 * @param is_valid Validity pointer, forwarded to `scalar_device_view_base`
 * @param size Stored as `_size`; `value()` passes it as the length of the `string_view` it builds
 */
227  generic_scalar_device_view(data_type type, void const* data, bool* is_valid, size_type size)
228  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data), _size(size)
229  {
230  }
231 };
232 
236 class literal : public expression {
237  public:
244  template <typename T>
245  literal(cudf::numeric_scalar<T>& value) : scalar(value), value(value)
246  {
247  }
248 
255  template <typename T>
256  literal(cudf::timestamp_scalar<T>& value) : scalar(value), value(value)
257  {
258  }
259 
266  template <typename T>
267  literal(cudf::duration_scalar<T>& value) : scalar(value), value(value)
268  {
269  }
270 
276  literal(cudf::string_scalar& value) : scalar(value), value(value) {}
277 
283  template <typename T>
284  literal(cudf::fixed_point_scalar<T>& value) : scalar(value), value(value)
285  {
286  }
287 
293  [[nodiscard]] cudf::data_type get_data_type() const { return get_value().type(); }
294 
300  [[nodiscard]] generic_scalar_device_view get_value() const { return value; }
301 
307  [[nodiscard]] cudf::scalar const& get_scalar() const { return scalar; }
308 
312  cudf::size_type accept(detail::expression_parser& visitor) const override;
313 
317  std::reference_wrapper<expression const> accept(
318  detail::expression_transformer& visitor) const override;
319 
323  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
324  cudf::detail::row_ir::ast_converter& visitor) const override;
325 
326  [[nodiscard]] bool may_evaluate_null(table_view const& left,
327  table_view const& right,
328  rmm::cuda_stream_view stream) const override
329  {
330  return !is_valid(stream);
331  }
332 
339  [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream) const
340  {
341  return scalar.is_valid(stream);
342  }
343 
344  private:
345  cudf::scalar const& scalar;
346  generic_scalar_device_view const value;
347 };
348 
352 class column_reference : public expression {
353  public:
362  table_reference table_source = table_reference::LEFT)
363  : column_index(column_index), table_source(table_source)
364  {
365  }
366 
372  [[nodiscard]] cudf::size_type get_column_index() const { return column_index; }
373 
379  [[nodiscard]] table_reference get_table_source() const { return table_source; }
380 
387  [[nodiscard]] cudf::data_type get_data_type(table_view const& table) const
388  {
389  return table.column(get_column_index()).type();
390  }
391 
399  [[nodiscard]] cudf::data_type get_data_type(table_view const& left_table,
400  table_view const& right_table) const
401  {
402  auto const table = [&] {
403  if (get_table_source() == table_reference::LEFT) {
404  return left_table;
405  } else if (get_table_source() == table_reference::RIGHT) {
406  return right_table;
407  } else {
408  CUDF_FAIL("Column reference data type cannot be determined from unknown table.");
409  }
410  }();
411  return table.column(get_column_index()).type();
412  }
413 
417  cudf::size_type accept(detail::expression_parser& visitor) const override;
418 
422  std::reference_wrapper<expression const> accept(
423  detail::expression_transformer& visitor) const override;
424 
/**
 * @brief Returns true if the expression may evaluate to null.
 *
 * Reports whether the referenced column has nulls. The column is taken from `left` when the
 * table source is `table_reference::LEFT`, and from `right` for every other table source.
 *
 * @param left The left table
 * @param right The right table
 * @param stream Not used by this override
 * @return The result of `has_nulls()` on the selected column
 */
425  [[nodiscard]] bool may_evaluate_null(table_view const& left,
426  table_view const& right,
427  rmm::cuda_stream_view stream) const override
428  {
// NOTE(review): `table_reference::OUTPUT` also selects `right` here, while the two-table
// `get_data_type` rejects it via CUDF_FAIL — confirm this difference is intended.
429  return (table_source == table_reference::LEFT ? left : right).column(column_index).has_nulls();
430  }
431 
435  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
436  cudf::detail::row_ir::ast_converter& visitor) const override;
437 
438  private:
439  cudf::size_type column_index;
440  table_reference table_source;
441 };
442 
446 class operation : public expression {
447  public:
454  operation(ast_operator op, expression const& input);
455 
463  operation(ast_operator op, expression const& left, expression const& right);
464 
465  // operation only stores references to expressions, so it does not accept r-value
466  // references: the calling code must own the expressions.
467  operation(ast_operator op, expression&& input) = delete;
468  operation(ast_operator op, expression&& left, expression&& right) = delete;
469  operation(ast_operator op, expression&& left, expression const& right) = delete;
470  operation(ast_operator op, expression const& left, expression&& right) = delete;
471 
477  [[nodiscard]] ast_operator get_operator() const { return op; }
478 
484  [[nodiscard]] std::vector<std::reference_wrapper<expression const>> const& get_operands() const
485  {
486  return operands;
487  }
488 
492  cudf::size_type accept(detail::expression_parser& visitor) const override;
493 
497  std::reference_wrapper<expression const> accept(
498  detail::expression_transformer& visitor) const override;
499 
500  [[nodiscard]] bool may_evaluate_null(table_view const& left,
501  table_view const& right,
502  rmm::cuda_stream_view stream) const override;
503 
507  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
508  cudf::detail::row_ir::ast_converter& visitor) const override;
509 
510  private:
511  ast_operator op;
512  std::vector<std::reference_wrapper<expression const>> operands;
513 };
514 
515 namespace detail {
516 
521 class filter_predicate : public expression {
522  public:
527  filter_predicate(expression const& source) : source_{source} {}
528 
532  cudf::size_type accept(detail::expression_parser& visitor) const override;
533 
537  std::reference_wrapper<expression const> accept(
538  detail::expression_transformer& visitor) const override;
539 
540  [[nodiscard]] bool may_evaluate_null(table_view const& left,
541  table_view const& right,
542  rmm::cuda_stream_view stream) const override;
543 
547  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
548  cudf::detail::row_ir::ast_converter& visitor) const override;
549 
554  [[nodiscard]] expression const& get_operand() const { return source_; }
555 
556  private:
557  std::reference_wrapper<expression const> source_;
558 };
559 
560 } // namespace detail
561 
566  public:
573  column_name_reference(std::string column_name) : column_name(std::move(column_name)) {}
574 
580  [[nodiscard]] std::string get_column_name() const { return column_name; }
581 
585  cudf::size_type accept(detail::expression_parser& visitor) const override;
586 
590  std::reference_wrapper<expression const> accept(
591  detail::expression_transformer& visitor) const override;
592 
/**
 * @brief Returns true if the expression may evaluate to null.
 *
 * Always answers `true`: none of the arguments are consulted by this override.
 *
 * @param left Not used by this override
 * @param right Not used by this override
 * @param stream Not used by this override
 * @return `true`
 */
593  [[nodiscard]] bool may_evaluate_null(table_view const& left,
594  table_view const& right,
595  rmm::cuda_stream_view stream) const override
596  {
597  return true;
598  }
599 
603  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
604  cudf::detail::row_ir::ast_converter& visitor) const override;
605 
606  private:
607  std::string column_name;
608 };
609 
614 class tree {
615  public:
619  tree() = default;
620 
624  tree(tree&&) = default;
625 
630  tree& operator=(tree&&) = default;
631 
632  ~tree() = default;
633 
634  // the tree is not copyable
635  tree(tree const&) = delete;
636  tree& operator=(tree const&) = delete;
637 
643  template <typename Expr, typename... Args>
644  std::enable_if_t<std::is_base_of_v<expression, Expr>, Expr const&> emplace(Args&&... args)
645  {
646  auto expr = std::make_unique<Expr>(std::forward<Args>(args)...);
647  Expr const& expr_ref = *expr;
648  expressions.emplace_back(std::move(expr));
649  return expr_ref;
650  }
651 
657  template <typename Expr>
658  decltype(auto) push(Expr expr)
659  {
660  return emplace<Expr>(std::move(expr));
661  }
662 
667  [[nodiscard]] expression const& front() const { return *expressions.front(); }
668 
673  [[nodiscard]] expression const& back() const { return *expressions.back(); }
674 
679  [[nodiscard]] size_t size() const { return expressions.size(); }
680 
686  expression const& at(size_t index) { return *expressions.at(index); }
687 
693  expression const& operator[](size_t index) const { return *expressions[index]; }
694 
695  private:
696  // TODO: use better ownership semantics, the unique_ptr here is redundant. Consider using a bump
697  // allocator with type-erased deleters.
698  std::vector<std::unique_ptr<expression>> expressions;
699 };
700  // end of group
702 } // namespace ast
703 } // namespace CUDF_EXPORT cudf
An expression referring to data from a column in a table.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
std::string get_column_name() const
Get the column name.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
column_name_reference(std::string column_name)
Construct a new column name reference object.
An expression referring to data from a column in a table.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
cudf::data_type get_data_type(table_view const &left_table, table_view const &right_table) const
Get the data type.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
cudf::data_type get_data_type(table_view const &table) const
Get the data type.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
table_reference get_table_source() const
Get the table source.
column_reference(cudf::size_type column_index, table_reference table_source=table_reference::LEFT)
Construct a new column reference object.
cudf::size_type get_column_index() const
Get the column index.
An expression that represents a filter predicate.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
filter_predicate(expression const &source)
Construct a new filter predicate object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
expression const & get_operand() const
Get the operand expression.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
A type-erased scalar_device_view where the value is a fixed width type or a string.
generic_scalar_device_view(duration_scalar< T > &s)
Construct a new generic scalar device view object from a duration scalar.
generic_scalar_device_view(cudf::fixed_point_scalar< T > &s)
Construct a new generic scalar device view object from a fixed-point scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid)
Construct a new fixed width scalar device view object.
generic_scalar_device_view(string_scalar &s)
Construct a new generic scalar device view object from a string scalar.
generic_scalar_device_view(timestamp_scalar< T > &s)
Construct a new generic scalar device view object from a timestamp scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid, size_type size)
Construct a new string scalar device view object.
T const value() const noexcept
Returns the stored value.
generic_scalar_device_view(numeric_scalar< T > &s)
Construct a new generic scalar device view object from a numeric scalar.
A literal value used in an abstract syntax tree.
literal(cudf::numeric_scalar< T > &value)
Construct a new literal object.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
generic_scalar_device_view get_value() const
Get the value object.
cudf::data_type get_data_type() const
Get the data type.
literal(cudf::string_scalar &value)
Construct a new literal object.
cudf::scalar const & get_scalar() const
Get the scalar.
literal(cudf::duration_scalar< T > &value)
Construct a new literal object.
bool is_valid(rmm::cuda_stream_view stream) const
Check if the underlying scalar is valid.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
literal(cudf::fixed_point_scalar< T > &value)
Construct a new literal object.
literal(cudf::timestamp_scalar< T > &value)
Construct a new literal object.
An operation expression holds an operator and zero or more operands.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::vector< std::reference_wrapper< expression const > > const & get_operands() const
Get the operands.
ast_operator get_operator() const
Get the operator.
operation(ast_operator op, expression const &left, expression const &right)
Construct a new binary operation object.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
operation(ast_operator op, expression const &input)
Construct a new unary operation object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
An AST expression tree. It owns and contains multiple dependent expressions. All the expressions are ...
tree(tree &&)=default
Moves the ast tree.
tree & operator=(tree &&)=default
move-assigns the AST tree
size_t size() const
get the number of expressions added to the tree
std::enable_if_t< std::is_base_of_v< expression, Expr >, Expr const & > emplace(Args &&... args)
Add an expression to the AST tree.
tree()=default
construct an empty ast tree
expression const & front() const
get the first expression in the tree
expression const & at(size_t index)
get the expression at an index in the tree. Index is checked.
expression const & back() const
get the last expression in the tree
expression const & operator[](size_t index) const
get the expression at an index in the tree. Index is unchecked.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:269
A non-owning view of scalar from device that is trivially copyable and usable in CUDA device code.
An owning class to represent a duration value in device memory.
Definition: scalar.hpp:656
An owning class to represent a fixed_point number in device memory.
Definition: scalar.hpp:288
An owning class to represent a numerical value in device memory.
Definition: scalar.hpp:228
An owning class to represent a singular value.
Definition: scalar.hpp:40
bool is_valid(rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Indicates whether the scalar contains a valid value.
An owning class to represent a string in device memory.
Definition: scalar.hpp:410
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8...
Definition: string_view.hpp:33
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
A set of cudf::column's of the same size.
Definition: table.hpp:29
An owning class to represent a timestamp value in device memory.
Definition: scalar.hpp:600
Class definition for fixed point data type.
table_reference
Enum of table references.
ast_operator
Enum of supported operators.
@ RIGHT
Column index in the right table.
@ OUTPUT
Column index in the output table.
@ LEFT
Column index in the left table.
scale_type
The scale type for fixed_point.
Definition: fixed_point.hpp:33
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:182
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
cuDF interfaces
Definition: host_udf.hpp:26
Class definitions for cudf::scalar.
Scalar device view class definitions.
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:62
virtual std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const =0
Accepts a row_ir::ast_converter class.
bool may_evaluate_null(table_view const &left, rmm::cuda_stream_view stream) const
Returns true if the expression may evaluate to null.
Definition: expressions.hpp:96
virtual cudf::size_type accept(detail::expression_parser &visitor) const =0
Accepts a visitor class.
virtual std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const =0
Accepts a visitor class.
virtual bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const =0
Returns true if the expression may evaluate to null.
Helper struct for constructing fixed_point when value is already shifted.
Class definitions for (mutable)_table_view
Type declarations for libcudf.