expressions.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
9 #include <cudf/scalar/scalar.hpp>
12 #include <cudf/types.hpp>
13 #include <cudf/utilities/error.hpp>
14 
15 #include <cstdint>
16 #include <memory>
17 #include <vector>
18 
19 namespace CUDF_EXPORT cudf {
20 
21 namespace detail {
22 namespace row_ir {
23 
33 struct node;
34 
38 struct ast_converter;
39 
40 } // namespace row_ir
41 } // namespace detail
42 
43 namespace ast {
50 // Forward declaration.
51 namespace detail {
52 class expression_parser;
53 class expression_transformer;
54 } // namespace detail
55 
62 struct [[nodiscard]] expression {
69  virtual cudf::size_type accept(detail::expression_parser& visitor) const = 0;
70 
77  virtual std::reference_wrapper<expression const> accept(
78  detail::expression_transformer& visitor) const = 0;
79 
86  [[nodiscard]] virtual std::unique_ptr<cudf::detail::row_ir::node> accept(
87  cudf::detail::row_ir::ast_converter& visitor) const = 0;
88 
96  [[nodiscard]] bool may_evaluate_null(table_view const& left, rmm::cuda_stream_view stream) const
97  {
98  return may_evaluate_null(left, left, stream);
99  }
100 
109  [[nodiscard]] virtual bool may_evaluate_null(table_view const& left,
110  table_view const& right,
111  rmm::cuda_stream_view stream) const = 0;
112 
113  virtual ~expression() {}
114 };
115 
/**
 * @brief Enum of table references.
 *
 * Identifies which table a column index refers to.
 */
121 enum class table_reference {
/// Column index in the left table.
122  LEFT,
/// Column index in the right table.
123  RIGHT,
/// Column index in the output table.
124  OUTPUT
125 };
126 
131  public:
138  template <typename T>
139  __device__ T const value() const noexcept
140  {
141  if constexpr (std::is_same_v<T, cudf::string_view>) {
142  return string_view(static_cast<char const*>(_data), _size);
143  }
144  if constexpr (cudf::is_fixed_point<T>()) {
145  using rep_type = typename T::rep;
146  auto const rep = *static_cast<rep_type const*>(_data);
147  auto const scale = numeric::scale_type{type().scale()};
148  return T{numeric::scaled_integer<rep_type>{rep, scale}};
149  }
150  return *static_cast<T const*>(_data);
151  }
152 
157  template <typename T>
159  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
160  {
161  }
162 
167  template <typename T>
169  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
170  {
171  }
172 
177  template <typename T>
179  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
180  {
181  }
182 
188  : generic_scalar_device_view(s.type(), s.data(), s.validity_data(), s.size())
189  {
190  }
191 
196  template <typename T>
198  : generic_scalar_device_view{s.type(), s.data(), s.validity_data()}
199  {
200  }
201 
202  protected:
203  void const* _data{};
204  size_type const _size{};
205 
/**
 * @brief Construct a new fixed width scalar device view object.
 *
 * Forwards `type` and `is_valid` to the `scalar_device_view_base` constructor and stores
 * `data`. `_size` is not set here and keeps its default member initializer.
 *
 * @param type The data type of the value
 * @param data Pointer to the value; stored as-is, not copied
 * @param is_valid Pointer to the validity flag, passed to the base class
 */
214  generic_scalar_device_view(data_type type, void const* data, bool* is_valid)
215  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data)
216  {
217  }
218 
/**
 * @brief Construct a new string scalar device view object.
 *
 * Same as the three-argument constructor, but also records `size`, which `value()` uses
 * as the length when building a `string_view`.
 *
 * @param type The data type of the value
 * @param data Pointer to the value; stored as-is, not copied
 * @param is_valid Pointer to the validity flag, passed to the base class
 * @param size The size stored in `_size`
 */
227  generic_scalar_device_view(data_type type, void const* data, bool* is_valid, size_type size)
228  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data), _size(size)
229  {
230  }
231 };
232 
236 class literal : public expression {
237  public:
244  template <typename T>
245  literal(cudf::numeric_scalar<T>& value) : scalar(value), value(value)
246  {
247  }
248 
255  template <typename T>
256  literal(cudf::timestamp_scalar<T>& value) : scalar(value), value(value)
257  {
258  }
259 
266  template <typename T>
267  literal(cudf::duration_scalar<T>& value) : scalar(value), value(value)
268  {
269  }
270 
276  literal(cudf::string_scalar& value) : scalar(value), value(value) {}
277 
283  template <typename T>
284  literal(cudf::fixed_point_scalar<T>& value) : scalar(value), value(value)
285  {
286  }
287 
293  [[nodiscard]] cudf::data_type get_data_type() const { return get_value().type(); }
294 
300  [[nodiscard]] generic_scalar_device_view get_value() const { return value; }
301 
307  [[nodiscard]] cudf::scalar const& get_scalar() const { return scalar; }
308 
312  cudf::size_type accept(detail::expression_parser& visitor) const override;
313 
317  std::reference_wrapper<expression const> accept(
318  detail::expression_transformer& visitor) const override;
319 
323  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
324  cudf::detail::row_ir::ast_converter& visitor) const override;
325 
326  [[nodiscard]] bool may_evaluate_null(table_view const& left,
327  table_view const& right,
328  rmm::cuda_stream_view stream) const override
329  {
330  return !is_valid(stream);
331  }
332 
339  [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream) const
340  {
341  return scalar.is_valid(stream);
342  }
343 
344  private:
345  cudf::scalar const& scalar;
346  generic_scalar_device_view const value;
347 };
348 
352 class column_reference : public expression {
353  public:
362  table_reference table_source = table_reference::LEFT)
363  : column_index(column_index), table_source(table_source)
364  {
365  }
366 
372  [[nodiscard]] cudf::size_type get_column_index() const { return column_index; }
373 
379  [[nodiscard]] table_reference get_table_source() const { return table_source; }
380 
387  [[nodiscard]] cudf::data_type get_data_type(table_view const& table) const;
388 
396  [[nodiscard]] cudf::data_type get_data_type(table_view const& left_table,
397  table_view const& right_table) const;
398 
402  cudf::size_type accept(detail::expression_parser& visitor) const override;
403 
407  std::reference_wrapper<expression const> accept(
408  detail::expression_transformer& visitor) const override;
409 
410  [[nodiscard]] bool may_evaluate_null(table_view const& left,
411  table_view const& right,
412  rmm::cuda_stream_view stream) const override
413  {
414  return (table_source == table_reference::LEFT ? left : right).column(column_index).has_nulls();
415  }
416 
420  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
421  cudf::detail::row_ir::ast_converter& visitor) const override;
422 
423  private:
424  cudf::size_type column_index;
425  table_reference table_source;
426 };
427 
431 class operation : public expression {
432  public:
439  operation(ast_operator op, expression const& input);
440 
448  operation(ast_operator op, expression const& left, expression const& right);
449 
450  // operation only stores references to expressions, so it does not accept r-value
451  // references: the calling code must own the expressions.
452  operation(ast_operator op, expression&& input) = delete;
453  operation(ast_operator op, expression&& left, expression&& right) = delete;
454  operation(ast_operator op, expression&& left, expression const& right) = delete;
455  operation(ast_operator op, expression const& left, expression&& right) = delete;
456 
462  [[nodiscard]] ast_operator get_operator() const { return op; }
463 
469  [[nodiscard]] std::vector<std::reference_wrapper<expression const>> const& get_operands() const
470  {
471  return operands;
472  }
473 
477  cudf::size_type accept(detail::expression_parser& visitor) const override;
478 
482  std::reference_wrapper<expression const> accept(
483  detail::expression_transformer& visitor) const override;
484 
485  [[nodiscard]] bool may_evaluate_null(table_view const& left,
486  table_view const& right,
487  rmm::cuda_stream_view stream) const override;
488 
492  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
493  cudf::detail::row_ir::ast_converter& visitor) const override;
494 
495  private:
496  ast_operator op;
497  std::vector<std::reference_wrapper<expression const>> operands;
498 };
499 
500 namespace detail {
501 
506 class predicate : public expression {
507  public:
512  predicate(expression const& source) : source_{source} {}
513 
517  cudf::size_type accept(detail::expression_parser& visitor) const override;
518 
522  std::reference_wrapper<expression const> accept(
523  detail::expression_transformer& visitor) const override;
524 
525  [[nodiscard]] bool may_evaluate_null(table_view const& left,
526  table_view const& right,
527  rmm::cuda_stream_view stream) const override;
528 
532  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
533  cudf::detail::row_ir::ast_converter& visitor) const override;
534 
539  [[nodiscard]] expression const& get_operand() const { return source_; }
540 
541  private:
542  std::reference_wrapper<expression const> source_;
543 };
544 
545 } // namespace detail
546 
551  public:
/**
 * @brief Construct a new column name reference object.
 *
 * @param column_name Name of the referenced column; taken by value and moved into the member
 */
558  column_name_reference(std::string column_name) : column_name(std::move(column_name)) {}
559 
/**
 * @brief Get the column name.
 *
 * @return A copy of the stored column name (returned by value)
 */
565  [[nodiscard]] std::string get_column_name() const { return column_name; }
566 
570  cudf::size_type accept(detail::expression_parser& visitor) const override;
571 
575  std::reference_wrapper<expression const> accept(
576  detail::expression_transformer& visitor) const override;
577 
/**
 * @brief Returns true if the expression may evaluate to null.
 *
 * Always returns `true`; none of the arguments are inspected.
 *
 * @param left Unused
 * @param right Unused
 * @param stream Unused
 * @return `true`
 */
578  [[nodiscard]] bool may_evaluate_null(table_view const& left,
579  table_view const& right,
580  rmm::cuda_stream_view stream) const override
581  {
582  return true;
583  }
584 
588  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
589  cudf::detail::row_ir::ast_converter& visitor) const override;
590 
591  private:
592  std::string column_name;
593 };
594 
599 class tree {
600  public:
604  tree() = default;
605 
609  tree(tree&&) = default;
610 
615  tree& operator=(tree&&) = default;
616 
617  ~tree() = default;
618 
619  // the tree is not copyable
620  tree(tree const&) = delete;
621  tree& operator=(tree const&) = delete;
622 
628  template <typename Expr, typename... Args>
629  std::enable_if_t<std::is_base_of_v<expression, Expr>, Expr const&> emplace(Args&&... args)
630  {
631  auto expr = std::make_unique<Expr>(std::forward<Args>(args)...);
632  Expr const& expr_ref = *expr;
633  expressions.emplace_back(std::move(expr));
634  return expr_ref;
635  }
636 
642  template <typename Expr>
643  decltype(auto) push(Expr expr)
644  {
645  return emplace<Expr>(std::move(expr));
646  }
647 
652  [[nodiscard]] expression const& front() const { return *expressions.front(); }
653 
658  [[nodiscard]] expression const& back() const { return *expressions.back(); }
659 
664  [[nodiscard]] size_t size() const { return expressions.size(); }
665 
671  expression const& at(size_t index) { return *expressions.at(index); }
672 
678  expression const& operator[](size_t index) const { return *expressions[index]; }
679 
680  private:
681  // TODO: use better ownership semantics, the unique_ptr here is redundant. Consider using a bump
682  // allocator with type-erased deleters.
683  std::vector<std::unique_ptr<expression>> expressions;
684 };
685  // end of group
687 } // namespace ast
688 } // namespace CUDF_EXPORT cudf
An expression referring to data from a column in a table.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
std::string get_column_name() const
Get the column name.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
column_name_reference(std::string column_name)
Construct a new column name reference object.
An expression referring to data from a column in a table.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
cudf::data_type get_data_type(table_view const &left_table, table_view const &right_table) const
Get the data type.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
cudf::data_type get_data_type(table_view const &table) const
Get the data type.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
table_reference get_table_source() const
Get the table source.
column_reference(cudf::size_type column_index, table_reference table_source=table_reference::LEFT)
Construct a new column reference object.
cudf::size_type get_column_index() const
Get the column index.
An expression that represents a predicate.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
predicate(expression const &source)
Construct a new filter predicate object.
expression const & get_operand() const
Get the operand expression.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
A type-erased scalar_device_view where the value is a fixed width type or a string.
generic_scalar_device_view(duration_scalar< T > &s)
Construct a new generic scalar device view object from a duration scalar.
generic_scalar_device_view(cudf::fixed_point_scalar< T > &s)
Construct a new generic scalar device view object from a fixed-point scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid)
Construct a new fixed width scalar device view object.
generic_scalar_device_view(string_scalar &s)
Construct a new generic scalar device view object from a string scalar.
generic_scalar_device_view(timestamp_scalar< T > &s)
Construct a new generic scalar device view object from a timestamp scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid, size_type size)
Construct a new string scalar device view object.
T const value() const noexcept
Returns the stored value.
generic_scalar_device_view(numeric_scalar< T > &s)
Construct a new generic scalar device view object from a numeric scalar.
A literal value used in an abstract syntax tree.
literal(cudf::numeric_scalar< T > &value)
Construct a new literal object.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
generic_scalar_device_view get_value() const
Get the value object.
cudf::data_type get_data_type() const
Get the data type.
literal(cudf::string_scalar &value)
Construct a new literal object.
cudf::scalar const & get_scalar() const
Get the scalar.
literal(cudf::duration_scalar< T > &value)
Construct a new literal object.
bool is_valid(rmm::cuda_stream_view stream) const
Check if the underlying scalar is valid.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
literal(cudf::fixed_point_scalar< T > &value)
Construct a new literal object.
literal(cudf::timestamp_scalar< T > &value)
Construct a new literal object.
An operation expression holds an operator and zero or more operands.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::vector< std::reference_wrapper< expression const > > const & get_operands() const
Get the operands.
ast_operator get_operator() const
Get the operator.
operation(ast_operator op, expression const &left, expression const &right)
Construct a new binary operation object.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
operation(ast_operator op, expression const &input)
Construct a new unary operation object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
An AST expression tree. It owns and contains multiple dependent expressions. All the expressions are ...
tree(tree &&)=default
Moves the ast tree.
tree & operator=(tree &&)=default
move-assigns the AST tree
size_t size() const
get the number of expressions added to the tree
std::enable_if_t< std::is_base_of_v< expression, Expr >, Expr const & > emplace(Args &&... args)
Add an expression to the AST tree.
tree()=default
construct an empty ast tree
expression const & front() const
get the first expression in the tree
expression const & at(size_t index)
get the expression at an index in the tree. Index is checked.
expression const & back() const
get the last expression in the tree
expression const & operator[](size_t index) const
get the expression at an index in the tree. Index is unchecked.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:278
A non-owning view of scalar from device that is trivially copyable and usable in CUDA device code.
An owning class to represent a duration value in device memory.
Definition: scalar.hpp:657
An owning class to represent a fixed_point number in device memory.
Definition: scalar.hpp:289
An owning class to represent a numerical value in device memory.
Definition: scalar.hpp:229
An owning class to represent a singular value.
Definition: scalar.hpp:41
bool is_valid(rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Indicates whether the scalar contains a valid value.
An owning class to represent a string in device memory.
Definition: scalar.hpp:411
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8...
Definition: string_view.hpp:35
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
A set of cudf::column's of the same size.
Definition: table.hpp:29
An owning class to represent a timestamp value in device memory.
Definition: scalar.hpp:601
Class definition for fixed point data type.
table_reference
Enum of table references.
ast_operator
Enum of supported operators.
@ RIGHT
Column index in the right table.
@ OUTPUT
Column index in the output table.
@ LEFT
Column index in the left table.
scale_type
The scale type for fixed_point.
Definition: fixed_point.hpp:35
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:85
cuDF interfaces
Definition: host_udf.hpp:26
Class definitions for cudf::scalar.
Scalar device view class definitions.
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:62
virtual std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const =0
Accepts a row_ir::ast_converter class.
bool may_evaluate_null(table_view const &left, rmm::cuda_stream_view stream) const
Returns true if the expression may evaluate to null.
Definition: expressions.hpp:96
virtual cudf::size_type accept(detail::expression_parser &visitor) const =0
Accepts a visitor class.
virtual std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const =0
Accepts a visitor class.
virtual bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const =0
Returns true if the expression may evaluate to null.
Helper struct for constructing fixed_point when value is already shifted.
Class definitions for (mutable)_table_view
Type declarations for libcudf.