expressions.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/scalar/scalar.hpp>
21 #include <cudf/types.hpp>
22 #include <cudf/utilities/error.hpp>
23 
24 #include <cstdint>
25 #include <memory>
26 #include <vector>
27 
28 namespace CUDF_EXPORT cudf {
29 namespace ast {
// Forward declarations of the visitor types named in the `expression::accept` signatures.
// Only references to them are needed in this header, so their definitions are not required here.
namespace detail {
class expression_parser;
class expression_transformer;
}  // namespace detail
41 
48 struct expression {
55  virtual cudf::size_type accept(detail::expression_parser& visitor) const = 0;
56 
63  virtual std::reference_wrapper<expression const> accept(
64  detail::expression_transformer& visitor) const = 0;
65 
73  [[nodiscard]] bool may_evaluate_null(table_view const& left, rmm::cuda_stream_view stream) const
74  {
75  return may_evaluate_null(left, left, stream);
76  }
77 
86  [[nodiscard]] virtual bool may_evaluate_null(table_view const& left,
87  table_view const& right,
88  rmm::cuda_stream_view stream) const = 0;
89 
90  virtual ~expression() {}
91 };
92 
/**
 * @brief Enum of supported operators.
 *
 * The underlying type is `int32_t` and no enumerator has an explicit value, so each value is
 * determined by declaration order. Append new operators at the end of the list to keep the
 * existing values stable.
 */
enum class ast_operator : int32_t {
  // Binary operators
  ADD,
  SUB,
  MUL,
  DIV,       ///< operator / using common type of lhs and rhs
  TRUE_DIV,  ///< operator / after promoting type to floating point
  FLOOR_DIV,
  MOD,
  PYMOD,  ///< operator % using Python's sign rules for negatives
  POW,
  EQUAL,
  NULL_EQUAL,
  NOT_EQUAL,
  LESS,
  GREATER,
  LESS_EQUAL,
  GREATER_EQUAL,  ///< operator >=
  BITWISE_AND,
  BITWISE_OR,
  BITWISE_XOR,
  LOGICAL_AND,  ///< operator &&
  LOGICAL_OR,
  // Unary operators
  IDENTITY,         ///< Identity function
  IS_NULL,          ///< Check if operand is null
  SIN,              ///< Trigonometric sine
  COS,              ///< Trigonometric cosine
  TAN,              ///< Trigonometric tangent
  ARCSIN,           ///< Trigonometric sine inverse
  ARCCOS,           ///< Trigonometric cosine inverse
  ARCTAN,           ///< Trigonometric tangent inverse
  SINH,             ///< Hyperbolic sine
  COSH,             ///< Hyperbolic cosine
  TANH,             ///< Hyperbolic tangent
  ARCSINH,          ///< Hyperbolic sine inverse
  ARCCOSH,          ///< Hyperbolic cosine inverse
  ARCTANH,          ///< Hyperbolic tangent inverse
  EXP,              ///< Exponential (base e, Euler number)
  LOG,              ///< Natural Logarithm (base e)
  SQRT,             ///< Square-root (x^0.5)
  CBRT,             ///< Cube-root (x^(1.0/3))
  CEIL,             ///< Smallest integer value not less than arg
  FLOOR,            ///< Largest integer value not greater than arg
  ABS,              ///< Absolute value
  RINT,             ///< Rounds the floating-point argument arg to an integer value
  BIT_INVERT,       ///< Bitwise Not (~)
  NOT,              ///< Logical Not (!)
  CAST_TO_INT64,    ///< Cast value to int64_t
  CAST_TO_UINT64,   ///< Cast value to uint64_t
  CAST_TO_FLOAT64,  ///< Cast value to double
};
159 
/**
 * @brief Enum of table references.
 *
 * Identifies which table a column index refers to.
 */
enum class table_reference {
  LEFT,   ///< Column index in the left table
  RIGHT,  ///< Column index in the right table
  OUTPUT  ///< Column index in the output table
};
170 
175  public:
182  template <typename T>
183  __device__ T const value() const noexcept
184  {
185  if constexpr (std::is_same_v<T, cudf::string_view>) {
186  return string_view(static_cast<char const*>(_data), _size);
187  }
188  return *static_cast<T const*>(_data);
189  }
190 
195  template <typename T>
197  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
198  {
199  }
200 
205  template <typename T>
207  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
208  {
209  }
210 
215  template <typename T>
217  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
218  {
219  }
220 
226  : generic_scalar_device_view(s.type(), s.data(), s.validity_data(), s.size())
227  {
228  }
229 
230  protected:
231  void const* _data{};
232  size_type const _size{};
233 
242  generic_scalar_device_view(data_type type, void const* data, bool* is_valid)
243  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data)
244  {
245  }
246 
255  generic_scalar_device_view(data_type type, void const* data, bool* is_valid, size_type size)
256  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data), _size(size)
257  {
258  }
259 };
260 
264 class literal : public expression {
265  public:
272  template <typename T>
273  literal(cudf::numeric_scalar<T>& value) : scalar(value), value(value)
274  {
275  }
276 
283  template <typename T>
284  literal(cudf::timestamp_scalar<T>& value) : scalar(value), value(value)
285  {
286  }
287 
294  template <typename T>
295  literal(cudf::duration_scalar<T>& value) : scalar(value), value(value)
296  {
297  }
298 
304  literal(cudf::string_scalar& value) : scalar(value), value(value) {}
305 
311  [[nodiscard]] cudf::data_type get_data_type() const { return get_value().type(); }
312 
318  [[nodiscard]] generic_scalar_device_view get_value() const { return value; }
319 
323  cudf::size_type accept(detail::expression_parser& visitor) const override;
324 
328  std::reference_wrapper<expression const> accept(
329  detail::expression_transformer& visitor) const override;
330 
331  [[nodiscard]] bool may_evaluate_null(table_view const& left,
332  table_view const& right,
333  rmm::cuda_stream_view stream) const override
334  {
335  return !is_valid(stream);
336  }
337 
344  [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream) const
345  {
346  return scalar.is_valid(stream);
347  }
348 
349  private:
350  cudf::scalar const& scalar;
351  generic_scalar_device_view const value;
352 };
353 
357 class column_reference : public expression {
358  public:
367  table_reference table_source = table_reference::LEFT)
368  : column_index(column_index), table_source(table_source)
369  {
370  }
371 
377  [[nodiscard]] cudf::size_type get_column_index() const { return column_index; }
378 
384  [[nodiscard]] table_reference get_table_source() const { return table_source; }
385 
392  [[nodiscard]] cudf::data_type get_data_type(table_view const& table) const
393  {
394  return table.column(get_column_index()).type();
395  }
396 
404  [[nodiscard]] cudf::data_type get_data_type(table_view const& left_table,
405  table_view const& right_table) const
406  {
407  auto const table = [&] {
408  if (get_table_source() == table_reference::LEFT) {
409  return left_table;
410  } else if (get_table_source() == table_reference::RIGHT) {
411  return right_table;
412  } else {
413  CUDF_FAIL("Column reference data type cannot be determined from unknown table.");
414  }
415  }();
416  return table.column(get_column_index()).type();
417  }
418 
422  cudf::size_type accept(detail::expression_parser& visitor) const override;
423 
427  std::reference_wrapper<expression const> accept(
428  detail::expression_transformer& visitor) const override;
429 
430  [[nodiscard]] bool may_evaluate_null(table_view const& left,
431  table_view const& right,
432  rmm::cuda_stream_view stream) const override
433  {
434  return (table_source == table_reference::LEFT ? left : right).column(column_index).has_nulls();
435  }
436 
437  private:
438  cudf::size_type column_index;
439  table_reference table_source;
440 };
441 
445 class operation : public expression {
446  public:
453  operation(ast_operator op, expression const& input);
454 
462  operation(ast_operator op, expression const& left, expression const& right);
463 
464  // operation only stores references to expressions, so it does not accept r-value
465  // references: the calling code must own the expressions.
466  operation(ast_operator op, expression&& input) = delete;
467  operation(ast_operator op, expression&& left, expression&& right) = delete;
468  operation(ast_operator op, expression&& left, expression const& right) = delete;
469  operation(ast_operator op, expression const& left, expression&& right) = delete;
470 
476  [[nodiscard]] ast_operator get_operator() const { return op; }
477 
483  [[nodiscard]] std::vector<std::reference_wrapper<expression const>> const& get_operands() const
484  {
485  return operands;
486  }
487 
491  cudf::size_type accept(detail::expression_parser& visitor) const override;
492 
496  std::reference_wrapper<expression const> accept(
497  detail::expression_transformer& visitor) const override;
498 
499  [[nodiscard]] bool may_evaluate_null(table_view const& left,
500  table_view const& right,
501  rmm::cuda_stream_view stream) const override
502  {
503  return std::any_of(operands.cbegin(),
504  operands.cend(),
505  [&left, &right, &stream](std::reference_wrapper<expression const> subexpr) {
506  return subexpr.get().may_evaluate_null(left, right, stream);
507  });
508  };
509 
510  private:
511  ast_operator op;
512  std::vector<std::reference_wrapper<expression const>> operands;
513 };
514 
519  public:
526  column_name_reference(std::string column_name) : column_name(std::move(column_name)) {}
527 
533  [[nodiscard]] std::string get_column_name() const { return column_name; }
534 
538  cudf::size_type accept(detail::expression_parser& visitor) const override;
539 
543  std::reference_wrapper<expression const> accept(
544  detail::expression_transformer& visitor) const override;
545 
546  [[nodiscard]] bool may_evaluate_null(table_view const& left,
547  table_view const& right,
548  rmm::cuda_stream_view stream) const override
549  {
550  return true;
551  }
552 
553  private:
554  std::string column_name;
555 };
556 
561 class tree {
562  public:
566  tree() = default;
567 
571  tree(tree&&) = default;
572 
577  tree& operator=(tree&&) = default;
578 
579  ~tree() = default;
580 
581  // the tree is not copyable
582  tree(tree const&) = delete;
583  tree& operator=(tree const&) = delete;
584 
590  template <typename Expr, typename... Args>
591  Expr const& emplace(Args&&... args)
592  {
593  static_assert(std::is_base_of_v<expression, Expr>);
594  auto expr = std::make_shared<Expr>(std::forward<Args>(args)...);
595  Expr const& expr_ref = *expr;
596  expressions.emplace_back(std::static_pointer_cast<expression>(std::move(expr)));
597  return expr_ref;
598  }
599 
605  template <typename Expr>
606  Expr const& push(Expr expr)
607  {
608  return emplace<Expr>(std::move(expr));
609  }
610 
615  expression const& front() const { return *expressions.front(); }
616 
621  expression const& back() const { return *expressions.back(); }
622 
627  size_t size() const { return expressions.size(); }
628 
634  expression const& at(size_t index) { return *expressions.at(index); }
635 
641  expression const& operator[](size_t index) const { return *expressions[index]; }
642 
643  private:
644  // TODO: use better ownership semantics, the shared_ptr here is redundant. Consider using a bump
645  // allocator with type-erased deleters.
646  std::vector<std::shared_ptr<expression>> expressions;
647 };
648  // end of group
650 } // namespace ast
651 
652 } // namespace CUDF_EXPORT cudf
An expression referring to data from a column in a table.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::string get_column_name() const
Get the column name.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
column_name_reference(std::string column_name)
Construct a new column name reference object.
An expression referring to data from a column in a table.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
cudf::data_type get_data_type(table_view const &left_table, table_view const &right_table) const
Get the data type.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
cudf::data_type get_data_type(table_view const &table) const
Get the data type.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
table_reference get_table_source() const
Get the table source.
column_reference(cudf::size_type column_index, table_reference table_source=table_reference::LEFT)
Construct a new column reference object.
cudf::size_type get_column_index() const
Get the column index.
A type-erased scalar_device_view where the value is a fixed width type or a string.
generic_scalar_device_view(duration_scalar< T > &s)
Construct a new generic scalar device view object from a duration scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid)
Construct a new fixed width scalar device view object.
generic_scalar_device_view(string_scalar &s)
Construct a new generic scalar device view object from a string scalar.
generic_scalar_device_view(timestamp_scalar< T > &s)
Construct a new generic scalar device view object from a timestamp scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid, size_type size)
Construct a new string scalar device view object.
T const value() const noexcept
Returns the stored value.
generic_scalar_device_view(numeric_scalar< T > &s)
Construct a new generic scalar device view object from a numeric scalar.
A literal value used in an abstract syntax tree.
literal(cudf::numeric_scalar< T > &value)
Construct a new literal object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
generic_scalar_device_view get_value() const
Get the value object.
cudf::data_type get_data_type() const
Get the data type.
literal(cudf::string_scalar &value)
Construct a new literal object.
literal(cudf::duration_scalar< T > &value)
Construct a new literal object.
bool is_valid(rmm::cuda_stream_view stream) const
Check if the underlying scalar is valid.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
literal(cudf::timestamp_scalar< T > &value)
Construct a new literal object.
An operation expression holds an operator and zero or more operands.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::vector< std::reference_wrapper< expression const > > const & get_operands() const
Get the operands.
ast_operator get_operator() const
Get the operator.
operation(ast_operator op, expression const &left, expression const &right)
Construct a new binary operation object.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
operation(ast_operator op, expression const &input)
Construct a new unary operation object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
An AST expression tree. It owns and contains multiple dependent expressions. All the expressions are ...
tree(tree &&)=default
Moves the ast tree.
tree & operator=(tree &&)=default
move-assigns the AST tree
size_t size() const
get the number of expressions added to the tree
tree()=default
construct an empty ast tree
Expr const & push(Expr expr)
Add an expression to the AST tree.
Expr const & emplace(Args &&... args)
Add an expression to the AST tree.
expression const & front() const
get the first expression in the tree
expression const & at(size_t index)
get the expression at an index in the tree. Index is checked.
expression const & back() const
get the last expression in the tree
expression const & operator[](size_t index) const
get the expression at an index in the tree. Index is unchecked.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
A non-owning view of scalar from device that is trivially copyable and usable in CUDA device code.
An owning class to represent a duration value in device memory.
Definition: scalar.hpp:680
An owning class to represent a numerical value in device memory.
Definition: scalar.hpp:242
An owning class to represent a singular value.
Definition: scalar.hpp:49
bool is_valid(rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Indicates whether the scalar contains a valid value.
An owning class to represent a string in device memory.
Definition: scalar.hpp:429
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8...
Definition: string_view.hpp:44
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
An owning class to represent a timestamp value in device memory.
Definition: scalar.hpp:624
table_reference
Enum of table references.
ast_operator
Enum of supported operators.
Definition: expressions.hpp:96
@ RIGHT
Column index in the right table.
@ OUTPUT
Column index in the output table.
@ LEFT
Column index in the left table.
@ NOT
Logical Not (!)
@ TANH
Hyperbolic tangent.
@ DIV
operator / using common type of lhs and rhs
@ CBRT
Cube-root (x^(1.0/3))
@ ARCSINH
Hyperbolic sine inverse.
@ SQRT
Square-root (x^0.5)
@ PYMOD
operator % using Python's sign rules for negatives
@ LOG
Natural Logarithm (base e)
@ FLOOR
Largest integer value not greater than arg.
@ ARCTAN
Trigonometric tangent inverse.
@ SIN
Trigonometric sine.
@ CEIL
Smallest integer value not less than arg.
@ ARCSIN
Trigonometric sine inverse.
@ RINT
Rounds the floating-point argument arg to an integer value.
@ BIT_INVERT
Bitwise Not (~)
@ TAN
Trigonometric tangent.
@ ARCCOS
Trigonometric cosine inverse.
@ ABS
Absolute value.
@ ARCTANH
Hyperbolic tangent inverse.
@ GREATER_EQUAL
operator >=
@ EXP
Exponential (base e, Euler number)
@ CAST_TO_FLOAT64
Cast value to double.
@ TRUE_DIV
operator / after promoting type to floating point
@ LOGICAL_AND
operator &&
@ SINH
Hyperbolic sine.
@ CAST_TO_UINT64
Cast value to uint64_t.
@ COSH
Hyperbolic cosine.
@ ARCCOSH
Hyperbolic cosine inverse.
@ IDENTITY
Identity function.
@ IS_NULL
Check if operand is null.
@ CAST_TO_INT64
Cast value to int64_t.
@ COS
Trigonometric cosine.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:217
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: aggregation.hpp:35
Class definitions for cudf::scalar.
Scalar device view class definitions.
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:48
bool may_evaluate_null(table_view const &left, rmm::cuda_stream_view stream) const
Returns true if the expression may evaluate to null.
Definition: expressions.hpp:73
virtual cudf::size_type accept(detail::expression_parser &visitor) const =0
Accepts a visitor class.
virtual std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const =0
Accepts a visitor class.
virtual bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const =0
Returns true if the expression may evaluate to null.
Class definitions for (mutable)_table_view
Type declarations for libcudf.