expressions.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2024, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <cudf/scalar/scalar.hpp>
21 #include <cudf/types.hpp>
22 #include <cudf/utilities/error.hpp>
23 
24 #include <cstdint>
25 
26 namespace CUDF_EXPORT cudf {
27 namespace ast {
34 // Forward declaration.
35 namespace detail {
36 class expression_parser;
37 class expression_transformer;
38 } // namespace detail
39 
46 struct expression {
53  virtual cudf::size_type accept(detail::expression_parser& visitor) const = 0;
54 
61  virtual std::reference_wrapper<expression const> accept(
62  detail::expression_transformer& visitor) const = 0;
63 
71  [[nodiscard]] bool may_evaluate_null(table_view const& left, rmm::cuda_stream_view stream) const
72  {
73  return may_evaluate_null(left, left, stream);
74  }
75 
84  [[nodiscard]] virtual bool may_evaluate_null(table_view const& left,
85  table_view const& right,
86  rmm::cuda_stream_view stream) const = 0;
87 
88  virtual ~expression() {}
89 };
90 
/**
 * @brief Enum of supported operators.
 *
 * The enumerators are implicitly numbered, so their order is significant:
 * inserting or removing one shifts the value of every enumerator after it.
 *
 * NOTE(review): the numbered listing this enum was recovered from skips source
 * lines 102, 108-109, 119-122 and 124-127. Those gaps may be stripped
 * documentation only, but confirm against the upstream header that no
 * enumerator lived on them.
 */
enum class ast_operator : int32_t {
  // Binary operators
  ADD,            ///< operator +
  SUB,            ///< operator -
  MUL,            ///< operator *
  DIV,            ///< operator / using common type of lhs and rhs
  TRUE_DIV,       ///< operator / after promoting type to floating point
  FLOOR_DIV,      ///< Floor division (promotion rules are not shown in this listing)
  MOD,            ///< operator %
  PYMOD,          ///< operator % using Python's sign rules for negatives
  POW,            ///< Exponentiation
  EQUAL,          ///< operator ==
  NULL_EQUAL,     ///< Null-aware equality (exact null rules are not shown in this listing)
  NOT_EQUAL,      ///< operator !=
  LESS,           ///< operator <
  GREATER,        ///< operator >
  LESS_EQUAL,     ///< operator <=
  GREATER_EQUAL,  ///< operator >=
  BITWISE_AND,    ///< operator &
  BITWISE_OR,     ///< operator |
  BITWISE_XOR,    ///< operator ^
  LOGICAL_AND,    ///< operator &&
  LOGICAL_OR,     ///< operator ||
  // Unary operators
  IDENTITY,        ///< Identity function
  IS_NULL,         ///< Check if operand is null
  SIN,             ///< Trigonometric sine
  COS,             ///< Trigonometric cosine
  TAN,             ///< Trigonometric tangent
  ARCSIN,          ///< Trigonometric sine inverse
  ARCCOS,          ///< Trigonometric cosine inverse
  ARCTAN,          ///< Trigonometric tangent inverse
  SINH,            ///< Hyperbolic sine
  COSH,            ///< Hyperbolic cosine
  TANH,            ///< Hyperbolic tangent
  ARCSINH,         ///< Hyperbolic sine inverse
  ARCCOSH,         ///< Hyperbolic cosine inverse
  ARCTANH,         ///< Hyperbolic tangent inverse
  EXP,             ///< Exponential (base e, Euler number)
  LOG,             ///< Natural Logarithm (base e)
  SQRT,            ///< Square-root (x^0.5)
  CBRT,            ///< Cube-root (x^(1.0/3))
  CEIL,            ///< Smallest integer value not less than arg
  FLOOR,           ///< largest integer value not greater than arg
  ABS,             ///< Absolute value
  RINT,            ///< Rounds the floating-point argument arg to an integer value
  BIT_INVERT,      ///< Bitwise Not (~)
  NOT,             ///< Logical Not (!)
  CAST_TO_INT64,   ///< Cast value to int64_t
  // The two casts below were missing from the listing (source lines 154-155) but
  // are documented members of this enum. NOTE(review): relative order assumed.
  CAST_TO_UINT64,  ///< Cast value to uint64_t
  CAST_TO_FLOAT64  ///< Cast value to double
};
157 
/**
 * @brief Enum of table references.
 *
 * Identifies which table a column index is relative to.
 */
enum class table_reference {
  LEFT,    ///< Column index in the left table
  RIGHT,   ///< Column index in the right table
  OUTPUT,  ///< Column index in the output table
};
168 
173  public:
180  template <typename T>
181  __device__ T const value() const noexcept
182  {
183  if constexpr (std::is_same_v<T, cudf::string_view>) {
184  return string_view(static_cast<char const*>(_data), _size);
185  }
186  return *static_cast<T const*>(_data);
187  }
188 
193  template <typename T>
195  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
196  {
197  }
198 
203  template <typename T>
205  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
206  {
207  }
208 
213  template <typename T>
215  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
216  {
217  }
218 
224  : generic_scalar_device_view(s.type(), s.data(), s.validity_data(), s.size())
225  {
226  }
227 
228  protected:
229  void const* _data{};
230  size_type const _size{};
231 
240  generic_scalar_device_view(data_type type, void const* data, bool* is_valid)
241  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data)
242  {
243  }
244 
253  generic_scalar_device_view(data_type type, void const* data, bool* is_valid, size_type size)
254  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data), _size(size)
255  {
256  }
257 };
258 
262 class literal : public expression {
263  public:
270  template <typename T>
271  literal(cudf::numeric_scalar<T>& value) : scalar(value), value(value)
272  {
273  }
274 
281  template <typename T>
282  literal(cudf::timestamp_scalar<T>& value) : scalar(value), value(value)
283  {
284  }
285 
292  template <typename T>
293  literal(cudf::duration_scalar<T>& value) : scalar(value), value(value)
294  {
295  }
296 
302  literal(cudf::string_scalar& value) : scalar(value), value(value) {}
303 
309  [[nodiscard]] cudf::data_type get_data_type() const { return get_value().type(); }
310 
316  [[nodiscard]] generic_scalar_device_view get_value() const { return value; }
317 
321  cudf::size_type accept(detail::expression_parser& visitor) const override;
322 
326  std::reference_wrapper<expression const> accept(
327  detail::expression_transformer& visitor) const override;
328 
329  [[nodiscard]] bool may_evaluate_null(table_view const& left,
330  table_view const& right,
331  rmm::cuda_stream_view stream) const override
332  {
333  return !is_valid(stream);
334  }
335 
342  [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream) const
343  {
344  return scalar.is_valid(stream);
345  }
346 
347  private:
348  cudf::scalar const& scalar;
349  generic_scalar_device_view const value;
350 };
351 
355 class column_reference : public expression {
356  public:
365  table_reference table_source = table_reference::LEFT)
366  : column_index(column_index), table_source(table_source)
367  {
368  }
369 
375  [[nodiscard]] cudf::size_type get_column_index() const { return column_index; }
376 
382  [[nodiscard]] table_reference get_table_source() const { return table_source; }
383 
390  [[nodiscard]] cudf::data_type get_data_type(table_view const& table) const
391  {
392  return table.column(get_column_index()).type();
393  }
394 
402  [[nodiscard]] cudf::data_type get_data_type(table_view const& left_table,
403  table_view const& right_table) const
404  {
405  auto const table = [&] {
406  if (get_table_source() == table_reference::LEFT) {
407  return left_table;
408  } else if (get_table_source() == table_reference::RIGHT) {
409  return right_table;
410  } else {
411  CUDF_FAIL("Column reference data type cannot be determined from unknown table.");
412  }
413  }();
414  return table.column(get_column_index()).type();
415  }
416 
420  cudf::size_type accept(detail::expression_parser& visitor) const override;
421 
425  std::reference_wrapper<expression const> accept(
426  detail::expression_transformer& visitor) const override;
427 
428  [[nodiscard]] bool may_evaluate_null(table_view const& left,
429  table_view const& right,
430  rmm::cuda_stream_view stream) const override
431  {
432  return (table_source == table_reference::LEFT ? left : right).column(column_index).has_nulls();
433  }
434 
435  private:
436  cudf::size_type column_index;
437  table_reference table_source;
438 };
439 
443 class operation : public expression {
444  public:
451  operation(ast_operator op, expression const& input);
452 
460  operation(ast_operator op, expression const& left, expression const& right);
461 
462  // operation only stores references to expressions, so it does not accept r-value
463  // references: the calling code must own the expressions.
464  operation(ast_operator op, expression&& input) = delete;
465  operation(ast_operator op, expression&& left, expression&& right) = delete;
466  operation(ast_operator op, expression&& left, expression const& right) = delete;
467  operation(ast_operator op, expression const& left, expression&& right) = delete;
468 
474  [[nodiscard]] ast_operator get_operator() const { return op; }
475 
481  [[nodiscard]] std::vector<std::reference_wrapper<expression const>> get_operands() const
482  {
483  return operands;
484  }
485 
489  cudf::size_type accept(detail::expression_parser& visitor) const override;
490 
494  std::reference_wrapper<expression const> accept(
495  detail::expression_transformer& visitor) const override;
496 
497  [[nodiscard]] bool may_evaluate_null(table_view const& left,
498  table_view const& right,
499  rmm::cuda_stream_view stream) const override
500  {
501  return std::any_of(operands.cbegin(),
502  operands.cend(),
503  [&left, &right, &stream](std::reference_wrapper<expression const> subexpr) {
504  return subexpr.get().may_evaluate_null(left, right, stream);
505  });
506  };
507 
508  private:
509  ast_operator const op;
510  std::vector<std::reference_wrapper<expression const>> const operands;
511 };
512 
517  public:
524  column_name_reference(std::string column_name) : column_name(std::move(column_name)) {}
525 
531  [[nodiscard]] std::string get_column_name() const { return column_name; }
532 
536  cudf::size_type accept(detail::expression_parser& visitor) const override;
537 
541  std::reference_wrapper<expression const> accept(
542  detail::expression_transformer& visitor) const override;
543 
544  [[nodiscard]] bool may_evaluate_null(table_view const& left,
545  table_view const& right,
546  rmm::cuda_stream_view stream) const override
547  {
548  return true;
549  }
550 
551  private:
552  std::string column_name;
553 };
554  // end of group
556 } // namespace ast
557 
558 } // namespace CUDF_EXPORT cudf
An expression referring to data from a column in a table.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::string get_column_name() const
Get the column name.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
column_name_reference(std::string column_name)
Construct a new column name reference object.
An expression referring to data from a column in a table.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
cudf::data_type get_data_type(table_view const &left_table, table_view const &right_table) const
Get the data type.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
cudf::data_type get_data_type(table_view const &table) const
Get the data type.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
table_reference get_table_source() const
Get the table source.
column_reference(cudf::size_type column_index, table_reference table_source=table_reference::LEFT)
Construct a new column reference object.
cudf::size_type get_column_index() const
Get the column index.
A type-erased scalar_device_view where the value is a fixed width type or a string.
generic_scalar_device_view(duration_scalar< T > &s)
Construct a new generic scalar device view object from a duration scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid)
Construct a new fixed width scalar device view object.
generic_scalar_device_view(string_scalar &s)
Construct a new generic scalar device view object from a string scalar.
generic_scalar_device_view(timestamp_scalar< T > &s)
Construct a new generic scalar device view object from a timestamp scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid, size_type size)
Construct a new string scalar device view object.
T const value() const noexcept
Returns the stored value.
generic_scalar_device_view(numeric_scalar< T > &s)
Construct a new generic scalar device view object from a numeric scalar.
A literal value used in an abstract syntax tree.
literal(cudf::numeric_scalar< T > &value)
Construct a new literal object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
generic_scalar_device_view get_value() const
Get the value object.
cudf::data_type get_data_type() const
Get the data type.
literal(cudf::string_scalar &value)
Construct a new literal object.
literal(cudf::duration_scalar< T > &value)
Construct a new literal object.
bool is_valid(rmm::cuda_stream_view stream) const
Check if the underlying scalar is valid.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
literal(cudf::timestamp_scalar< T > &value)
Construct a new literal object.
An operation expression holds an operator and zero or more operands.
std::vector< std::reference_wrapper< expression const > > get_operands() const
Get the operands.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
ast_operator get_operator() const
Get the operator.
operation(ast_operator op, expression const &left, expression const &right)
Construct a new binary operation object.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
operation(ast_operator op, expression const &input)
Construct a new unary operation object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:243
A non-owning view of scalar from device that is trivially copyable and usable in CUDA device code.
An owning class to represent a duration value in device memory.
Definition: scalar.hpp:682
An owning class to represent a numerical value in device memory.
Definition: scalar.hpp:244
An owning class to represent a singular value.
Definition: scalar.hpp:49
bool is_valid(rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Indicates whether the scalar contains a valid value.
An owning class to represent a string in device memory.
Definition: scalar.hpp:431
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8...
Definition: string_view.hpp:44
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:41
An owning class to represent a timestamp value in device memory.
Definition: scalar.hpp:626
table_reference
Enum of table references.
ast_operator
Enum of supported operators.
Definition: expressions.hpp:94
@ RIGHT
Column index in the right table.
@ OUTPUT
Column index in the output table.
@ LEFT
Column index in the left table.
@ NOT
Logical Not (!)
@ TANH
Hyperbolic tangent.
@ DIV
operator / using common type of lhs and rhs
@ CBRT
Cube-root (x^(1.0/3))
@ ARCSINH
Hyperbolic sine inverse.
@ SQRT
Square-root (x^0.5)
@ PYMOD
operator % using Python's sign rules for negatives
@ LOG
Natural Logarithm (base e)
@ FLOOR
largest integer value not greater than arg
@ ARCTAN
Trigonometric tangent inverse.
@ SIN
Trigonometric sine.
@ CEIL
Smallest integer value not less than arg.
@ ARCSIN
Trigonometric sine inverse.
@ RINT
Rounds the floating-point argument arg to an integer value.
@ BIT_INVERT
Bitwise Not (~)
@ TAN
Trigonometric tangent.
@ ARCCOS
Trigonometric cosine inverse.
@ ABS
Absolute value.
@ ARCTANH
Hyperbolic tangent inverse.
@ GREATER_EQUAL
operator >=
@ EXP
Exponential (base e, Euler number)
@ CAST_TO_FLOAT64
Cast value to double.
@ TRUE_DIV
operator / after promoting type to floating point
@ LOGICAL_AND
operator &&
@ SINH
Hyperbolic sine.
@ CAST_TO_UINT64
Cast value to uint64_t.
@ COSH
Hyperbolic cosine.
@ ARCCOSH
Hyperbolic cosine inverse.
@ IDENTITY
Identity function.
@ IS_NULL
Check if operand is null.
@ CAST_TO_INT64
Cast value to int64_t.
@ COS
Trigonometric cosine.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=rmm::mr::get_current_device_resource())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:217
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: aggregation.hpp:35
Class definitions for cudf::scalar.
Scalar device view class definitions.
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:46
bool may_evaluate_null(table_view const &left, rmm::cuda_stream_view stream) const
Returns true if the expression may evaluate to null.
Definition: expressions.hpp:71
virtual cudf::size_type accept(detail::expression_parser &visitor) const =0
Accepts a visitor class.
virtual std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const =0
Accepts a visitor class.
virtual bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const =0
Returns true if the expression may evaluate to null.
Class definitions for (mutable)_table_view
Type declarations for libcudf.