expressions.hpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #pragma once
6 
8 #include <cudf/scalar/scalar.hpp>
11 #include <cudf/types.hpp>
12 #include <cudf/utilities/error.hpp>
13 
14 #include <cstdint>
15 #include <memory>
16 #include <vector>
17 
18 namespace CUDF_EXPORT cudf {
19 
20 namespace detail {
21 namespace row_ir {
22 
32 struct node;
33 
37 struct ast_converter;
38 
39 } // namespace row_ir
40 } // namespace detail
41 
42 namespace ast {
49 // Forward declaration.
50 namespace detail {
51 class expression_parser;
52 class expression_transformer;
53 } // namespace detail
54 
/// A generic expression that can be evaluated to return a value.
///
/// Abstract base of the AST node types. Concrete expressions implement the three
/// `accept` overloads (one per visitor type) and the two-table `may_evaluate_null`.
61 struct expression {
    /// Accepts a visitor class.
    /// @param visitor The `expression_parser` visiting this expression
    /// @return A `cudf::size_type` produced by the visitor
68  virtual cudf::size_type accept(detail::expression_parser& visitor) const = 0;
69 
    /// Accepts a visitor class.
    /// @param visitor The `expression_transformer` visiting this expression
    /// @return A reference wrapper to the expression produced by the visitor
76  virtual std::reference_wrapper<expression const> accept(
77  detail::expression_transformer& visitor) const = 0;
78 
    /// Accepts a row_ir::ast_converter class.
    /// @param visitor The `ast_converter` visiting this expression
    /// @return An owning pointer to the `row_ir::node` produced by the visitor
85  [[nodiscard]] virtual std::unique_ptr<cudf::detail::row_ir::node> accept(
86  cudf::detail::row_ir::ast_converter& visitor) const = 0;
87 
    /// Returns true if the expression may evaluate to null.
    ///
    /// Single-table convenience overload: forwards `left` as both the left and the
    /// right table of the two-table overload.
    /// @param left The table to evaluate against
    /// @param stream CUDA stream passed through to the two-table overload
95  [[nodiscard]] bool may_evaluate_null(table_view const& left, rmm::cuda_stream_view stream) const
96  {
97  return may_evaluate_null(left, left, stream);
98  }
99 
    /// Returns true if the expression may evaluate to null.
    /// @param left The left table
    /// @param right The right table
    /// @param stream CUDA stream made available to implementations
108  [[nodiscard]] virtual bool may_evaluate_null(table_view const& left,
109  table_view const& right,
110  rmm::cuda_stream_view stream) const = 0;
111 
    // Virtual because expressions are owned and destroyed through base-class pointers
    // (see `std::unique_ptr<expression>` in `tree`). Defaulted rather than given an
    // empty body so the intent "nothing custom happens here" is explicit.
112  virtual ~expression() = default;
113 };
114 
/// Enum of table references.
120 enum class table_reference {
121  LEFT,    ///< Column index in the left table.
122  RIGHT,   ///< Column index in the right table.
123  OUTPUT   ///< Column index in the output table.
124 };
125 
130  public:
/// Returns the stored value.
///
/// When `T` is `cudf::string_view`, the result is constructed from the `_data`
/// pointer (as `char const*`) together with `_size`. For every other `T`, `_data`
/// is cast to `T const*` and dereferenced.
///
/// @tparam T The type to read the value as; this function performs no check that
///           `T` matches the type the view was constructed with
/// @return A copy of the stored value
137  template <typename T>
138  __device__ T const value() const noexcept
139  {
140  if constexpr (std::is_same_v<T, cudf::string_view>) {
141  return string_view(static_cast<char const*>(_data), _size);
142  }
    // Not wrapped in `else`: for string_view this statement is still compiled but is
    // never reached, because the branch above has already returned.
143  return *static_cast<T const*>(_data);
144  }
145 
150  template <typename T>
152  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
153  {
154  }
155 
160  template <typename T>
162  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
163  {
164  }
165 
170  template <typename T>
172  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
173  {
174  }
175 
181  : generic_scalar_device_view(s.type(), s.data(), s.validity_data(), s.size())
182  {
183  }
184 
189  template <typename T>
191  : generic_scalar_device_view{s.type(), s.data(), s.validity_data()}
192  {
193  }
194 
195  protected:
// Untyped pointer to the scalar's value; value<T>() casts it to the requested type.
196  void const* _data{};
// Length paired with `_data` when the value is read as a string_view. Only the
// four-argument constructor sets it; otherwise it stays value-initialized (0).
197  size_type const _size{};
198 
/// Construct a new fixed width scalar device view object.
///
/// @param type The data type of the value; forwarded to `scalar_device_view_base`
/// @param data Pointer to the value (presumably device memory; confirm against callers)
/// @param is_valid Pointer to the validity flag; forwarded to `scalar_device_view_base`
207  generic_scalar_device_view(data_type type, void const* data, bool* is_valid)
208  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data)
209  {
210  }
211 
/// Construct a new string scalar device view object.
///
/// @param type The data type of the value; forwarded to `scalar_device_view_base`
/// @param data Pointer to the string's characters (presumably device memory; confirm)
/// @param is_valid Pointer to the validity flag; forwarded to `scalar_device_view_base`
/// @param size Length stored in `_size` and later used to build the string_view
220  generic_scalar_device_view(data_type type, void const* data, bool* is_valid, size_type size)
221  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data), _size(size)
222  {
223  }
224 };
225 
/// A literal value used in an abstract syntax tree.
///
/// A literal does not own its value: it keeps a reference to the scalar passed to the
/// constructor (see the `scalar` member) plus a `generic_scalar_device_view` built
/// from it. The scalar must therefore outlive the literal.
229 class literal : public expression {
230  public:
    /// Construct a new literal object.
    /// @tparam T Numeric scalar template type
    /// @param value A numeric scalar; referenced, not copied
237  template <typename T>
238  literal(cudf::numeric_scalar<T>& value) : scalar(value), value(value)
239  {
240  }
241 
    /// Construct a new literal object.
    /// @tparam T Timestamp scalar template type
    /// @param value A timestamp scalar; referenced, not copied
248  template <typename T>
249  literal(cudf::timestamp_scalar<T>& value) : scalar(value), value(value)
250  {
251  }
252 
    /// Construct a new literal object.
    /// @tparam T Duration scalar template type
    /// @param value A duration scalar; referenced, not copied
259  template <typename T>
260  literal(cudf::duration_scalar<T>& value) : scalar(value), value(value)
261  {
262  }
263 
    /// Construct a new literal object.
    /// @param value A string scalar; referenced, not copied
269  literal(cudf::string_scalar& value) : scalar(value), value(value) {}
270 
    /// Construct a new literal object.
    /// @tparam T Fixed-point scalar template type
    /// @param value A fixed-point scalar; referenced, not copied
276  template <typename T>
277  literal(cudf::fixed_point_scalar<T>& value) : scalar(value), value(value)
278  {
279  }
280 
    /// Get the data type.
    /// @return The data type reported by the stored device view
286  [[nodiscard]] cudf::data_type get_data_type() const { return get_value().type(); }
287 
    /// Get the value object.
    /// @return A copy of the stored device view of the scalar
293  [[nodiscard]] generic_scalar_device_view get_value() const { return value; }
294 
    /// Get the scalar.
    /// @return The scalar this literal was constructed from
300  [[nodiscard]] cudf::scalar const& get_scalar() const { return scalar; }
301 
    /// Accepts a visitor class.
305  cudf::size_type accept(detail::expression_parser& visitor) const override;
306 
    /// Accepts a visitor class.
310  std::reference_wrapper<expression const> accept(
311  detail::expression_transformer& visitor) const override;
312 
    /// Accepts a visitor class.
316  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
317  cudf::detail::row_ir::ast_converter& visitor) const override;
318 
    /// Returns true if the expression may evaluate to null.
    ///
    /// For a literal this depends only on the scalar's validity; `left` and `right`
    /// are not consulted.
319  [[nodiscard]] bool may_evaluate_null(table_view const& left,
320  table_view const& right,
321  rmm::cuda_stream_view stream) const override
322  {
323  return !is_valid(stream);
324  }
325 
    /// Check if the underlying scalar is valid.
    /// @param stream CUDA stream passed to `cudf::scalar::is_valid`
    /// @return true if the referenced scalar reports a valid value
332  [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream) const
333  {
334  return scalar.is_valid(stream);
335  }
336 
337  private:
    // Non-owning reference to the caller's scalar.
338  cudf::scalar const& scalar;
    // Device view created from that scalar at construction time.
339  generic_scalar_device_view const value;
340 };
341 
/// An expression referring to data from a column in a table.
345 class column_reference : public expression {
346  public:
    /// Construct a new column reference object.
    /// @param column_index Index of this column in the table
    /// @param table_source Which table the index refers to (defaults to the left table)
354  column_reference(cudf::size_type column_index,
355  table_reference table_source = table_reference::LEFT)
356  : column_index(column_index), table_source(table_source)
357  {
358  }
359 
    /// Get the column index.
365  [[nodiscard]] cudf::size_type get_column_index() const { return column_index; }
366 
    /// Get the table source.
372  [[nodiscard]] table_reference get_table_source() const { return table_source; }
373 
    /// Get the data type.
    /// @param table Table to look the column up in; the table source is not consulted
    /// @return Type of the column at `get_column_index()` in `table`
380  [[nodiscard]] cudf::data_type get_data_type(table_view const& table) const
381  {
382  return table.column(get_column_index()).type();
383  }
384 
    /// Get the data type.
    ///
    /// Selects `left_table` or `right_table` according to the table source and returns
    /// the type of the referenced column. Any other source (i.e. OUTPUT) is rejected
    /// through `CUDF_FAIL`.
    /// @param left_table Table used when the source is LEFT
    /// @param right_table Table used when the source is RIGHT
392  [[nodiscard]] cudf::data_type get_data_type(table_view const& left_table,
393  table_view const& right_table) const
394  {
395  auto const table = [&] {
396  if (get_table_source() == table_reference::LEFT) {
397  return left_table;
398  } else if (get_table_source() == table_reference::RIGHT) {
399  return right_table;
400  } else {
401  CUDF_FAIL("Column reference data type cannot be determined from unknown table.");
402  }
403  }();
404  return table.column(get_column_index()).type();
405  }
406 
    /// Accepts a visitor class.
410  cudf::size_type accept(detail::expression_parser& visitor) const override;
411 
    /// Accepts a visitor class.
415  std::reference_wrapper<expression const> accept(
416  detail::expression_transformer& visitor) const override;
417 
    /// Returns true if the expression may evaluate to null.
    ///
    /// Reports whether the referenced column has nulls. `stream` is not used.
    /// NOTE(review): every source other than LEFT, including OUTPUT, reads from
    /// `right` here, whereas the two-table get_data_type() rejects OUTPUT. Confirm
    /// that this asymmetry is intended.
418  [[nodiscard]] bool may_evaluate_null(table_view const& left,
419  table_view const& right,
420  rmm::cuda_stream_view stream) const override
421  {
422  return (table_source == table_reference::LEFT ? left : right).column(column_index).has_nulls();
423  }
424 
    /// Accepts a visitor class.
428  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
429  cudf::detail::row_ir::ast_converter& visitor) const override;
430 
431  private:
432  cudf::size_type column_index;
433  table_reference table_source;
434 };
435 
/// An operation expression holds an operator and zero or more operands.
///
/// Operands are held as `std::reference_wrapper<expression const>`, i.e. by
/// reference; the operand expressions are not owned by the operation.
439 class operation : public expression {
440  public:
    /// Construct a new unary operation object.
    /// @param op Operator
    /// @param input Operand expression (referenced, must be owned by the caller)
447  operation(ast_operator op, expression const& input);
448 
    /// Construct a new binary operation object.
    /// @param op Operator
    /// @param left Left operand expression (referenced, must be owned by the caller)
    /// @param right Right operand expression (referenced, must be owned by the caller)
456  operation(ast_operator op, expression const& left, expression const& right);
457 
458  // operation only stores references to expressions, so it does not accept r-value
459  // references: the calling code must own the expressions.
460  operation(ast_operator op, expression&& input) = delete;
461  operation(ast_operator op, expression&& left, expression&& right) = delete;
462  operation(ast_operator op, expression&& left, expression const& right) = delete;
463  operation(ast_operator op, expression const& left, expression&& right) = delete;
464 
    /// Get the operator.
470  [[nodiscard]] ast_operator get_operator() const { return op; }
471 
    /// Get the operands.
    /// @return Const reference to the stored vector of operand references
477  [[nodiscard]] std::vector<std::reference_wrapper<expression const>> const& get_operands() const
478  {
479  return operands;
480  }
481 
    /// Accepts a visitor class.
485  cudf::size_type accept(detail::expression_parser& visitor) const override;
486 
    /// Accepts a visitor class.
490  std::reference_wrapper<expression const> accept(
491  detail::expression_transformer& visitor) const override;
492 
    /// Returns true if the expression may evaluate to null.
    /// (Defined out of line; the implementation is not part of this listing.)
493  [[nodiscard]] bool may_evaluate_null(table_view const& left,
494  table_view const& right,
495  rmm::cuda_stream_view stream) const override;
496 
    /// Accepts a visitor class.
500  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
501  cudf::detail::row_ir::ast_converter& visitor) const override;
502 
503  private:
504  ast_operator op;
    // Non-owning references to the operand expressions.
505  std::vector<std::reference_wrapper<expression const>> operands;
506 };
507 
512  public:
/// Construct a new column name reference object.
/// @param column_name Name of the referenced column; taken by value and moved into the member
519  column_name_reference(std::string column_name) : column_name(std::move(column_name)) {}
520 
/// Get the column name.
/// @return A copy of the stored column name
526  [[nodiscard]] std::string get_column_name() const { return column_name; }
527 
531  cudf::size_type accept(detail::expression_parser& visitor) const override;
532 
536  std::reference_wrapper<expression const> accept(
537  detail::expression_transformer& visitor) const override;
538 
/// Returns true if the expression may evaluate to null.
///
/// Always returns true for a name-based reference; none of the arguments are inspected.
539  [[nodiscard]] bool may_evaluate_null(table_view const& left,
540  table_view const& right,
541  rmm::cuda_stream_view stream) const override
542  {
543  return true;
544  }
545 
549  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
550  cudf::detail::row_ir::ast_converter& visitor) const override;
551 
552  private:
553  std::string column_name;
554 };
555 
/// An AST expression tree. It owns and contains multiple dependent expressions.
///
/// Expressions are heap-allocated and held through `std::unique_ptr`, so references
/// returned by `emplace`/`push` stay valid while further expressions are added.
/// The tree is movable but not copyable.
560 class tree {
561  public:
    /// Construct an empty AST tree.
565  tree() = default;
566 
    /// Moves the AST tree.
570  tree(tree&&) = default;
571 
    /// Move-assigns the AST tree.
    /// @return Reference to this tree
576  tree& operator=(tree&&) = default;
577 
578  ~tree() = default;
579 
580  // the tree is not copyable
581  tree(tree const&) = delete;
582  tree& operator=(tree const&) = delete;
583 
    /// Add an expression to the AST tree.
    /// @tparam Expr Concrete type to construct; must derive from `expression`
    /// @param args Arguments forwarded to the constructor of `Expr`
    /// @return A reference to the newly stored expression
589  template <typename Expr, typename... Args>
590  std::enable_if_t<std::is_base_of_v<expression, Expr>, Expr const&> emplace(Args&&... args)
591  {
592  auto expr = std::make_unique<Expr>(std::forward<Args>(args)...);
    // Take the reference before ownership moves into the vector.
593  Expr const& expr_ref = *expr;
594  expressions.emplace_back(std::move(expr));
595  return expr_ref;
596  }
597 
    /// Add an already-constructed expression to the AST tree.
    /// @param expr Expression to store; taken by value and moved into the tree
    /// @return A reference to the stored expression (same as `emplace<Expr>`)
603  template <typename Expr>
604  decltype(auto) push(Expr expr)
605  {
606  return emplace<Expr>(std::move(expr));
607  }
608 
    /// Get the first expression in the tree. The tree must not be empty.
613  [[nodiscard]] expression const& front() const { return *expressions.front(); }
614 
    /// Get the last expression in the tree. The tree must not be empty.
619  [[nodiscard]] expression const& back() const { return *expressions.back(); }
620 
    /// Get the number of expressions added to the tree.
625  [[nodiscard]] size_t size() const { return expressions.size(); }
626 
    /// Get the expression at an index in the tree. Index is checked.
    ///
    /// Const-qualified like `operator[]`, `front()` and `back()`, so checked access
    /// also works through a `tree const&`.
    /// @param index Position of the expression
    /// @throws std::out_of_range (from `std::vector::at`) if `index >= size()`
632  expression const& at(size_t index) const { return *expressions.at(index); }
633 
    /// Get the expression at an index in the tree. Index is unchecked.
    /// @param index Position of the expression; must be less than `size()`
639  expression const& operator[](size_t index) const { return *expressions[index]; }
640 
641  private:
642  // TODO: use better ownership semantics, the unique_ptr here is redundant. Consider using a bump
643  // allocator with type-erased deleters.
644  std::vector<std::unique_ptr<expression>> expressions;
645 };
646  // end of group
648 } // namespace ast
649 } // namespace CUDF_EXPORT cudf
An expression referring to data from a column in a table.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
std::string get_column_name() const
Get the column name.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
column_name_reference(std::string column_name)
Construct a new column name reference object.
An expression referring to data from a column in a table.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
cudf::data_type get_data_type(table_view const &left_table, table_view const &right_table) const
Get the data type.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
cudf::data_type get_data_type(table_view const &table) const
Get the data type.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
table_reference get_table_source() const
Get the table source.
column_reference(cudf::size_type column_index, table_reference table_source=table_reference::LEFT)
Construct a new column reference object.
cudf::size_type get_column_index() const
Get the column index.
A type-erased scalar_device_view where the value is a fixed width type or a string.
generic_scalar_device_view(duration_scalar< T > &s)
Construct a new generic scalar device view object from a duration scalar.
generic_scalar_device_view(cudf::fixed_point_scalar< T > &s)
Construct a new generic scalar device view object from a fixed-point scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid)
Construct a new fixed width scalar device view object.
generic_scalar_device_view(string_scalar &s)
Construct a new generic scalar device view object from a string scalar.
generic_scalar_device_view(timestamp_scalar< T > &s)
Construct a new generic scalar device view object from a timestamp scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid, size_type size)
Construct a new string scalar device view object.
T const value() const noexcept
Returns the stored value.
generic_scalar_device_view(numeric_scalar< T > &s)
Construct a new generic scalar device view object from a numeric scalar.
A literal value used in an abstract syntax tree.
literal(cudf::numeric_scalar< T > &value)
Construct a new literal object.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
generic_scalar_device_view get_value() const
Get the value object.
cudf::data_type get_data_type() const
Get the data type.
literal(cudf::string_scalar &value)
Construct a new literal object.
cudf::scalar const & get_scalar() const
Get the scalar.
literal(cudf::duration_scalar< T > &value)
Construct a new literal object.
bool is_valid(rmm::cuda_stream_view stream) const
Check if the underlying scalar is valid.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
literal(cudf::fixed_point_scalar< T > &value)
Construct a new literal object.
literal(cudf::timestamp_scalar< T > &value)
Construct a new literal object.
An operation expression holds an operator and zero or more operands.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::vector< std::reference_wrapper< expression const > > const & get_operands() const
Get the operands.
ast_operator get_operator() const
Get the operator.
operation(ast_operator op, expression const &left, expression const &right)
Construct a new binary operation object.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
operation(ast_operator op, expression const &input)
Construct a new unary operation object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
An AST expression tree. It owns and contains multiple dependent expressions. All the expressions are ...
tree(tree &&)=default
Moves the ast tree.
tree & operator=(tree &&)=default
move-assigns the AST tree
size_t size() const
get the number of expressions added to the tree
std::enable_if_t< std::is_base_of_v< expression, Expr >, Expr const & > emplace(Args &&... args)
Add an expression to the AST tree.
tree()=default
construct an empty ast tree
expression const & front() const
get the first expression in the tree
expression const & at(size_t index)
get the expression at an index in the tree. Index is checked.
expression const & back() const
get the last expression in the tree
expression const & operator[](size_t index) const
get the expression at an index in the tree. Index is unchecked.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:238
A non-owning view of scalar from device that is trivially copyable and usable in CUDA device code.
An owning class to represent a duration value in device memory.
Definition: scalar.hpp:656
An owning class to represent a fixed_point number in device memory.
Definition: scalar.hpp:288
An owning class to represent a numerical value in device memory.
Definition: scalar.hpp:228
An owning class to represent a singular value.
Definition: scalar.hpp:40
bool is_valid(rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Indicates whether the scalar contains a valid value.
An owning class to represent a string in device memory.
Definition: scalar.hpp:410
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8...
Definition: string_view.hpp:33
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:189
A set of cudf::column's of the same size.
Definition: table.hpp:29
An owning class to represent a timestamp value in device memory.
Definition: scalar.hpp:600
table_reference
Enum of table references.
ast_operator
Enum of supported operators.
@ RIGHT
Column index in the right table.
@ OUTPUT
Column index in the output table.
@ LEFT
Column index in the left table.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:182
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:84
cuDF interfaces
Definition: host_udf.hpp:26
Class definitions for cudf::scalar.
Scalar device view class definitions.
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:61
virtual std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const =0
Accepts a row_ir::ast_converter class.
bool may_evaluate_null(table_view const &left, rmm::cuda_stream_view stream) const
Returns true if the expression may evaluate to null.
Definition: expressions.hpp:95
virtual cudf::size_type accept(detail::expression_parser &visitor) const =0
Accepts a visitor class.
virtual std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const =0
Accepts a visitor class.
virtual bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const =0
Returns true if the expression may evaluate to null.
Class definitions for (mutable)_table_view
Type declarations for libcudf.