expressions.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
19 #include <cudf/scalar/scalar.hpp>
22 #include <cudf/types.hpp>
23 #include <cudf/utilities/error.hpp>
24 
25 #include <cstdint>
26 #include <memory>
27 #include <vector>
28 
29 namespace CUDF_EXPORT cudf {
30 
31 namespace detail {
32 namespace row_ir {
33 
43 struct node;
44 
48 struct ast_converter;
49 
50 } // namespace row_ir
51 } // namespace detail
52 
53 namespace ast {
60 // Forward declaration.
61 namespace detail {
62 class expression_parser;
63 class expression_transformer;
64 } // namespace detail
65 
72 struct expression {
79  virtual cudf::size_type accept(detail::expression_parser& visitor) const = 0;
80 
87  virtual std::reference_wrapper<expression const> accept(
88  detail::expression_transformer& visitor) const = 0;
89 
96  [[nodiscard]] virtual std::unique_ptr<cudf::detail::row_ir::node> accept(
97  cudf::detail::row_ir::ast_converter& visitor) const = 0;
98 
106  [[nodiscard]] bool may_evaluate_null(table_view const& left, rmm::cuda_stream_view stream) const
107  {
108  return may_evaluate_null(left, left, stream);
109  }
110 
119  [[nodiscard]] virtual bool may_evaluate_null(table_view const& left,
120  table_view const& right,
121  rmm::cuda_stream_view stream) const = 0;
122 
123  virtual ~expression() {}
124 };
125 
/**
 * @brief Enum of table references.
 *
 * Identifies which table a column index refers to.
 */
enum class table_reference {
  LEFT,   ///< Column index in the left table.
  RIGHT,  ///< Column index in the right table.
  OUTPUT  ///< Column index in the output table.
};
136 
141  public:
148  template <typename T>
149  __device__ T const value() const noexcept
150  {
151  if constexpr (std::is_same_v<T, cudf::string_view>) {
152  return string_view(static_cast<char const*>(_data), _size);
153  }
154  return *static_cast<T const*>(_data);
155  }
156 
161  template <typename T>
163  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
164  {
165  }
166 
171  template <typename T>
173  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
174  {
175  }
176 
181  template <typename T>
183  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
184  {
185  }
186 
192  : generic_scalar_device_view(s.type(), s.data(), s.validity_data(), s.size())
193  {
194  }
195 
196  protected:
197  void const* _data{};
198  size_type const _size{};
199 
208  generic_scalar_device_view(data_type type, void const* data, bool* is_valid)
209  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data)
210  {
211  }
212 
221  generic_scalar_device_view(data_type type, void const* data, bool* is_valid, size_type size)
222  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data), _size(size)
223  {
224  }
225 };
226 
230 class literal : public expression {
231  public:
238  template <typename T>
239  literal(cudf::numeric_scalar<T>& value) : scalar(value), value(value)
240  {
241  }
242 
249  template <typename T>
250  literal(cudf::timestamp_scalar<T>& value) : scalar(value), value(value)
251  {
252  }
253 
260  template <typename T>
261  literal(cudf::duration_scalar<T>& value) : scalar(value), value(value)
262  {
263  }
264 
270  literal(cudf::string_scalar& value) : scalar(value), value(value) {}
271 
277  [[nodiscard]] cudf::data_type get_data_type() const { return get_value().type(); }
278 
284  [[nodiscard]] generic_scalar_device_view get_value() const { return value; }
285 
291  [[nodiscard]] cudf::scalar const& get_scalar() const { return scalar; }
292 
296  cudf::size_type accept(detail::expression_parser& visitor) const override;
297 
301  std::reference_wrapper<expression const> accept(
302  detail::expression_transformer& visitor) const override;
303 
307  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
308  cudf::detail::row_ir::ast_converter& visitor) const override;
309 
310  [[nodiscard]] bool may_evaluate_null(table_view const& left,
311  table_view const& right,
312  rmm::cuda_stream_view stream) const override
313  {
314  return !is_valid(stream);
315  }
316 
323  [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream) const
324  {
325  return scalar.is_valid(stream);
326  }
327 
328  private:
329  cudf::scalar const& scalar;
330  generic_scalar_device_view const value;
331 };
332 
336 class column_reference : public expression {
337  public:
346  table_reference table_source = table_reference::LEFT)
347  : column_index(column_index), table_source(table_source)
348  {
349  }
350 
356  [[nodiscard]] cudf::size_type get_column_index() const { return column_index; }
357 
363  [[nodiscard]] table_reference get_table_source() const { return table_source; }
364 
371  [[nodiscard]] cudf::data_type get_data_type(table_view const& table) const
372  {
373  return table.column(get_column_index()).type();
374  }
375 
383  [[nodiscard]] cudf::data_type get_data_type(table_view const& left_table,
384  table_view const& right_table) const
385  {
386  auto const table = [&] {
387  if (get_table_source() == table_reference::LEFT) {
388  return left_table;
389  } else if (get_table_source() == table_reference::RIGHT) {
390  return right_table;
391  } else {
392  CUDF_FAIL("Column reference data type cannot be determined from unknown table.");
393  }
394  }();
395  return table.column(get_column_index()).type();
396  }
397 
401  cudf::size_type accept(detail::expression_parser& visitor) const override;
402 
406  std::reference_wrapper<expression const> accept(
407  detail::expression_transformer& visitor) const override;
408 
409  [[nodiscard]] bool may_evaluate_null(table_view const& left,
410  table_view const& right,
411  rmm::cuda_stream_view stream) const override
412  {
413  return (table_source == table_reference::LEFT ? left : right).column(column_index).has_nulls();
414  }
415 
419  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
420  cudf::detail::row_ir::ast_converter& visitor) const override;
421 
422  private:
423  cudf::size_type column_index;
424  table_reference table_source;
425 };
426 
430 class operation : public expression {
431  public:
438  operation(ast_operator op, expression const& input);
439 
447  operation(ast_operator op, expression const& left, expression const& right);
448 
449  // operation only stores references to expressions, so it does not accept r-value
450  // references: the calling code must own the expressions.
451  operation(ast_operator op, expression&& input) = delete;
452  operation(ast_operator op, expression&& left, expression&& right) = delete;
453  operation(ast_operator op, expression&& left, expression const& right) = delete;
454  operation(ast_operator op, expression const& left, expression&& right) = delete;
455 
461  [[nodiscard]] ast_operator get_operator() const { return op; }
462 
468  [[nodiscard]] std::vector<std::reference_wrapper<expression const>> const& get_operands() const
469  {
470  return operands;
471  }
472 
476  cudf::size_type accept(detail::expression_parser& visitor) const override;
477 
481  std::reference_wrapper<expression const> accept(
482  detail::expression_transformer& visitor) const override;
483 
484  [[nodiscard]] bool may_evaluate_null(table_view const& left,
485  table_view const& right,
486  rmm::cuda_stream_view stream) const override;
487 
491  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
492  cudf::detail::row_ir::ast_converter& visitor) const override;
493 
494  private:
495  ast_operator op;
496  std::vector<std::reference_wrapper<expression const>> operands;
497 };
498 
503  public:
510  column_name_reference(std::string column_name) : column_name(std::move(column_name)) {}
511 
517  [[nodiscard]] std::string get_column_name() const { return column_name; }
518 
522  cudf::size_type accept(detail::expression_parser& visitor) const override;
523 
527  std::reference_wrapper<expression const> accept(
528  detail::expression_transformer& visitor) const override;
529 
530  [[nodiscard]] bool may_evaluate_null(table_view const& left,
531  table_view const& right,
532  rmm::cuda_stream_view stream) const override
533  {
534  return true;
535  }
536 
540  [[nodiscard]] std::unique_ptr<cudf::detail::row_ir::node> accept(
541  cudf::detail::row_ir::ast_converter& visitor) const override;
542 
543  private:
544  std::string column_name;
545 };
546 
551 class tree {
552  public:
556  tree() = default;
557 
561  tree(tree&&) = default;
562 
567  tree& operator=(tree&&) = default;
568 
569  ~tree() = default;
570 
571  // the tree is not copyable
572  tree(tree const&) = delete;
573  tree& operator=(tree const&) = delete;
574 
580  template <typename Expr, typename... Args>
581  std::enable_if_t<std::is_base_of_v<expression, Expr>, Expr const&> emplace(Args&&... args)
582  {
583  auto expr = std::make_unique<Expr>(std::forward<Args>(args)...);
584  Expr const& expr_ref = *expr;
585  expressions.emplace_back(std::move(expr));
586  return expr_ref;
587  }
588 
594  template <typename Expr>
595  decltype(auto) push(Expr expr)
596  {
597  return emplace<Expr>(std::move(expr));
598  }
599 
604  [[nodiscard]] expression const& front() const { return *expressions.front(); }
605 
610  [[nodiscard]] expression const& back() const { return *expressions.back(); }
611 
616  [[nodiscard]] size_t size() const { return expressions.size(); }
617 
623  expression const& at(size_t index) { return *expressions.at(index); }
624 
630  expression const& operator[](size_t index) const { return *expressions[index]; }
631 
632  private:
633  // TODO: use better ownership semantics, the unique_ptr here is redundant. Consider using a bump
634  // allocator with type-erased deleters.
635  std::vector<std::unique_ptr<expression>> expressions;
636 };
637  // end of group
639 } // namespace ast
640 } // namespace CUDF_EXPORT cudf
An expression referring to data from a column in a table.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
std::string get_column_name() const
Get the column name.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
column_name_reference(std::string column_name)
Construct a new column name reference object.
An expression referring to data from a column in a table.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
cudf::data_type get_data_type(table_view const &left_table, table_view const &right_table) const
Get the data type.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
cudf::data_type get_data_type(table_view const &table) const
Get the data type.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
table_reference get_table_source() const
Get the table source.
column_reference(cudf::size_type column_index, table_reference table_source=table_reference::LEFT)
Construct a new column reference object.
cudf::size_type get_column_index() const
Get the column index.
A type-erased scalar_device_view where the value is a fixed width type or a string.
generic_scalar_device_view(duration_scalar< T > &s)
Construct a new generic scalar device view object from a duration scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid)
Construct a new fixed width scalar device view object.
generic_scalar_device_view(string_scalar &s)
Construct a new generic scalar device view object from a string scalar.
generic_scalar_device_view(timestamp_scalar< T > &s)
Construct a new generic scalar device view object from a timestamp scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid, size_type size)
Construct a new string scalar device view object.
T const value() const noexcept
Returns the stored value.
generic_scalar_device_view(numeric_scalar< T > &s)
Construct a new generic scalar device view object from a numeric scalar.
A literal value used in an abstract syntax tree.
literal(cudf::numeric_scalar< T > &value)
Construct a new literal object.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
generic_scalar_device_view get_value() const
Get the value object.
cudf::data_type get_data_type() const
Get the data type.
literal(cudf::string_scalar &value)
Construct a new literal object.
cudf::scalar const & get_scalar() const
Get the scalar.
literal(cudf::duration_scalar< T > &value)
Construct a new literal object.
bool is_valid(rmm::cuda_stream_view stream) const
Check if the underlying scalar is valid.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
literal(cudf::timestamp_scalar< T > &value)
Construct a new literal object.
An operation expression holds an operator and zero or more operands.
std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::vector< std::reference_wrapper< expression const > > const & get_operands() const
Get the operands.
ast_operator get_operator() const
Get the operator.
operation(ast_operator op, expression const &left, expression const &right)
Construct a new binary operation object.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
operation(ast_operator op, expression const &input)
Construct a new unary operation object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
An AST expression tree. It owns and contains multiple dependent expressions. All the expressions are ...
tree(tree &&)=default
Moves the ast tree.
tree & operator=(tree &&)=default
move-assigns the AST tree
size_t size() const
get the number of expressions added to the tree
std::enable_if_t< std::is_base_of_v< expression, Expr >, Expr const & > emplace(Args &&... args)
Add an expression to the AST tree.
tree()=default
construct an empty ast tree
expression const & front() const
get the first expression in the tree
expression const & at(size_t index)
get the expression at an index in the tree. Index is checked.
expression const & back() const
get the last expression in the tree
expression const & operator[](size_t index) const
get the expression at an index in the tree. Index is unchecked.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:249
A non-owning view of scalar from device that is trivially copyable and usable in CUDA device code.
An owning class to represent a duration value in device memory.
Definition: scalar.hpp:667
An owning class to represent a numerical value in device memory.
Definition: scalar.hpp:239
An owning class to represent a singular value.
Definition: scalar.hpp:51
bool is_valid(rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Indicates whether the scalar contains a valid value.
An owning class to represent a string in device memory.
Definition: scalar.hpp:421
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8...
Definition: string_view.hpp:44
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
An owning class to represent a timestamp value in device memory.
Definition: scalar.hpp:611
table_reference
Enum of table references.
ast_operator
Enum of supported operators.
@ RIGHT
Column index in the right table.
@ OUTPUT
Column index in the output table.
@ LEFT
Column index in the left table.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:193
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: host_udf.hpp:37
Class definitions for cudf::scalar.
Scalar device view class definitions.
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:72
virtual std::unique_ptr< cudf::detail::row_ir::node > accept(cudf::detail::row_ir::ast_converter &visitor) const =0
Accepts a row_ir::ast_converter class.
bool may_evaluate_null(table_view const &left, rmm::cuda_stream_view stream) const
Returns true if the expression may evaluate to null.
virtual cudf::size_type accept(detail::expression_parser &visitor) const =0
Accepts a visitor class.
virtual std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const =0
Accepts a visitor class.
virtual bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const =0
Returns true if the expression may evaluate to null.
Class definitions for (mutable)_table_view
Type declarations for libcudf.