expressions.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
19 #include <cudf/scalar/scalar.hpp>
22 #include <cudf/types.hpp>
23 #include <cudf/utilities/error.hpp>
24 
25 #include <cstdint>
26 #include <memory>
27 #include <vector>
28 
29 namespace CUDF_EXPORT cudf {
30 namespace ast {
// Forward declarations of the visitor types taken by `expression::accept`.
// This header only ever refers to them by reference, so their complete
// definitions are not needed here.
namespace detail {
class expression_parser;
class expression_transformer;
}  // namespace detail
42 
49 struct expression {
56  virtual cudf::size_type accept(detail::expression_parser& visitor) const = 0;
57 
64  virtual std::reference_wrapper<expression const> accept(
65  detail::expression_transformer& visitor) const = 0;
66 
74  [[nodiscard]] bool may_evaluate_null(table_view const& left, rmm::cuda_stream_view stream) const
75  {
76  return may_evaluate_null(left, left, stream);
77  }
78 
87  [[nodiscard]] virtual bool may_evaluate_null(table_view const& left,
88  table_view const& right,
89  rmm::cuda_stream_view stream) const = 0;
90 
91  virtual ~expression() {}
92 };
93 
/**
 * @brief Enum of table references.
 *
 * Identifies which table a column index refers to.
 */
enum class table_reference {
  LEFT,   ///< Column index in the left table.
  RIGHT,  ///< Column index in the right table.
  OUTPUT  ///< Column index in the output table.
};
104 
109  public:
116  template <typename T>
117  __device__ T const value() const noexcept
118  {
119  if constexpr (std::is_same_v<T, cudf::string_view>) {
120  return string_view(static_cast<char const*>(_data), _size);
121  }
122  return *static_cast<T const*>(_data);
123  }
124 
129  template <typename T>
131  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
132  {
133  }
134 
139  template <typename T>
141  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
142  {
143  }
144 
149  template <typename T>
151  : generic_scalar_device_view(s.type(), s.data(), s.validity_data())
152  {
153  }
154 
160  : generic_scalar_device_view(s.type(), s.data(), s.validity_data(), s.size())
161  {
162  }
163 
164  protected:
165  void const* _data{};
166  size_type const _size{};
167 
176  generic_scalar_device_view(data_type type, void const* data, bool* is_valid)
177  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data)
178  {
179  }
180 
189  generic_scalar_device_view(data_type type, void const* data, bool* is_valid, size_type size)
190  : cudf::detail::scalar_device_view_base(type, is_valid), _data(data), _size(size)
191  {
192  }
193 };
194 
198 class literal : public expression {
199  public:
206  template <typename T>
207  literal(cudf::numeric_scalar<T>& value) : scalar(value), value(value)
208  {
209  }
210 
217  template <typename T>
218  literal(cudf::timestamp_scalar<T>& value) : scalar(value), value(value)
219  {
220  }
221 
228  template <typename T>
229  literal(cudf::duration_scalar<T>& value) : scalar(value), value(value)
230  {
231  }
232 
238  literal(cudf::string_scalar& value) : scalar(value), value(value) {}
239 
245  [[nodiscard]] cudf::data_type get_data_type() const { return get_value().type(); }
246 
252  [[nodiscard]] generic_scalar_device_view get_value() const { return value; }
253 
259  [[nodiscard]] cudf::scalar const& get_scalar() const { return scalar; }
260 
264  cudf::size_type accept(detail::expression_parser& visitor) const override;
265 
269  std::reference_wrapper<expression const> accept(
270  detail::expression_transformer& visitor) const override;
271 
272  [[nodiscard]] bool may_evaluate_null(table_view const& left,
273  table_view const& right,
274  rmm::cuda_stream_view stream) const override
275  {
276  return !is_valid(stream);
277  }
278 
285  [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream) const
286  {
287  return scalar.is_valid(stream);
288  }
289 
290  private:
291  cudf::scalar const& scalar;
292  generic_scalar_device_view const value;
293 };
294 
298 class column_reference : public expression {
299  public:
308  table_reference table_source = table_reference::LEFT)
309  : column_index(column_index), table_source(table_source)
310  {
311  }
312 
318  [[nodiscard]] cudf::size_type get_column_index() const { return column_index; }
319 
325  [[nodiscard]] table_reference get_table_source() const { return table_source; }
326 
333  [[nodiscard]] cudf::data_type get_data_type(table_view const& table) const
334  {
335  return table.column(get_column_index()).type();
336  }
337 
345  [[nodiscard]] cudf::data_type get_data_type(table_view const& left_table,
346  table_view const& right_table) const
347  {
348  auto const table = [&] {
349  if (get_table_source() == table_reference::LEFT) {
350  return left_table;
351  } else if (get_table_source() == table_reference::RIGHT) {
352  return right_table;
353  } else {
354  CUDF_FAIL("Column reference data type cannot be determined from unknown table.");
355  }
356  }();
357  return table.column(get_column_index()).type();
358  }
359 
363  cudf::size_type accept(detail::expression_parser& visitor) const override;
364 
368  std::reference_wrapper<expression const> accept(
369  detail::expression_transformer& visitor) const override;
370 
371  [[nodiscard]] bool may_evaluate_null(table_view const& left,
372  table_view const& right,
373  rmm::cuda_stream_view stream) const override
374  {
375  return (table_source == table_reference::LEFT ? left : right).column(column_index).has_nulls();
376  }
377 
378  private:
379  cudf::size_type column_index;
380  table_reference table_source;
381 };
382 
386 class operation : public expression {
387  public:
394  operation(ast_operator op, expression const& input);
395 
403  operation(ast_operator op, expression const& left, expression const& right);
404 
405  // operation only stores references to expressions, so it does not accept r-value
406  // references: the calling code must own the expressions.
407  operation(ast_operator op, expression&& input) = delete;
408  operation(ast_operator op, expression&& left, expression&& right) = delete;
409  operation(ast_operator op, expression&& left, expression const& right) = delete;
410  operation(ast_operator op, expression const& left, expression&& right) = delete;
411 
417  [[nodiscard]] ast_operator get_operator() const { return op; }
418 
424  [[nodiscard]] std::vector<std::reference_wrapper<expression const>> const& get_operands() const
425  {
426  return operands;
427  }
428 
432  cudf::size_type accept(detail::expression_parser& visitor) const override;
433 
437  std::reference_wrapper<expression const> accept(
438  detail::expression_transformer& visitor) const override;
439 
440  [[nodiscard]] bool may_evaluate_null(table_view const& left,
441  table_view const& right,
442  rmm::cuda_stream_view stream) const override;
443 
444  private:
445  ast_operator op;
446  std::vector<std::reference_wrapper<expression const>> operands;
447 };
448 
453  public:
460  column_name_reference(std::string column_name) : column_name(std::move(column_name)) {}
461 
467  [[nodiscard]] std::string get_column_name() const { return column_name; }
468 
472  cudf::size_type accept(detail::expression_parser& visitor) const override;
473 
477  std::reference_wrapper<expression const> accept(
478  detail::expression_transformer& visitor) const override;
479 
480  [[nodiscard]] bool may_evaluate_null(table_view const& left,
481  table_view const& right,
482  rmm::cuda_stream_view stream) const override
483  {
484  return true;
485  }
486 
487  private:
488  std::string column_name;
489 };
490 
495 class tree {
496  public:
500  tree() = default;
501 
505  tree(tree&&) = default;
506 
511  tree& operator=(tree&&) = default;
512 
513  ~tree() = default;
514 
515  // the tree is not copyable
516  tree(tree const&) = delete;
517  tree& operator=(tree const&) = delete;
518 
524  template <typename Expr, typename... Args>
525  std::enable_if_t<std::is_base_of_v<expression, Expr>, Expr const&> emplace(Args&&... args)
526  {
527  auto expr = std::make_unique<Expr>(std::forward<Args>(args)...);
528  Expr const& expr_ref = *expr;
529  expressions.emplace_back(std::move(expr));
530  return expr_ref;
531  }
532 
538  template <typename Expr>
539  decltype(auto) push(Expr expr)
540  {
541  return emplace<Expr>(std::move(expr));
542  }
543 
548  [[nodiscard]] expression const& front() const { return *expressions.front(); }
549 
554  [[nodiscard]] expression const& back() const { return *expressions.back(); }
555 
560  [[nodiscard]] size_t size() const { return expressions.size(); }
561 
567  expression const& at(size_t index) { return *expressions.at(index); }
568 
574  expression const& operator[](size_t index) const { return *expressions[index]; }
575 
576  private:
577  // TODO: use better ownership semantics, the unique_ptr here is redundant. Consider using a bump
578  // allocator with type-erased deleters.
579  std::vector<std::unique_ptr<expression>> expressions;
580 };
581  // end of group
583 } // namespace ast
584 
585 } // namespace CUDF_EXPORT cudf
An expression referring to data from a column in a table.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::string get_column_name() const
Get the column name.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
column_name_reference(std::string column_name)
Construct a new column name reference object.
An expression referring to data from a column in a table.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
cudf::data_type get_data_type(table_view const &left_table, table_view const &right_table) const
Get the data type.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
cudf::data_type get_data_type(table_view const &table) const
Get the data type.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
table_reference get_table_source() const
Get the table source.
column_reference(cudf::size_type column_index, table_reference table_source=table_reference::LEFT)
Construct a new column reference object.
cudf::size_type get_column_index() const
Get the column index.
A type-erased scalar_device_view where the value is a fixed width type or a string.
generic_scalar_device_view(duration_scalar< T > &s)
Construct a new generic scalar device view object from a duration scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid)
Construct a new fixed width scalar device view object.
generic_scalar_device_view(string_scalar &s)
Construct a new generic scalar device view object from a string scalar.
generic_scalar_device_view(timestamp_scalar< T > &s)
Construct a new generic scalar device view object from a timestamp scalar.
generic_scalar_device_view(data_type type, void const *data, bool *is_valid, size_type size)
Construct a new string scalar device view object.
T const value() const noexcept
Returns the stored value.
generic_scalar_device_view(numeric_scalar< T > &s)
Construct a new generic scalar device view object from a numeric scalar.
A literal value used in an abstract syntax tree.
literal(cudf::numeric_scalar< T > &value)
Construct a new literal object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
generic_scalar_device_view get_value() const
Get the value object.
cudf::data_type get_data_type() const
Get the data type.
literal(cudf::string_scalar &value)
Construct a new literal object.
cudf::scalar const & get_scalar() const
Get the scalar.
literal(cudf::duration_scalar< T > &value)
Construct a new literal object.
bool is_valid(rmm::cuda_stream_view stream) const
Check if the underlying scalar is valid.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
literal(cudf::timestamp_scalar< T > &value)
Construct a new literal object.
An operation expression holds an operator and zero or more operands.
bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const override
Returns true if the expression may evaluate to null.
std::vector< std::reference_wrapper< expression const > > const & get_operands() const
Get the operands.
ast_operator get_operator() const
Get the operator.
operation(ast_operator op, expression const &left, expression const &right)
Construct a new binary operation object.
std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const override
Accepts a visitor class.
operation(ast_operator op, expression const &input)
Construct a new unary operation object.
cudf::size_type accept(detail::expression_parser &visitor) const override
Accepts a visitor class.
An AST expression tree. It owns and contains multiple dependent expressions. All the expressions are ...
tree(tree &&)=default
Moves the ast tree.
tree & operator=(tree &&)=default
move-assigns the AST tree
size_t size() const
get the number of expressions added to the tree
std::enable_if_t< std::is_base_of_v< expression, Expr >, Expr const & > emplace(Args &&... args)
Add an expression to the AST tree.
tree()=default
construct an empty ast tree
expression const & front() const
get the first expression in the tree
expression const & at(size_t index)
get the expression at an index in the tree. Index is checked.
expression const & back() const
get the last expression in the tree
expression const & operator[](size_t index) const
get the expression at an index in the tree. Index is unchecked.
Indicator for the logical data type of an element in a column.
Definition: types.hpp:249
A non-owning view of scalar from device that is trivially copyable and usable in CUDA device code.
An owning class to represent a duration value in device memory.
Definition: scalar.hpp:667
An owning class to represent a numerical value in device memory.
Definition: scalar.hpp:239
An owning class to represent a singular value.
Definition: scalar.hpp:51
bool is_valid(rmm::cuda_stream_view stream=cudf::get_default_stream()) const
Indicates whether the scalar contains a valid value.
An owning class to represent a string in device memory.
Definition: scalar.hpp:421
A non-owning, immutable view of device data that is a variable length char array representing a UTF-8...
Definition: string_view.hpp:44
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
An owning class to represent a timestamp value in device memory.
Definition: scalar.hpp:611
table_reference
Enum of table references.
Definition: expressions.hpp:99
ast_operator
Enum of supported operators.
@ RIGHT
Column index in the right table.
@ OUTPUT
Column index in the output table.
@ LEFT
Column index in the left table.
std::unique_ptr< cudf::column > is_valid(cudf::column_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Creates a column of type_id::BOOL8 elements where for every element in input true indicates the value...
#define CUDF_FAIL(...)
Indicates that an erroneous code path has been taken.
Definition: error.hpp:193
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:95
cuDF interfaces
Definition: host_udf.hpp:37
Class definitions for cudf::scalar.
Scalar device view class definitions.
A generic expression that can be evaluated to return a value.
Definition: expressions.hpp:49
bool may_evaluate_null(table_view const &left, rmm::cuda_stream_view stream) const
Returns true if the expression may evaluate to null.
Definition: expressions.hpp:74
virtual cudf::size_type accept(detail::expression_parser &visitor) const =0
Accepts a visitor class.
virtual std::reference_wrapper< expression const > accept(detail::expression_transformer &visitor) const =0
Accepts a visitor class.
virtual bool may_evaluate_null(table_view const &left, table_view const &right, rmm::cuda_stream_view stream) const =0
Returns true if the expression may evaluate to null.
Class definitions for (mutable)_table_view
Type declarations for libcudf.