Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 190 additions & 0 deletions datafusion/expr-common/src/signature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,196 @@ pub enum Volatility {
Volatile,
}

/// Volatility of a whole expression, taking into account both the kind of
/// expression node and the inputs it is built from.
///
/// [`Volatility`] classifies how a *function* behaves; `ExprVolatility`
/// classifies how an *expression* behaves once its structure and all of its
/// children have been considered.
///
/// Variants are declared from least to most volatile, and the derived
/// ordering relies on that declaration order:
/// `Constant < Immutable < Stable < Volatile`
///
/// # Examples
///
/// ```rust
/// use datafusion_expr_common::signature::ExprVolatility;
///
/// // Variants compare by how volatile they are
/// assert!(ExprVolatility::Constant < ExprVolatility::Immutable);
/// assert!(ExprVolatility::Immutable < ExprVolatility::Stable);
/// assert!(ExprVolatility::Stable < ExprVolatility::Volatile);
///
/// // Combining two volatilities keeps the more volatile one
/// let vol = ExprVolatility::Constant.max(ExprVolatility::Immutable);
/// assert_eq!(vol, ExprVolatility::Immutable);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ExprVolatility {
    /// The value never changes, whatever the input is.
    ///
    /// Examples:
    /// - Literals: `1`, `'hello'`, `NULL`
    /// - Constant expressions: `1 + 2`, `abs(-5)`
    /// - Immutable functions whose arguments are all constant: `concat('a', 'b')`
    ///
    /// Such expressions can be evaluated at planning time.
    Constant,

    /// The same input values always produce the same output.
    ///
    /// Examples:
    /// - Column references: `col("a")`
    /// - Immutable functions with non-constant arguments: `abs(col("a"))`
    /// - Binary operations on columns: `col("a") + col("b")`
    ///
    /// Such expressions cannot be evaluated at planning time, but they are
    /// deterministic for a given set of input data.
    Immutable,

    /// The value is fixed for the duration of one query execution but may
    /// differ from one query to the next.
    ///
    /// Examples:
    /// - `now()`, `current_date()`, `current_timestamp()`
    /// - Stable functions with any arguments
    ///
    /// Such expressions can be evaluated once per query execution.
    Stable,

    /// Every evaluation may produce a different value.
    ///
    /// Examples:
    /// - `random()`, `uuid()`
    /// - Any expression containing a volatile function
    ///
    /// Such expressions must be evaluated for each row.
    Volatile,
}

impl ExprVolatility {
/// Combines two volatilities, returning the "highest" (most volatile).
///
/// This is used when computing the volatility of expressions with
/// multiple children (e.g., binary expressions, function arguments).
///
/// # Examples
///
/// ```rust
/// use datafusion_expr_common::signature::ExprVolatility;
///
/// assert_eq!(
/// ExprVolatility::Constant.max(ExprVolatility::Immutable),
/// ExprVolatility::Immutable
/// );
/// assert_eq!(
/// ExprVolatility::Stable.max(ExprVolatility::Immutable),
/// ExprVolatility::Stable
/// );
/// assert_eq!(
/// ExprVolatility::Volatile.max(ExprVolatility::Constant),
/// ExprVolatility::Volatile
/// );
/// ```
#[inline]
pub fn max(self, other: Self) -> Self {
std::cmp::max(self, other)
}

/// Folds multiple volatilities together, returning the maximum (most volatile).
///
/// This is useful when computing the volatility of an expression with
/// multiple children.
///
/// # Examples
///
/// ```
/// use datafusion_expr_common::signature::ExprVolatility;
///
/// // Folding constants gives constant
/// let volatilities = [ExprVolatility::Constant, ExprVolatility::Constant];
/// assert_eq!(ExprVolatility::fold(volatilities.iter().copied()), ExprVolatility::Constant);
///
/// // Any immutable promotes the result to immutable
/// let volatilities = [ExprVolatility::Constant, ExprVolatility::Immutable];
/// assert_eq!(ExprVolatility::fold(volatilities.iter().copied()), ExprVolatility::Immutable);
///
/// // Any volatile makes the whole expression volatile
/// let volatilities = [ExprVolatility::Constant, ExprVolatility::Volatile];
/// assert_eq!(ExprVolatility::fold(volatilities.iter().copied()), ExprVolatility::Volatile);
///
/// // Empty iterator returns Constant (identity element)
/// assert_eq!(ExprVolatility::fold(std::iter::empty()), ExprVolatility::Constant);
/// ```
pub fn fold(volatilities: impl Iterator<Item = Self>) -> Self {
volatilities.fold(ExprVolatility::Constant, |acc, v| acc.max(v))
}

/// Computes the volatility of a function call given the function's volatility
/// and the volatilities of its arguments.
///
/// The result is the maximum of:
/// - The function's volatility (converted via [`From<Volatility>`])
/// - All argument volatilities
///
/// # Examples
///
/// ```
/// use datafusion_expr_common::signature::{ExprVolatility, Volatility};
///
/// // Immutable function with constant args = constant
/// let args = [ExprVolatility::Constant, ExprVolatility::Constant];
/// assert_eq!(
/// ExprVolatility::function_call_volatility(Volatility::Immutable, args.iter().copied()),
/// ExprVolatility::Constant
/// );
///
/// // Immutable function with column args = immutable
/// let args = [ExprVolatility::Immutable, ExprVolatility::Constant];
/// assert_eq!(
/// ExprVolatility::function_call_volatility(Volatility::Immutable, args.iter().copied()),
/// ExprVolatility::Immutable
/// );
///
/// // Volatile function = always volatile regardless of args
/// let args = [ExprVolatility::Constant];
/// assert_eq!(
/// ExprVolatility::function_call_volatility(Volatility::Volatile, args.iter().copied()),
/// ExprVolatility::Volatile
/// );
///
/// // Stable function with constant args = stable
/// let args = [ExprVolatility::Constant];
/// assert_eq!(
/// ExprVolatility::function_call_volatility(Volatility::Stable, args.iter().copied()),
/// ExprVolatility::Stable
/// );
/// ```
pub fn function_call_volatility(
func_volatility: Volatility,
arg_volatilities: impl Iterator<Item = Self>,
) -> Self {
let func_vol: Self = func_volatility.into();
Self::fold(std::iter::once(func_vol).chain(arg_volatilities))
}
}

impl From<Volatility> for ExprVolatility {
    /// Maps the [`Volatility`] declared by a function onto the
    /// [`ExprVolatility`] scale.
    ///
    /// - [`Volatility::Volatile`] → [`ExprVolatility::Volatile`]
    /// - [`Volatility::Stable`] → [`ExprVolatility::Stable`]
    /// - [`Volatility::Immutable`] → [`ExprVolatility::Constant`]: an immutable
    ///   function adds no volatility of its own, so with constant arguments
    ///   the call is constant
    fn from(volatility: Volatility) -> Self {
        match volatility {
            Volatility::Volatile => Self::Volatile,
            Volatility::Stable => Self::Stable,
            Volatility::Immutable => Self::Constant,
        }
    }
}

/// Represents the arity (number of arguments) of a function signature
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Arity {
Expand Down
138 changes: 137 additions & 1 deletion datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use std::sync::Arc;
use crate::expr_fn::binary_expr;
use crate::function::WindowFunctionSimplification;
use crate::logical_plan::Subquery;
use crate::{AggregateUDF, Volatility};
use crate::{AggregateUDF, ExprVolatility, Volatility};
use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF};

use arrow::datatypes::{DataType, Field, FieldRef};
Expand Down Expand Up @@ -1969,6 +1969,10 @@ impl Expr {
/// Note: unlike [`Self::is_volatile`], this function does not consider inputs:
/// - `rand()` returns `true`,
/// - `a + rand()` returns `false`
// NOTE(review): confirm that `since` names the release in which this
// deprecation actually ships.
#[deprecated(
    since = "47.0.0",
    note = "Use `node_volatility() == ExprVolatility::Volatile` instead"
)]
pub fn is_volatile_node(&self) -> bool {
    // Only a scalar function node can be reported as volatile here; every
    // other kind of expression is `false`.
    let Expr::ScalarFunction(func) = self else {
        return false;
    };
    func.func.signature().volatility == Volatility::Volatile
}
Expand All @@ -1980,11 +1984,143 @@ impl Expr {
/// return a different value.
///
/// See [`Volatility`] for more information.
#[deprecated(
    since = "47.0.0",
    note = "Use `volatility() == ExprVolatility::Volatile` instead"
)]
// This method still delegates to the deprecated `is_volatile_node`, so the
// deprecation lint is silenced for its body.
#[allow(deprecated)]
pub fn is_volatile(&self) -> bool {
    // The closure only ever returns `Ok`, so the traversal cannot fail.
    let found_volatile = self.exists(|node| Ok(node.is_volatile_node()));
    found_volatile.expect("exists closure is infallible")
}

/// Returns the volatility of this expression node without considering children.
///
/// For most expression types, this returns the intrinsic volatility of the
/// node itself. For expressions with children, use [`Self::volatility`] to
/// get the combined volatility.
///
/// # Examples
///
/// ```
/// use datafusion_expr::{col, lit, ExprVolatility};
///
/// // Literal is constant
/// assert_eq!(lit(42).node_volatility(), ExprVolatility::Constant);
///
/// // Column reference is immutable
/// assert_eq!(col("a").node_volatility(), ExprVolatility::Immutable);
/// ```
pub fn node_volatility(&self) -> ExprVolatility {
match self {
// Literals are always constant
Expr::Literal(_, _) => ExprVolatility::Constant,

// Column references depend on input data
Expr::Column(_) => ExprVolatility::Immutable,

// Scalar variables (e.g., @@var) are typically stable within a query
Expr::ScalarVariable(_, _) => ExprVolatility::Stable,

// Outer reference columns depend on outer query data
Expr::OuterReferenceColumn(_, _) => ExprVolatility::Immutable,

// Placeholders are resolved at planning time, but their value
// comes from external input, so treat as immutable
Expr::Placeholder(_) => ExprVolatility::Immutable,

// Scalar functions have their own volatility
Expr::ScalarFunction(func) => func.func.signature().volatility.into(),

// Aggregate functions are typically immutable (deterministic for same input)
Expr::AggregateFunction(_) => ExprVolatility::Immutable,

// Window functions are typically immutable
Expr::WindowFunction(_) => ExprVolatility::Immutable,

// Subqueries: depends on the subquery, but generally immutable
Expr::ScalarSubquery(_) | Expr::Exists(_) | Expr::InSubquery(_) => {
ExprVolatility::Immutable
}

// All other expression types (BinaryExpr, Cast, Case, etc.)
// have volatility determined entirely by their children
Expr::Alias(_)
| Expr::BinaryExpr(_)
| Expr::Like(_)
| Expr::SimilarTo(_)
| Expr::Not(_)
| Expr::IsNotNull(_)
| Expr::IsNull(_)
| Expr::IsTrue(_)
| Expr::IsFalse(_)
| Expr::IsUnknown(_)
| Expr::IsNotTrue(_)
| Expr::IsNotFalse(_)
| Expr::IsNotUnknown(_)
| Expr::Negative(_)
| Expr::Between(_)
| Expr::Case(_)
| Expr::Cast(_)
| Expr::TryCast(_)
| Expr::InList(_)
| Expr::GroupingSet(_)
| Expr::Unnest(_) => ExprVolatility::Constant,

// Wildcard should be resolved before this point
#[expect(deprecated)]
Expr::Wildcard { .. } => ExprVolatility::Constant,
}
}

/// Returns the overall volatility of this expression tree.
///
/// Walks the tree, combining the [`Self::node_volatility`] of every visited
/// node, and returns the maximum (most volatile) level found. The walk
/// stops as soon as [`ExprVolatility::Volatile`] is seen, since no higher
/// level exists.
///
/// # Examples
///
/// ```
/// use datafusion_expr::{col, lit, ExprVolatility};
///
/// // Literal is constant
/// assert_eq!(lit(42).volatility(), ExprVolatility::Constant);
///
/// // Column reference is immutable
/// assert_eq!(col("a").volatility(), ExprVolatility::Immutable);
///
/// // Constant + Constant = Constant
/// assert_eq!((lit(1) + lit(2)).volatility(), ExprVolatility::Constant);
///
/// // Constant + Immutable = Immutable
/// assert_eq!((col("a") + lit(1)).volatility(), ExprVolatility::Immutable);
/// ```
pub fn volatility(&self) -> ExprVolatility {
    // Start from the identity of `max`. `apply` visits this node first and
    // then its descendants, so the root needs no separate handling before
    // the traversal.
    let mut most_volatile = ExprVolatility::Constant;

    self.apply(|expr| {
        most_volatile = most_volatile.max(expr.node_volatility());

        // Short-circuit once the highest level has been reached.
        Ok(if most_volatile == ExprVolatility::Volatile {
            TreeNodeRecursion::Stop
        } else {
            TreeNodeRecursion::Continue
        })
    })
    // The closure never returns `Err`, so the traversal cannot fail.
    .expect("volatility traversal is infallible");

    most_volatile
}

/// Recursively find all [`Expr::Placeholder`] expressions, and
/// to infer their [`DataType`] from the context of their use.
///
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ pub use datafusion_expr_common::columnar_value::ColumnarValue;
pub use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator};
pub use datafusion_expr_common::operator::Operator;
pub use datafusion_expr_common::signature::{
ArrayFunctionArgument, ArrayFunctionSignature, Coercion, Signature,
ArrayFunctionArgument, ArrayFunctionSignature, Coercion, ExprVolatility, Signature,
TIMEZONE_WILDCARD, TypeSignature, TypeSignatureClass, Volatility,
};
pub use datafusion_expr_common::type_coercion::binary;
Expand Down
Loading
Loading