diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 846b627b2242..e7fdc7f4db41 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -1198,32 +1198,53 @@ impl Expr { } } - /// Recursively potentially multiple aliases from an expression. + /// Recursively removed potentially multiple aliases from an expression. /// - /// If the expression is not an alias, the expression is returned unchanged. - /// This method removes directly nested aliases, but not other nested - /// aliases. + /// This method removes nested aliases and returns [`Transformed`] + /// to signal if the expression was changed. /// /// # Example /// ``` /// # use datafusion_expr::col; /// // `foo as "bar"` is unaliased to `foo` /// let expr = col("foo").alias("bar"); - /// assert_eq!(expr.unalias_nested(), col("foo")); + /// assert_eq!(expr.unalias_nested().data, col("foo")); /// - /// // `foo as "bar" + baz` is not unaliased + /// // `foo as "bar" + baz` is unaliased /// let expr = col("foo").alias("bar") + col("baz"); - /// assert_eq!(expr.clone().unalias_nested(), expr); + /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz")); /// /// // `foo as "bar" as "baz" is unalaised to foo /// let expr = col("foo").alias("bar").alias("baz"); - /// assert_eq!(expr.unalias_nested(), col("foo")); + /// assert_eq!(expr.unalias_nested().data, col("foo")); /// ``` - pub fn unalias_nested(self) -> Expr { - match self { - Expr::Alias(alias) => alias.expr.unalias_nested(), - _ => self, - } + pub fn unalias_nested(self) -> Transformed { + self.transform_down_up( + |expr| { + // f_down: skip subqueries. Check in f_down to avoid recursing into them + let recursion = if matches!( + expr, + Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_) + ) { + // subqueries could contain aliases so don't recurse into those + TreeNodeRecursion::Jump + } else { + TreeNodeRecursion::Continue + }; + Ok(Transformed::new(expr, false, recursion)) + }, + |expr| { + // f_up: unalias on up so we can remove nested aliases like + // `(x as foo) as bar` + if let Expr::Alias(Alias { expr, .. }) = expr { + Ok(Transformed::yes(*expr)) + } else { + Ok(Transformed::no(expr)) + } + }, + ) + // unreachable code: internal closure doesn't return err + .unwrap() } /// Return `self IN ` if `negated` is false, otherwise diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index aef11853c81c..2921541934f8 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -878,8 +878,7 @@ impl LogicalPlan { } LogicalPlan::Filter { .. } => { assert_eq!(1, expr.len()); - let predicate = expr.pop().unwrap(); - let predicate = Filter::remove_aliases(predicate)?.data; + let predicate = expr.pop().unwrap().unalias_nested().data; Filter::try_new(predicate, Arc::new(inputs.swap_remove(0))) .map(LogicalPlan::Filter) @@ -2209,38 +2208,6 @@ impl Filter { } false } - - /// Remove aliases from a predicate for use in a `Filter` - /// - /// filter predicates should not contain aliased expressions so we remove - /// any aliases. - /// - /// before this logic was added we would have aliases within filters such as - /// for benchmark q6: - /// - /// ```sql - /// lineitem.l_shipdate >= Date32(\"8766\") - /// AND lineitem.l_shipdate < Date32(\"9131\") - /// AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount >= - /// Decimal128(Some(49999999999999),30,15) - /// AND CAST(lineitem.l_discount AS Decimal128(30, 15)) AS lineitem.l_discount <= - /// Decimal128(Some(69999999999999),30,15) - /// AND lineitem.l_quantity < Decimal128(Some(2400),15,2) - /// ``` - pub fn remove_aliases(predicate: Expr) -> Result> { - predicate.transform_down(|expr| { - match expr { - Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_) => { - // subqueries could contain aliases so we don't recurse into those - Ok(Transformed::new(expr, false, TreeNodeRecursion::Jump)) - } - Expr::Alias(Alias { expr, .. }) => { - Ok(Transformed::new(*expr, true, TreeNodeRecursion::Jump)) - } - _ => Ok(Transformed::no(expr)), - } - }) - } } /// Window its input based on a set of window spec and window function (e.g. SUM or RANK) diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 81b987b0d4fc..5eec5ba09d7e 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -345,9 +345,12 @@ impl CommonSubexprEliminate { self.try_unary_plan(expr, input, config)? .transform_data(|(mut new_expr, new_input)| { assert_eq!(new_expr.len(), 1); // passed in vec![predicate] - let new_predicate = new_expr.pop().unwrap(); - Ok(Filter::remove_aliases(new_predicate)? - .update_data(|new_predicate| (new_predicate, new_input))) + let new_predicate = new_expr + .pop() + .unwrap() + .unalias_nested() + .update_data(|new_predicate| (new_predicate, new_input)); + Ok(new_predicate) })? .map_data(|(new_predicate, new_input)| { Filter::try_new(new_predicate, Arc::new(new_input)) diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index 2fbf77523bd1..4684dbd3b043 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -600,7 +600,7 @@ fn rewrite_expr(expr: Expr, input: &Projection) -> Result> { // * the current column is an expression "f" // // return the expression `d + e` (not `d + e` as f) - let input_expr = input.expr[idx].clone().unalias_nested(); + let input_expr = input.expr[idx].clone().unalias_nested().data; Ok(Transformed::yes(input_expr)) } // Unsupported type for consecutive projection merge analysis.