You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2016/02/29 19:10:03 UTC
spark git commit: [SPARK-13544][SQL] Rewrite/Propagate Constraints for Aliases in Aggregate
Repository: spark
Updated Branches:
refs/heads/master 02aa499df -> bc65f60ef
[SPARK-13544][SQL] Rewrite/Propagate Constraints for Aliases in Aggregate
#### What changes were proposed in this pull request?
After analysis by the Analyzer, two operators could have aliases: `Project` and `Aggregate`. So far, we only rewrite and propagate constraints if an `Alias` is defined in `Project`. This PR resolves this issue for `Aggregate` as well.
#### How was this patch tested?
Added a test case for `Aggregate` in `ConstraintPropagationSuite`.
marmbrus sameeragarwal
Author: gatorsmile <ga...@gmail.com>
Closes #11422 from gatorsmile/validConstraintsInUnaryNodes.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bc65f60e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bc65f60e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bc65f60e
Branch: refs/heads/master
Commit: bc65f60ef7c920db756bfe643f7edbdf3593a989
Parents: 02aa499
Author: gatorsmile <ga...@gmail.com>
Authored: Mon Feb 29 10:10:04 2016 -0800
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Mon Feb 29 10:10:04 2016 -0800
----------------------------------------------------------------------
.../catalyst/plans/logical/LogicalPlan.scala | 16 +++++++++++
.../catalyst/plans/logical/basicOperators.scala | 30 +++++---------------
.../plans/ConstraintPropagationSuite.scala | 15 ++++++++++
3 files changed, 38 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/bc65f60e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 8095083..31e775d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -315,6 +315,22 @@ abstract class UnaryNode extends LogicalPlan {
override def children: Seq[LogicalPlan] = child :: Nil
+ /**
+ * Generates an additional set of aliased constraints by replacing the original constraint
+ * expressions with the corresponding alias
+ */
+ protected def getAliasedConstraints(projectList: Seq[NamedExpression]): Set[Expression] = {
+ projectList.flatMap {
+ case a @ Alias(e, _) =>
+ child.constraints.map(_ transform {
+ case expr: Expression if expr.semanticEquals(e) =>
+ a.toAttribute
+ }).union(Set(EqualNullSafe(e, a.toAttribute)))
+ case _ =>
+ Set.empty[Expression]
+ }.toSet
+ }
+
override protected def validConstraints: Set[Expression] = child.constraints
override def statistics: Statistics = {
http://git-wip-us.apache.org/repos/asf/spark/blob/bc65f60e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index 5d2a65b..e81a0f9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -51,25 +51,8 @@ case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) extend
!expressions.exists(!_.resolved) && childrenResolved && !hasSpecialExpressions
}
- /**
- * Generates an additional set of aliased constraints by replacing the original constraint
- * expressions with the corresponding alias
- */
- private def getAliasedConstraints: Set[Expression] = {
- projectList.flatMap {
- case a @ Alias(e, _) =>
- child.constraints.map(_ transform {
- case expr: Expression if expr.semanticEquals(e) =>
- a.toAttribute
- }).union(Set(EqualNullSafe(e, a.toAttribute)))
- case _ =>
- Set.empty[Expression]
- }.toSet
- }
-
- override def validConstraints: Set[Expression] = {
- child.constraints.union(getAliasedConstraints)
- }
+ override def validConstraints: Set[Expression] =
+ child.constraints.union(getAliasedConstraints(projectList))
}
/**
@@ -126,9 +109,8 @@ case class Filter(condition: Expression, child: LogicalPlan)
override def maxRows: Option[Long] = child.maxRows
- override protected def validConstraints: Set[Expression] = {
+ override protected def validConstraints: Set[Expression] =
child.constraints.union(splitConjunctivePredicates(condition).toSet)
- }
}
abstract class SetOperation(left: LogicalPlan, right: LogicalPlan) extends BinaryNode {
@@ -157,9 +139,8 @@ case class Intersect(left: LogicalPlan, right: LogicalPlan) extends SetOperation
leftAttr.withNullability(leftAttr.nullable && rightAttr.nullable)
}
- override protected def validConstraints: Set[Expression] = {
+ override protected def validConstraints: Set[Expression] =
leftConstraints.union(rightConstraints)
- }
// Intersect are only resolved if they don't introduce ambiguous expression ids,
// since the Optimizer will convert Intersect to Join.
@@ -442,6 +423,9 @@ case class Aggregate(
override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute)
override def maxRows: Option[Long] = child.maxRows
+ override def validConstraints: Set[Expression] =
+ child.constraints.union(getAliasedConstraints(aggregateExpressions))
+
override def statistics: Statistics = {
if (groupingExpressions.isEmpty) {
Statistics(sizeInBytes = 1)
http://git-wip-us.apache.org/repos/asf/spark/blob/bc65f60e/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
index 373b1ff..b68432b 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/ConstraintPropagationSuite.scala
@@ -72,6 +72,21 @@ class ConstraintPropagationSuite extends SparkFunSuite {
IsNotNull(resolveColumn(tr, "c"))))
}
+ test("propagating constraints in aggregate") {
+ val tr = LocalRelation('a.int, 'b.string, 'c.int)
+
+ assert(tr.analyze.constraints.isEmpty)
+
+ val aliasedRelation = tr.where('c.attr > 10 && 'a.attr < 5)
+ .groupBy('a, 'c, 'b)('a, 'c.as("c1"), count('a).as("a3")).select('c1, 'a).analyze
+
+ verifyConstraints(aliasedRelation.analyze.constraints,
+ Set(resolveColumn(aliasedRelation.analyze, "c1") > 10,
+ IsNotNull(resolveColumn(aliasedRelation.analyze, "c1")),
+ resolveColumn(aliasedRelation.analyze, "a") < 5,
+ IsNotNull(resolveColumn(aliasedRelation.analyze, "a"))))
+ }
+
test("propagating constraints in aliases") {
val tr = LocalRelation('a.int, 'b.string, 'c.int)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org