You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/07/18 01:03:49 UTC
spark git commit: [SPARK-9113] [SQL] enable analysis check code for self join
Repository: spark
Updated Branches:
refs/heads/master 15fc2ffe5 -> fd6b3101f
[SPARK-9113] [SQL] enable analysis check code for self join
The check was unreachable before, as `case operator: LogicalPlan` catches everything already.
Author: Wenchen Fan <cl...@outlook.com>
Closes #7449 from cloud-fan/tmp and squashes the following commits:
2bb6637 [Wenchen Fan] add test
5493aea [Wenchen Fan] add the check back
27221a7 [Wenchen Fan] remove unnecessary analysis check code for self join
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd6b3101
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd6b3101
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd6b3101
Branch: refs/heads/master
Commit: fd6b3101fbb0a8c3ebcf89ce9b4e8664406d9869
Parents: 15fc2ff
Author: Wenchen Fan <cl...@outlook.com>
Authored: Fri Jul 17 16:03:33 2015 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Fri Jul 17 16:03:33 2015 -0700
----------------------------------------------------------------------
.../spark/sql/catalyst/analysis/Analyzer.scala | 2 +-
.../sql/catalyst/analysis/CheckAnalysis.scala | 28 +++++++++-----------
.../catalyst/plans/logical/basicOperators.scala | 6 ++---
.../catalyst/analysis/AnalysisErrorSuite.scala | 14 ++++++++--
4 files changed, 29 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/fd6b3101/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index df8e7f2..e58f3f6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -316,7 +316,7 @@ class Analyzer(
)
// Special handling for cases when self-join introduce duplicate expression ids.
- case j @ Join(left, right, _, _) if left.outputSet.intersect(right.outputSet).nonEmpty =>
+ case j @ Join(left, right, _, _) if !j.selfJoinResolved =>
val conflictingAttributes = left.outputSet.intersect(right.outputSet)
logDebug(s"Conflicting attributes ${conflictingAttributes.mkString(",")} in $j")
http://git-wip-us.apache.org/repos/asf/spark/blob/fd6b3101/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 476ac2b..c7f9713 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -109,29 +109,27 @@ trait CheckAnalysis {
s"resolved attribute(s) $missingAttributes missing from $input " +
s"in operator ${operator.simpleString}")
- case o if !o.resolved =>
- failAnalysis(
- s"unresolved operator ${operator.simpleString}")
-
case p @ Project(exprs, _) if containsMultipleGenerators(exprs) =>
failAnalysis(
s"""Only a single table generating function is allowed in a SELECT clause, found:
| ${exprs.map(_.prettyString).mkString(",")}""".stripMargin)
+ // Special handling for cases when self-join introduce duplicate expression ids.
+ case j @ Join(left, right, _, _) if left.outputSet.intersect(right.outputSet).nonEmpty =>
+ val conflictingAttributes = left.outputSet.intersect(right.outputSet)
+ failAnalysis(
+ s"""
+ |Failure when resolving conflicting references in Join:
+ |$plan
+ |Conflicting attributes: ${conflictingAttributes.mkString(",")}
+ |""".stripMargin)
+
+ case o if !o.resolved =>
+ failAnalysis(
+ s"unresolved operator ${operator.simpleString}")
case _ => // Analysis successful!
}
-
- // Special handling for cases when self-join introduce duplicate expression ids.
- case j @ Join(left, right, _, _) if left.outputSet.intersect(right.outputSet).nonEmpty =>
- val conflictingAttributes = left.outputSet.intersect(right.outputSet)
- failAnalysis(
- s"""
- |Failure when resolving conflicting references in Join:
- |$plan
- |Conflicting attributes: ${conflictingAttributes.mkString(",")}
- |""".stripMargin)
-
}
extendedCheckRules.foreach(_(plan))
}
http://git-wip-us.apache.org/repos/asf/spark/blob/fd6b3101/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
index fbe104d..17a9124 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala
@@ -123,11 +123,11 @@ case class Join(
}
}
- private def selfJoinResolved: Boolean = left.outputSet.intersect(right.outputSet).isEmpty
+ def selfJoinResolved: Boolean = left.outputSet.intersect(right.outputSet).isEmpty
- // Joins are only resolved if they don't introduce ambiguious expression ids.
+ // Joins are only resolved if they don't introduce ambiguous expression ids.
override lazy val resolved: Boolean = {
- childrenResolved && !expressions.exists(!_.resolved) && selfJoinResolved
+ childrenResolved && expressions.forall(_.resolved) && selfJoinResolved
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/fd6b3101/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index f0f1710..2147d07 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -23,10 +23,11 @@ import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.types._
-import org.apache.spark.sql.catalyst.{InternalRow, SimpleCatalystConf}
+import org.apache.spark.sql.catalyst.plans.Inner
+import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
+import org.apache.spark.sql.types._
case class TestFunction(
children: Seq[Expression],
@@ -164,4 +165,13 @@ class AnalysisErrorSuite extends SparkFunSuite with BeforeAndAfter {
assert(message.contains("resolved attribute(s) a#1 missing from a#2"))
}
+
+ test("error test for self-join") {
+ val join = Join(testRelation, testRelation, Inner, None) // same output ids on both sides
+ val error = intercept[AnalysisException] {
+ SimpleAnalyzer.checkAnalysis(join)
+ }
+ assert(error.message.contains("Failure when resolving conflicting references in Join"))
+ assert(error.message.contains("Conflicting attributes"))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org