You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2016/10/18 17:58:11 UTC
spark git commit: [SPARK-17751][SQL][BACKPORT-2.0] Remove
spark.sql.eagerAnalysis and Output the Plan if Existed in AnalysisException
Repository: spark
Updated Branches:
refs/heads/branch-2.0 9e806f2a4 -> 2aa25833c
[SPARK-17751][SQL][BACKPORT-2.0] Remove spark.sql.eagerAnalysis and Output the Plan if Existed in AnalysisException
### What changes were proposed in this pull request?
This PR is to backport the fix https://github.com/apache/spark/pull/15316 to 2.0.
Dataset always does eager analysis now. Thus, `spark.sql.eagerAnalysis` is not used any more. Thus, we need to remove it.
This PR also outputs the plan. Without the fix, the analysis error is like
```
cannot resolve '`k1`' given input columns: [k, v]; line 1 pos 12
```
After the fix, the analysis error becomes:
```
org.apache.spark.sql.AnalysisException: cannot resolve '`k1`' given input columns: [k, v]; line 1 pos 12;
'Project [unresolvedalias(CASE WHEN ('k1 = 2) THEN 22 WHEN ('k1 = 4) THEN 44 ELSE 0 END, None), v#6]
+- SubqueryAlias t
+- Project [_1#2 AS k#5, _2#3 AS v#6]
+- LocalRelation [_1#2, _2#3]
```
### How was this patch tested?
N/A
Author: gatorsmile <ga...@gmail.com>
Closes #15529 from gatorsmile/eagerAnalysis20.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2aa25833
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2aa25833
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2aa25833
Branch: refs/heads/branch-2.0
Commit: 2aa25833c6f40af13a03a813b5f75d515f689577
Parents: 9e806f2
Author: gatorsmile <ga...@gmail.com>
Authored: Tue Oct 18 10:58:19 2016 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Tue Oct 18 10:58:19 2016 -0700
----------------------------------------------------------------------
.../scala/org/apache/spark/sql/AnalysisException.scala | 7 +++++++
.../org/apache/spark/sql/execution/debug/package.scala | 9 ---------
.../scala/org/apache/spark/sql/internal/SQLConf.scala | 10 ----------
.../scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 3 +++
4 files changed, 10 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2aa25833/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
index 6911843..d3ee8f7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala
@@ -43,6 +43,13 @@ class AnalysisException protected[sql] (
}
override def getMessage: String = {
+ val planAnnotation = plan.map(p => s";\n$p").getOrElse("")
+ getSimpleMessage + planAnnotation
+ }
+
+ // Outputs an exception without the logical plan.
+ // For testing only
+ def getSimpleMessage: String = {
val lineAnnotation = line.map(l => s" line $l").getOrElse("")
val positionAnnotation = startPosition.map(p => s" pos $p").getOrElse("")
s"$message;$lineAnnotation$positionAnnotation"
http://git-wip-us.apache.org/repos/asf/spark/blob/2aa25833/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
index d321f4c..dd9d837 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
@@ -70,15 +70,6 @@ package object debug {
}
/**
- * Augments [[SparkSession]] with debug methods.
- */
- implicit class DebugSQLContext(sparkSession: SparkSession) {
- def debug(): Unit = {
- sparkSession.conf.set(SQLConf.DATAFRAME_EAGER_ANALYSIS.key, false)
- }
- }
-
- /**
* Augments [[Dataset]]s with debug methods.
*/
implicit class DebugQuery(query: Dataset[_]) extends Logging {
http://git-wip-us.apache.org/repos/asf/spark/blob/2aa25833/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 452eeed..8ba87c8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -396,14 +396,6 @@ object SQLConf {
.intConf
.createWithDefault(32)
- // Whether to perform eager analysis when constructing a dataframe.
- // Set to false when debugging requires the ability to look at invalid query plans.
- val DATAFRAME_EAGER_ANALYSIS = SQLConfigBuilder("spark.sql.eagerAnalysis")
- .internal()
- .doc("When true, eagerly applies query analysis on DataFrame operations.")
- .booleanConf
- .createWithDefault(true)
-
// Whether to automatically resolve ambiguity in join conditions for self-joins.
// See SPARK-6231.
val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY =
@@ -713,8 +705,6 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
// See the comments of SCHEMA_STRING_LENGTH_THRESHOLD above for more information.
def schemaStringLengthThreshold: Int = getConf(SCHEMA_STRING_LENGTH_THRESHOLD)
- def dataFrameEagerAnalysis: Boolean = getConf(DATAFRAME_EAGER_ANALYSIS)
-
def dataFrameSelfJoinAutoResolveAmbiguity: Boolean =
getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY)
http://git-wip-us.apache.org/repos/asf/spark/blob/2aa25833/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index 55d5a56..02841d7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -220,6 +220,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted)
} catch {
+ case a: AnalysisException if a.plan.nonEmpty =>
+ // Do not output the logical plan tree which contains expression IDs.
+ (StructType(Seq.empty), Seq(a.getClass.getName, a.getSimpleMessage))
case NonFatal(e) =>
// If there is an exception, put the exception class followed by the message.
(StructType(Seq.empty), Seq(e.getClass.getName, e.getMessage))
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org