Posted to commits@spark.apache.org by we...@apache.org on 2020/05/12 13:38:47 UTC

[spark] branch branch-3.0 updated: [SPARK-31678][SQL] Print error stack trace for Spark SQL CLI when error occurs

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 2549e38  [SPARK-31678][SQL] Print error stack trace for Spark SQL CLI when error occurs
2549e38 is described below

commit 2549e38690fd461c7d01518f1c5df2452efa66b5
Author: Kent Yao <ya...@hotmail.com>
AuthorDate: Tue May 12 13:37:13 2020 +0000

    [SPARK-31678][SQL] Print error stack trace for Spark SQL CLI when error occurs
    
    ### What changes were proposed in this pull request?
    
    While looking for the root cause of SPARK-31675, I found it very difficult to see what was actually going on, because the CLI printed nothing but
    ```sql
    Error in query: java.lang.IllegalArgumentException: Wrong FS: blablah/.hive-staging_blahbla, expected: hdfs://cluster1
    ```
    
    It is very hard to find the root cause from such a terse error message without a certain amount of experience.
    
    In this PR, I propose printing the full stack trace when an `AnalysisException` with an underlying root cause occurs; this can still be suppressed via the `-S` (silent) option.
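    
    A minimal sketch of the proposed decision logic (the real change to `SparkSQLCLIDriver` is in the diff at the end of this message; `isSilent` here is a stand-in for the CLI's `-S`/silent setting, and the sketch prints the trace via `printStackTrace` rather than the Hadoop `StringUtils` helper used in the patch):
    
    ```scala
    import java.io.PrintStream
    
    import org.apache.spark.sql.AnalysisException
    
    // Simplified illustration only: print the stack trace when the AnalysisException
    // wraps a root cause and we are not in silent mode; otherwise keep the old
    // message-only output.
    def reportError(e: Throwable, isSilent: Boolean, err: PrintStream): Unit = e match {
      case ae: AnalysisException =>
        ae.cause match {
          case Some(_) if !isSilent =>
            err.println(s"Error in query: ${ae.getMessage}")
            ae.printStackTrace(err) // surfaces the wrapped Hive/Hadoop exception
          case _ =>
            // Pure analysis errors, or silent mode: the message alone is enough.
            err.println(s"Error in query: ${ae.getMessage}")
        }
      case other =>
        err.println(other.getMessage)
    }
    ```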
    
    ### Why are the changes needed?
    
    In SPARK-11188,
    
    > For analysis exceptions in the sql-shell, we should only print the error message to the screen. The stacktrace will never have useful information since this error is used to signify an error with the query.
    
    But nowadays, some `AnalysisException`s do carry information that is useful for debugging, e.g. the `AnalysisException` raised at the location below may wrap exceptions from the Hive or Hadoop side (see the sketch after the stack trace below).
    
    https://github.com/apache/spark/blob/a28ed86a387b286745b30cd4d90b3d558205a5a7/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala#L97-L112
    
    ```scala
    	at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:649)
    	at org.apache.hadoop.fs.FileSystem.makeQualified(FileSystem.java:468)
    	at org.apache.hadoop.hive.common.FileUtils.isSubDir(FileUtils.java:626)
    	at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:2850)
    	at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1398)
    	at org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:1593)
    ```
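    
    For context, here is a rough sketch of the wrapping pattern at the linked `HiveExternalCatalog.withClient` location. It is simplified, not the exact Spark code (the real helper lives inside Spark's `org.apache.spark.sql.hive` package, where the `AnalysisException` constructor is accessible, and it only wraps Hive client exceptions): the Hive-side failure is re-thrown as an `AnalysisException` that keeps the original exception as its cause, which is what this change lets the CLI surface.
    
    ```scala
    import scala.util.control.NonFatal
    
    import org.apache.spark.sql.AnalysisException
    
    // Simplified sketch: run a block against the Hive client and, on failure,
    // wrap the error in an AnalysisException while preserving the root cause.
    def withClient[T](body: => T): T = {
      try body
      catch {
        case NonFatal(e) =>
          throw new AnalysisException(
            s"${e.getClass.getCanonicalName}: ${e.getMessage}", cause = Some(e))
      }
    }
    ```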
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. `bin/spark-sql` will print the full stack trace when an `AnalysisException` that contains a root cause occurs; before this fix, only the error message was printed.
    
    #### Before
    
    ```scala
    Error in query: java.lang.IllegalArgumentException: Wrong FS: hdfs:..., expected: hdfs://hz-cluster10;
    ```
    
    #### After
    ```scala
    Error in query: java.lang.IllegalArgumentException: Wrong FS: hdfs:..., expected: hdfs://hz-cluster10;
    org.apache.spark.sql.AnalysisException: java.lang.IllegalArgumentException: Wrong FS: ..., expected: hdfs://hz-cluster10;
    	at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:109)
    	at org.apache.spark.sql.hive.HiveExternalCatalog.loadPartition(HiveExternalCatalog.scala:890)
    	at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.loadPartition(ExternalCatalogWithListener.scala:179)
    	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:312)
    	at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
    	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
    	at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
    	at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
    	at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
    	at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3644)
    	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
    	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
    	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
    	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
    	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
    	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3642)
    	at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
    	at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
    	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
    	at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
    	at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:607)
    	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
    	at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:602)
    	at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650)
    	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:63)
    	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:376)
    	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:486)
    	at scala.collection.Iterator.foreach(Iterator.scala:941)
    	at scala.collection.Iterator.foreach$(Iterator.scala:941)
    	at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
    	at scala.collection.IterableLike.foreach(IterableLike.scala:74)
    	at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
    	at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
    	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processLine(SparkSQLCLIDriver.scala:480)
    	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:282)
    	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
    	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    	at java.lang.reflect.Method.invoke(Method.java:498)
    	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)
    	at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:165)
    	at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:163)
    	at java.security.AccessController.doPrivileged(Native Method)
    	at javax.security.auth.Subject.doAs(Subject.java:422)
    	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
    	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:163)
    	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
    	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
    	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)
    	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)
    	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
    Caused by: java.lang.IllegalArgumentException: Wrong FS: ..., expected: hdfs://hz-cluster10
    	at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:649)
    	at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:194)
    	at org.apache.hadoop.hdfs.DistributedFileSystem.getEZForPath(DistributedFileSystem.java:2093)
    	at org.apache.hadoop.hdfs.client.HdfsAdmin.getEncryptionZoneForPath(HdfsAdmin.java:289)
    	at org.apache.hadoop.hive.shims.Hadoop23Shims$HdfsEncryptionShim.isPathEncrypted(Hadoop23Shims.java:1221)
    	at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2607)
    	at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:2892)
    	at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1398)
    	at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1324)
    	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    	at java.lang.reflect.Method.invoke(Method.java:498)
    	at org.apache.spark.sql.hive.client.Shim_v0_14.loadPartition(HiveShim.scala:927)
    	at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$loadPartition$1(HiveClientImpl.scala:870)
    	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    	at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:294)
    	at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227)
    	at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226)
    	at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:276)
    	at org.apache.spark.sql.hive.client.HiveClientImpl.loadPartition(HiveClientImpl.scala:860)
    	at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$loadPartition$1(HiveExternalCatalog.scala:911)
    	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    	at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:99)
    	... 52 more
    ```
    
    You can use the `-S` option to restore the old behavior if you find the output too verbose.
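    
    For example (an illustrative invocation; the query is the one used in the new CliSuite test in the diff below):
    
    ```
    # Default: message plus full stack trace when the AnalysisException carries a root cause
    bin/spark-sql -e "select date_sub(date'2011-11-11', '1.2');"
    
    # -S (silent) restores the old, message-only behavior
    bin/spark-sql -S -e "select date_sub(date'2011-11-11', '1.2');"
    ```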
    
    ### How was this patch tested?
    
    Existing CliSuite test:
    `SPARK-11188 Analysis error reporting`
    
    Added a new test and verified manually.
    
    Closes #28499 from yaooqinn/SPARK-31678.
    
    Authored-by: Kent Yao <ya...@hotmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
    (cherry picked from commit ce714d81894a48e2d06c530674c2190e0483e1b4)
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../sql/hive/thriftserver/SparkSQLCLIDriver.scala      | 16 ++++++++++++----
 .../apache/spark/sql/hive/thriftserver/CliSuite.scala  | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index bffa24c..50b5eb4 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -380,10 +380,18 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
 
           ret = rc.getResponseCode
           if (ret != 0) {
-            // For analysis exception, only the error is printed out to the console.
-            rc.getException() match {
-              case e : AnalysisException =>
-                err.println(s"""Error in query: ${e.getMessage}""")
+            rc.getException match {
+              case e: AnalysisException => e.cause match {
+                case Some(_) if !sessionState.getIsSilent =>
+                  err.println(
+                    s"""Error in query: ${e.getMessage}
+                       |${org.apache.hadoop.util.StringUtils.stringifyException(e)}
+                     """.stripMargin)
+                // For analysis exceptions in silent mode or simple ones that only related to the
+                // query itself, such as `NoSuchDatabaseException`, only the error is printed out
+                // to the console.
+                case _ => err.println(s"""Error in query: ${e.getMessage}""")
+              }
               case _ => err.println(rc.getErrorMessage())
             }
             driver.close()
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 265e777..bcdcd07 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -509,4 +509,22 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
       "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223"
     )
   }
+
+  test("AnalysisException with root cause will be printStacktrace") {
+    // If it is not in silent mode, will print the stacktrace
+    runCliWithin(
+      1.minute,
+      extraArgs = Seq("--hiveconf", "hive.session.silent=false",
+        "-e", "select date_sub(date'2011-11-11', '1.2');"),
+      errorResponses = Seq("NumberFormatException"))(
+      ("", "Error in query: The second argument of 'date_sub' function needs to be an integer."),
+      ("", "NumberFormatException: invalid input syntax for type numeric: 1.2"))
+    // If it is in silent mode, will print the error message only
+    runCliWithin(
+      1.minute,
+      extraArgs = Seq("--conf", "spark.hive.session.silent=true",
+        "-e", "select date_sub(date'2011-11-11', '1.2');"),
+      errorResponses = Seq("AnalysisException"))(
+      ("", "Error in query: The second argument of 'date_sub' function needs to be an integer."))
+  }
 }

