You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/24 00:30:36 UTC

spark git commit: [SPARK-23459][SQL] Improve the error message when unknown column is specified in partition columns

Repository: spark
Updated Branches:
  refs/heads/master 855ce13d0 -> 1a198ce8f


[SPARK-23459][SQL] Improve the error message when unknown column is specified in partition columns

## What changes were proposed in this pull request?

This PR avoids to print schema internal information when unknown column is specified in partition columns. This PR prints column names in the schema with more readable format.

The following is an example.

Source code
```
test("save with an unknown partition column") {
  withTempDir { dir =>
    val path = dir.getCanonicalPath
      Seq(1L -> "a").toDF("i", "j").write
        .format("parquet")
        .partitionBy("unknownColumn")
        .save(path)
  }
```
Output without this PR
```
Partition column unknownColumn not found in schema StructType(StructField(i,LongType,false), StructField(j,StringType,true));
```

Output with this PR
```
Partition column unknownColumn not found in schema struct<i:bigint,j:string>;
```

## How was this patch tested?

Manually tested

Author: Kazuaki Ishizaki <is...@jp.ibm.com>

Closes #20653 from kiszk/SPARK-23459.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1a198ce8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1a198ce8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1a198ce8

Branch: refs/heads/master
Commit: 1a198ce8f580bcf35b9cbfab403fc40f821046a1
Parents: 855ce13
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Authored: Fri Feb 23 16:30:32 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Fri Feb 23 16:30:32 2018 -0800

----------------------------------------------------------------------
 .../execution/datasources/PartitioningUtils.scala   |  3 ++-
 .../apache/spark/sql/sources/SaveLoadSuite.scala    | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1a198ce8/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index 379acb6..f9a2480 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -486,7 +486,8 @@ object PartitioningUtils {
     val equality = columnNameEquality(caseSensitive)
     StructType(partitionColumns.map { col =>
       schema.find(f => equality(f.name, col)).getOrElse {
-        throw new AnalysisException(s"Partition column $col not found in schema $schema")
+        val schemaCatalog = schema.catalogString
+        throw new AnalysisException(s"Partition column `$col` not found in schema $schemaCatalog")
       }
     }).asNullable
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/1a198ce8/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala
index 773d34d..12779b4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala
@@ -126,4 +126,20 @@ class SaveLoadSuite extends DataSourceTest with SharedSQLContext with BeforeAndA
 
     checkLoad(df2, "jsonTable2")
   }
+
+  test("SPARK-23459: Improve error message when specified unknown column in partition columns") {
+    withTempDir { dir =>
+      val path = dir.getCanonicalPath
+      val unknown = "unknownColumn"
+      val df = Seq(1L -> "a").toDF("i", "j")
+      val schemaCatalog = df.schema.catalogString
+      val e = intercept[AnalysisException] {
+        df.write
+          .format("parquet")
+          .partitionBy(unknown)
+          .save(path)
+      }.getMessage
+      assert(e.contains(s"Partition column `$unknown` not found in schema $schemaCatalog"))
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org