You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/07/31 23:36:31 UTC

[GitHub] [spark] brkyvz commented on a change in pull request #25305: [SPARK-28572][SQL] Simple analyzer checks for CREATE TABLE v2

brkyvz commented on a change in pull request #25305: [SPARK-28572][SQL] Simple analyzer checks for CREATE TABLE v2
URL: https://github.com/apache/spark/pull/25305#discussion_r309473280
 
 

 ##########
 File path: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
 ##########
 @@ -160,6 +163,68 @@ class V2SessionCatalogSuite
     assert(catalog.tableExists(testIdent))
   }
 
+  test("createTable: duplicate column names in the table definition") {
+    Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) =>
+      withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+        val errMsg = intercept[AnalysisException] {
+          sql(s"CREATE TABLE t($c0 INT, $c1 INT) USING $v2Source")
+        }.getMessage
+        assert(errMsg.contains("Found duplicate column(s) in the table definition of t"))
+      }
+    }
+  }
+
+  test("createTable: partition column names not in table definition") {
+    val e = intercept[AnalysisException] {
+      sql(s"CREATE TABLE tbl(a int, b string) USING $v2Source PARTITIONED BY (c)")
+    }.getMessage
+    assert(e.contains("Couldn't find column c in"))
+  }
+
+  test("createTable: bucket column names not in table definition") {
+    val e = intercept[AnalysisException] {
+      sql(s"CREATE TABLE tbl(a int, b string) " +
+        s"USING $v2Source CLUSTERED BY (c) INTO 4 BUCKETS")
+    }.getMessage
+    assert(e.contains("Couldn't find column c in"))
+  }
+
+  test("createTable: column repeated in partition columns") {
+    Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) =>
+      withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+        val e = intercept[AnalysisException] {
+          sql(s"CREATE TABLE t($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)")
+        }.getMessage
+        assert(e.contains("Found duplicate column(s) in the partition schema"))
+      }
+    }
+  }
+
+  test("createTable: column repeated in bucket columns") {
+    Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) =>
+      withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+        val e = intercept[AnalysisException] {
+          sql(s"CREATE TABLE t($c0 INT) USING $v2Source " +
+            s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS")
+        }.getMessage
+        assert(e.contains("Found duplicate column(s) in the bucket definition"))
+      }
+    }
+  }
+
+  test("createTable: all columns used in partitioning") {
+    Seq(
+      "PARTITIONED BY (a, b)",
+      "CLUSTERED BY (a, b) INTO 2 BUCKETS",
+      "PARTITIONED BY (a) CLUSTERED BY (b) INTO 2 BUCKETS").foreach { partitioning =>
+
+      val e = intercept[AnalysisException] {
+        sql(s"CREATE TABLE t(a INT, b STRING) USING $v2Source $partitioning")
+      }.getMessage
+      assert(e.contains("Cannot use all columns for partitioning."))
 
 Review comment:
  Having data only created by partition transforms and no data columns?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org