You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2015/06/11 23:11:00 UTC

spark git commit: [SPARK-7915] [SQL] Support specifying the column list for target table in CTAS

Repository: spark
Updated Branches:
  refs/heads/master c8d551d54 -> 040f223c5


[SPARK-7915] [SQL] Support specifying the column list for target table in CTAS

```
create table t1 (a int, b string) as select key, value from src;

desc t1;
key	int	NULL
value	string	NULL
```

Thus Hive doesn't support specifying the column list for target table in CTAS, however, we should either throwing exception explicity, or supporting the this feature, we just pick up the later one, which seems useful and straightforward.

Author: Cheng Hao <ha...@intel.com>

Closes #6458 from chenghao-intel/ctas_column and squashes the following commits:

d1fa9b6 [Cheng Hao] bug in unittest
4e701aa [Cheng Hao] update as feedback
f305ec1 [Cheng Hao] support specifying the column list for target table in CTAS


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/040f223c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/040f223c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/040f223c

Branch: refs/heads/master
Commit: 040f223c5b9ca724c9f2b4abb59c21b3a23720ba
Parents: c8d551d
Author: Cheng Hao <ha...@intel.com>
Authored: Thu Jun 11 14:03:08 2015 -0700
Committer: Michael Armbrust <mi...@databricks.com>
Committed: Thu Jun 11 14:03:08 2015 -0700

----------------------------------------------------------------------
 .../sql/hive/execution/CreateTableAsSelect.scala  | 16 ++++++++++++----
 .../spark/sql/hive/execution/SQLQuerySuite.scala  | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/040f223c/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
index 7d3ec12..87c36a8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala
@@ -50,17 +50,25 @@ case class CreateTableAsSelect(
       import org.apache.hadoop.io.Text
       import org.apache.hadoop.mapred.TextInputFormat
 
-      val withSchema =
+      val withFormat =
         tableDesc.copy(
-          schema =
-            query.output.map(c =>
-              HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)),
           inputFormat =
             tableDesc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
           outputFormat =
             tableDesc.outputFormat
               .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
           serde = tableDesc.serde.orElse(Some(classOf[LazySimpleSerDe].getName())))
+
+      val withSchema = if (withFormat.schema.isEmpty) {
+        // Hive doesn't support specifying the column list for target table in CTAS
+        // However we don't think SparkSQL should follow that.
+        tableDesc.copy(schema =
+        query.output.map(c =>
+          HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)))
+      } else {
+        withFormat
+      }
+
       hiveContext.catalog.client.createTable(withSchema)
 
       // Get the Metastore Relation

http://git-wip-us.apache.org/repos/asf/spark/blob/040f223c/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 40a3567..8bd4900 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -360,6 +360,24 @@ class SQLQuerySuite extends QueryTest {
     }
   }
 
+  test("specifying the column list for CTAS") {
+    Seq((1, "111111"), (2, "222222")).toDF("key", "value").registerTempTable("mytable1")
+
+    sql("create table gen__tmp(a int, b string) as select key, value from mytable1")
+    checkAnswer(
+      sql("SELECT a, b from gen__tmp"),
+      sql("select key, value from mytable1").collect())
+    sql("DROP TABLE gen__tmp")
+
+    sql("create table gen__tmp(a double, b double) as select key, value from mytable1")
+    checkAnswer(
+      sql("SELECT a, b from gen__tmp"),
+      sql("select cast(key as double), cast(value as double) from mytable1").collect())
+    sql("DROP TABLE gen__tmp")
+
+    sql("drop table mytable1")
+  }
+
   test("command substitution") {
     sql("set tbl=src")
     checkAnswer(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org