You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2019/05/08 08:30:08 UTC

[GitHub] [spark] cloud-fan commented on a change in pull request #24372: [SPARK-27462][SQL] Enhance insert into hive table that could choose some columns in target table flexibly.

cloud-fan commented on a change in pull request #24372: [SPARK-27462][SQL] Enhance insert into hive table that could choose some columns in target table flexibly.
URL: https://github.com/apache/spark/pull/24372#discussion_r281964750
 
 

 ##########
 File path: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
 ##########
 @@ -332,24 +334,58 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] {
   private def preprocess(
       insert: InsertIntoTable,
       tblName: String,
+      insertedCols: Option[Seq[String]],
       partColNames: Seq[String]): InsertIntoTable = {
 
     val normalizedPartSpec = PartitioningUtils.normalizePartitionSpec(
       insert.partition, partColNames, tblName, conf.resolver)
 
     val staticPartCols = normalizedPartSpec.filter(_._2.isDefined).keySet
+    val selectedCols = if (insertedCols == None) {
+      insert.table.output
+    } else {
+      val tableCols = insert.table.output.map(_.name)
+      val noexistsCols = insertedCols.get.filterNot(col => tableCols.contains(col))
+      if (noexistsCols.size > 0) {
+        throw new AnalysisException(s"Table $tblName does not exists these columns: $noexistsCols.")
+      }
+      insert.table.output.filter(a => insertedCols.get.contains(a.name))
+    }
     val expectedColumns = insert.table.output.filterNot(a => staticPartCols.contains(a.name))
 
     if (expectedColumns.length != insert.query.schema.length) {
       throw new AnalysisException(
-        s"$tblName requires that the data to be inserted have the same number of columns as the " +
-          s"target table: target table has ${insert.table.output.size} column(s) but the " +
-          s"inserted data has ${insert.query.output.length + staticPartCols.size} column(s), " +
+        s"$tblName requires that the data to be inserted have the same number of columns as " +
+          s"the number of columns selected in the target table: the number of columns selected " +
+          s"has ${expectedColumns.length + staticPartCols.size} column(s) but the inserted data " +
+          s"has ${insert.query.output.length + staticPartCols.size} column(s), " +
           s"including ${staticPartCols.size} partition column(s) having constant value(s).")
     }
 
+    val tableColumns = insert.table.output.filterNot(a => staticPartCols.contains(a.name))
+    val tableCols = tableColumns.map(_.name)
+    val filledQuery = if (insertedCols == None) {
+      insert.query
+    } else {
+      // Because `HiveFileFormat` writes data according to the index of columns which belongs
+      // target table, in order to align the data, we need to fill in some empty expressions.
+      val cols = insertedCols.get
+      val project = insert.query.asInstanceOf[Project]
+      val filledProjectList = ArrayBuffer.empty[NamedExpression]
+      var i = 0
+      tableCols.foreach { tableCol =>
+        if (cols.contains(tableCol)) {
+          filledProjectList += project.projectList(i)
+          i += 1
+        } else {
+          filledProjectList += Alias(Literal(null, NullType), "NULL")()
 
 Review comment:
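   What if the column is not nullable in the target table? Filling the unselected column with a null literal here would write a null into a non-nullable column.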

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org