You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2021/08/11 02:34:56 UTC

[GitHub] [hudi] pengzhiwei2018 commented on a change in pull request #3415: [HUDI-2279]Support column name matching for insert * and update set *

pengzhiwei2018 commented on a change in pull request #3415:
URL: https://github.com/apache/hudi/pull/3415#discussion_r686447091



##########
File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala
##########
@@ -142,11 +142,24 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi
         val resolvedCondition = condition.map(resolveExpressionFrom(resolvedSource)(_))
         val resolvedAssignments = if (isInsertOrUpdateStar(assignments)) {
           // assignments is empty means insert * or update set *
-          // we fill assign all the source fields to the target fields
-          target.output
-            .filter(attr => !HoodieSqlUtils.isMetaField(attr.name))
-            .zip(resolvedSource.output.filter(attr => !HoodieSqlUtils.isMetaField(attr.name)))
-            .map { case (targetAttr, sourceAttr) => Assignment(targetAttr, sourceAttr) }
+          val resolvedSourceOutputWithoutMetaFields = resolvedSource.output.filter(attr => !HoodieSqlUtils.isMetaField(attr.name))
+          val targetOutputWithoutMetaFields = target.output.filter(attr => !HoodieSqlUtils.isMetaField(attr.name))
+          val resolvedSourceColumnNamesWithoutMetaFields = resolvedSourceOutputWithoutMetaFields.map(attr => attr.name)
+
+          if(targetOutputWithoutMetaFields.filter(attr => resolvedSourceColumnNamesWithoutMetaFields.contains(attr.name)).length

Review comment:
       Can we test the equality using a Set instead of comparing filtered lengths?

##########
File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala
##########
@@ -142,11 +142,24 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi
         val resolvedCondition = condition.map(resolveExpressionFrom(resolvedSource)(_))
         val resolvedAssignments = if (isInsertOrUpdateStar(assignments)) {
           // assignments is empty means insert * or update set *
-          // we fill assign all the source fields to the target fields
-          target.output
-            .filter(attr => !HoodieSqlUtils.isMetaField(attr.name))
-            .zip(resolvedSource.output.filter(attr => !HoodieSqlUtils.isMetaField(attr.name)))
-            .map { case (targetAttr, sourceAttr) => Assignment(targetAttr, sourceAttr) }
+          val resolvedSourceOutputWithoutMetaFields = resolvedSource.output.filter(attr => !HoodieSqlUtils.isMetaField(attr.name))
+          val targetOutputWithoutMetaFields = target.output.filter(attr => !HoodieSqlUtils.isMetaField(attr.name))
+          val resolvedSourceColumnNamesWithoutMetaFields = resolvedSourceOutputWithoutMetaFields.map(attr => attr.name)
+
+          if(targetOutputWithoutMetaFields.filter(attr => resolvedSourceColumnNamesWithoutMetaFields.contains(attr.name)).length
+            == targetOutputWithoutMetaFields.length) {
+            //If sourceTable's columns contains all targetTable's columns,
+            //We fill assign all the source fields to the target fields by column name matching.
+            targetOutputWithoutMetaFields.map(targetAttr => {
+              val sourceAttr = resolvedSourceOutputWithoutMetaFields.filter(attr => attr.name.equals(targetAttr.name)).head

Review comment:
       Can we fetch the sourceAttr from a Map keyed by column name instead of filtering the list each time?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org