Posted to commits@hudi.apache.org by GitBox <gi...@apache.org> on 2021/01/29 03:50:14 UTC

[GitHub] [hudi] garyli1019 commented on a change in pull request #2497: [HUDI-1550] Incorrect query result for MOR table when merge base data…

garyli1019 commented on a change in pull request #2497:
URL: https://github.com/apache/hudi/pull/2497#discussion_r566552361



##########
File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
##########
@@ -285,7 +289,14 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext,
 
       private def mergeRowWithLog(curRow: InternalRow, curKey: String) = {
         val historyAvroRecord = serializer.serialize(curRow).asInstanceOf[GenericRecord]
-        logRecords.get(curKey).getData.combineAndGetUpdateValue(historyAvroRecord, tableAvroSchema)
+        if (preCombineField != null) {
+          val payloadProps = new Properties()

Review comment:
       We are creating a new `Properties` object on every call; can we move this outside so it is only built once?
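
For illustration, a minimal sketch of that hoist. The member names (`serializer`, `logRecords`, `tableAvroSchema`, `preCombineField`) come from the diff; the ordering-field property key is an assumption, and the sketch assumes the payload exposes a props-aware `combineAndGetUpdateValue` overload, which is what this diff appears to be wiring up:

```scala
import java.util.Properties

// Built once when the iterator is constructed, not once per merged row.
private lazy val payloadProps: Properties = {
  val props = new Properties()
  if (preCombineField != null) {
    // Assumed property key, for illustration only.
    props.setProperty("hoodie.payload.ordering.field", preCombineField)
  }
  props
}

private def mergeRowWithLog(curRow: InternalRow, curKey: String) = {
  val historyAvroRecord = serializer.serialize(curRow).asInstanceOf[GenericRecord]
  // Reuse the precomputed props on every merge call.
  logRecords.get(curKey).getData
    .combineAndGetUpdateValue(historyAvroRecord, tableAvroSchema, payloadProps)
}
```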

##########
File path: hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java
##########
@@ -108,7 +108,7 @@ public String createTable(
 
     final HoodieTableType tableType = HoodieTableType.valueOf(tableTypeStr);
     HoodieTableMetaClient.initTableType(HoodieCLI.conf, path, tableType, name, archiveFolder,
-        payloadClass, layoutVersion);
+        payloadClass, null, layoutVersion);

Review comment:
       Can we add a new `initTableType` overload to handle all the `null`s being added at the call sites?
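
For example, a hypothetical overload in Java; the parameter list and return type are assumed from this call site, and the delegated-to signature is the extended one this PR introduces:

```java
// Keeps existing callers null-free by delegating to the extended signature.
public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath,
    HoodieTableType tableType, String tableName, String archiveLogFolder,
    String payloadClassName, Integer timelineLayoutVersion) throws IOException {
  // The extra null stands in for the parameter this PR adds between
  // payloadClassName and timelineLayoutVersion.
  return initTableType(hadoopConf, basePath, tableType, tableName, archiveLogFolder,
      payloadClassName, null, timelineLayoutVersion);
}
```

With that in place, call sites like this one in `TableCommand` would not need to change at all.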

##########
File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala
##########
@@ -50,7 +50,8 @@ case class HoodieMergeOnReadTableState(tableStructSchema: StructType,
                                        requiredStructSchema: StructType,
                                        tableAvroSchema: String,
                                        requiredAvroSchema: String,
-                                       hoodieRealtimeFileSplits: List[HoodieMergeOnReadFileSplit])
+                                       hoodieRealtimeFileSplits: List[HoodieMergeOnReadFileSplit],
+                                       preCombineField: String)

Review comment:
       Can we make this field an `Option` instead of using `null`?
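
A sketch of the suggestion; only the last field changes:

```scala
case class HoodieMergeOnReadTableState(tableStructSchema: StructType,
                                       requiredStructSchema: StructType,
                                       tableAvroSchema: String,
                                       requiredAvroSchema: String,
                                       hoodieRealtimeFileSplits: List[HoodieMergeOnReadFileSplit],
                                       // None when the table has no precombine field,
                                       // instead of a null String.
                                       preCombineField: Option[String])
```

Call sites can then wrap the nullable config value once at the boundary with `Option(metaClient.getTableConfig.getPreCombineField)`, and absence stays explicit at the type level.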

##########
File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala
##########
@@ -78,7 +78,16 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext,
   private val tableStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema)
   private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf)
   private val fileIndex = buildFileIndex()
-
+  private val preCombineField = {
+    val fieldFromTableConfig = metaClient.getTableConfig.getPreCombineField
+    if (fieldFromTableConfig != null) {
+      fieldFromTableConfig
+    } else if (optParams.contains(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY)) {

Review comment:
       Can we use the `HoodieTableConfig` instead? Or somehow consolidate all the precombine-field options into one place and deprecate the others; using a write option while reading sounds a bit odd.
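
For reference, the resolution chain this diff implements, pulled into one helper so the precedence is stated once. This is a sketch; `optParams` and the constant name are taken from the surrounding code, and dropping the write-option branch entirely would be the preferred end state:

```scala
// Table config wins; the write option is only a fallback while it
// is still supported on the read path.
private def resolvePreCombineField(): String = {
  val fromTableConfig = metaClient.getTableConfig.getPreCombineField
  if (fromTableConfig != null) fromTableConfig
  else optParams.getOrElse(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, null)
}
```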

##########
File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala
##########
@@ -78,7 +78,16 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext,
   private val tableStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema)
   private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf)
   private val fileIndex = buildFileIndex()
-
+  private val preCombineField = {
+    val fieldFromTableConfig = metaClient.getTableConfig.getPreCombineField

Review comment:
       Would `preCombineFieldFromTableConfig` be a clearer name?

##########
File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala
##########
@@ -18,6 +18,8 @@
 
 package org.apache.hudi
 
+import java.util.Properties

Review comment:
       nit: this import should go in the next import group.
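
That is, something like the following; the grouping is illustrative and the exact convention is whatever the surrounding file already uses:

```scala
package org.apache.hudi

// First group: third-party imports already in the file (illustrative).
import org.apache.avro.generic.GenericRecord

// Next group: JDK imports, where java.util.Properties belongs.
import java.util.Properties
```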

##########
File path: hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala
##########
@@ -78,7 +78,16 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext,
   private val tableStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema)
   private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf)
   private val fileIndex = buildFileIndex()
-
+  private val preCombineField = {
+    val fieldFromTableConfig = metaClient.getTableConfig.getPreCombineField
+    if (fieldFromTableConfig != null) {

Review comment:
       If the field is not set, will this return an empty string or `null`?
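
If it can be either, a guard like the following would normalize both cases. This is a sketch; the field is ours, not in the PR:

```scala
// Treat both null (property absent) and "" (property set but blank) as unset.
private val preCombineField: Option[String] =
  Option(metaClient.getTableConfig.getPreCombineField).filter(_.nonEmpty)
```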




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org