You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "wecharyu (via GitHub)" <gi...@apache.org> on 2023/03/21 08:36:17 UTC

[GitHub] [hudi] wecharyu commented on a diff in pull request #8219: [HUDI-5949] Check the write operation configured by user for better troubleshooting

wecharyu commented on code in PR #8219:
URL: https://github.com/apache/hudi/pull/8219#discussion_r1143032159


##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala:
##########
@@ -141,28 +141,30 @@ trait ProvidesHoodieConfig extends Logging {
     // NOTE: Target operation could be overridden by the user, therefore if it has been provided as an input
     //       we'd prefer that value over auto-deduced operation. Otherwise, we deduce target operation type
     val operationOverride = combinedOpts.get(DataSourceWriteOptions.OPERATION.key)
-    val operation = operationOverride.getOrElse {
-      (enableBulkInsert, isOverwritePartition, isOverwriteTable, dropDuplicate, isNonStrictMode, isPartitionedTable) match {
-        case (true, _, _, _, false, _) =>
-          throw new IllegalArgumentException(s"Table with primaryKey can not use bulk insert in ${insertMode.value()} mode.")
-        case (true, true, _, _, _, true) =>
-          throw new IllegalArgumentException(s"Insert Overwrite Partition can not use bulk insert.")
-        case (true, _, _, true, _, _) =>
-          throw new IllegalArgumentException(s"Bulk insert cannot support drop duplication." +
-            s" Please disable $INSERT_DROP_DUPS and try again.")
-        // if enableBulkInsert is true, use bulk insert for the insert overwrite non-partitioned table.
-        case (true, false, true, _, _, false) => BULK_INSERT_OPERATION_OPT_VAL
-        // insert overwrite table
-        case (false, false, true, _, _, _) => INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL
-        // insert overwrite partition
-        case (_, true, false, _, _, true) => INSERT_OVERWRITE_OPERATION_OPT_VAL
-        // disable dropDuplicate, and provide preCombineKey, use the upsert operation for strict and upsert mode.
-        case (false, false, false, false, false, _) if hasPrecombineColumn => UPSERT_OPERATION_OPT_VAL
-        // if table is pk table and has enableBulkInsert use bulk insert for non-strict mode.
-        case (true, _, _, _, true, _) => BULK_INSERT_OPERATION_OPT_VAL
-        // for the rest case, use the insert operation
-        case _ => INSERT_OPERATION_OPT_VAL
-      }
+    val operation = (operationOverride, enableBulkInsert, isOverwritePartition,
+                     isOverwriteTable, dropDuplicate, isNonStrictMode, isPartitionedTable, hasPrecombineColumn) match {
+      case (Some(UPSERT_OPERATION_OPT_VAL), _, _, _, _, _, _, false) =>
+        throw new IllegalArgumentException(s"Table without preCombineKey can not use upsert operation.")
+      case (Some(operation), _, _, _, _, _, _, _)  => operation

Review Comment:
   We put the operationOverride get logic after some specific check. @danny0405 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org