Posted to commits@hudi.apache.org by le...@apache.org on 2022/01/04 02:00:33 UTC

[hudi] branch master updated: [HUDI-3140] Fix bulk_insert failure on Spark 3.2.0 (#4498)

This is an automated email from the ASF dual-hosted git repository.

leesf pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 29ab6fb  [HUDI-3140] Fix bulk_insert failure on Spark 3.2.0 (#4498)
29ab6fb is described below

commit 29ab6fb9adb7e4697bd0bb2d32aa59351650f613
Author: leesf <49...@qq.com>
AuthorDate: Tue Jan 4 09:59:59 2022 +0800

    [HUDI-3140] Fix bulk_insert failure on Spark 3.2.0 (#4498)
---
 .../main/java/org/apache/hudi/spark3/internal/DefaultSource.java | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java b/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java
index 63c09e0..b124853 100644
--- a/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java
+++ b/hudi-spark-datasource/hudi-spark3/src/main/java/org/apache/hudi/spark3/internal/DefaultSource.java
@@ -31,6 +31,7 @@ import org.apache.spark.sql.connector.expressions.Transform;
 import org.apache.spark.sql.types.StructType;
 import org.apache.spark.sql.util.CaseInsensitiveStringMap;
 
+import java.util.HashMap;
 import java.util.Map;
 
 import static org.apache.hudi.DataSourceUtils.mayBeOverwriteParquetWriteLegacyFormatProp;
@@ -55,11 +56,13 @@ public class DefaultSource extends BaseDefaultSource implements TableProvider {
         HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()));
     boolean arePartitionRecordsSorted = Boolean.parseBoolean(properties.getOrDefault(HoodieInternalConfig.BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED,
         Boolean.toString(HoodieInternalConfig.DEFAULT_BULKINSERT_ARE_PARTITIONER_RECORDS_SORTED)));
+    // Create a new map, as the properties map is an unmodifiableMap on Spark 3.2.0
+    Map<String, String> newProps = new HashMap<>(properties);
     // Auto set the value of "hoodie.parquet.writeLegacyFormat.enabled"
-    mayBeOverwriteParquetWriteLegacyFormatProp(properties, schema);
+    mayBeOverwriteParquetWriteLegacyFormatProp(newProps, schema);
     // The 1st arg to createHoodieConfig is not strictly required to be set, but we pass it anyway.
-    HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(properties.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()), path, tblName, properties);
+    HoodieWriteConfig config = DataSourceUtils.createHoodieConfig(newProps.get(HoodieWriteConfig.AVRO_SCHEMA_STRING.key()), path, tblName, newProps);
     return new HoodieDataSourceInternalTable(instantTime, config, schema, getSparkSession(),
-        getConfiguration(), properties, populateMetaFields, arePartitionRecordsSorted);
+        getConfiguration(), newProps, populateMetaFields, arePartitionRecordsSorted);
   }
 }
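
For context: on Spark 3.2.0 the options map handed to DefaultSource is an
unmodifiable view, so the previous code path, which mutated `properties` in
place via mayBeOverwriteParquetWriteLegacyFormatProp, failed with an
UnsupportedOperationException during bulk_insert. Copying the entries into a
fresh HashMap restores mutability. The standalone JDK sketch below (illustrative
only, not part of the commit; the demo class name and sample config values are
made up) reproduces the failure mode and the fix:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class UnmodifiableMapDemo {
  public static void main(String[] args) {
    Map<String, String> backing = new HashMap<>();
    backing.put("hoodie.table.name", "demo_table");

    // Spark 3.2.0 exposes the write options as an unmodifiable view,
    // so mutating it in place throws UnsupportedOperationException.
    Map<String, String> properties = Collections.unmodifiableMap(backing);
    try {
      properties.put("hoodie.parquet.writeLegacyFormat.enabled", "true");
    } catch (UnsupportedOperationException e) {
      System.out.println("In-place mutation rejected: " + e);
    }

    // The fix applied in this commit: copy into a new HashMap and mutate the copy.
    Map<String, String> newProps = new HashMap<>(properties);
    newProps.put("hoodie.parquet.writeLegacyFormat.enabled", "true");
    System.out.println("Copy accepts the override: "
        + newProps.get("hoodie.parquet.writeLegacyFormat.enabled"));
  }
}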