Posted to commits@hudi.apache.org by yi...@apache.org on 2023/02/02 18:23:35 UTC

[hudi] branch release-0.13.0 updated: [0.13.0 Only] Disable default Avro schema validation (#7802)

This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-0.13.0
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/release-0.13.0 by this push:
     new 3bcbdd09629 [0.13.0 Only] Disable default Avro schema validation (#7802)
3bcbdd09629 is described below

commit 3bcbdd096295b27ea19b5c116d02f44c793e6389
Author: Alexey Kudinkin <al...@gmail.com>
AuthorDate: Thu Feb 2 10:23:29 2023 -0800

    [0.13.0 Only] Disable default Avro schema validation (#7802)
    
    Disables Avro schema validation (hoodie.avro.schema.validate) by default, so writes no longer validate the incoming schema against the table's latest schema unless explicitly enabled.
---
 .../org/apache/hudi/config/HoodieWriteConfig.java  |  2 +-
 .../org/apache/hudi/HoodieSparkSqlWriter.scala     | 22 ++++++++++++++--------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index f56defe7eac..7e29c7dcbae 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -224,7 +224,7 @@ public class HoodieWriteConfig extends HoodieConfig {
 
   public static final ConfigProperty<String> AVRO_SCHEMA_VALIDATE_ENABLE = ConfigProperty
       .key("hoodie.avro.schema.validate")
-      .defaultValue("true")
+      .defaultValue("false")
       .withDocumentation("Validate the schema used for the write against the latest schema, for backwards compatibility.");
 
   public static final ConfigProperty<String> SCHEMA_ALLOW_AUTO_EVOLUTION_COLUMN_DROP = ConfigProperty
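
The hunk above flips the default of hoodie.avro.schema.validate from "true" to
"false". As a minimal sketch (not part of the patch), a Spark job that still wants
the pre-0.13.0 check can opt back in on the writer; the DataFrame df, table name,
record key field, and base path below are illustrative placeholders:

    import org.apache.spark.sql.SaveMode

    // Re-enable the schema check explicitly now that the default is "false".
    df.write
      .format("hudi")
      .option("hoodie.table.name", "my_table")                    // placeholder table name
      .option("hoodie.datasource.write.recordkey.field", "uuid")  // placeholder key field
      .option("hoodie.avro.schema.validate", "true")              // opt back into validation
      .mode(SaveMode.Append)
      .save("/tmp/hudi/my_table")                                 // placeholder base path
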
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
index 764f9474ee0..c62d5a491e0 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -518,14 +518,17 @@ object HoodieSparkSqlWriter {
   }
 
   def addSchemaEvolutionParameters(parameters: Map[String, String], internalSchemaOpt: Option[InternalSchema], writeSchemaOpt: Option[Schema] = None): Map[String, String] = {
-    val schemaEvolutionEnable = if (internalSchemaOpt.isDefined) "true" else "false"
+    val schemaEvolutionEnabled = if (internalSchemaOpt.isDefined) "true" else "false"
+    val schemaReconciliationEnabled = parameters.getOrDefault(DataSourceWriteOptions.RECONCILE_SCHEMA.key(),
+      DataSourceWriteOptions.RECONCILE_SCHEMA.defaultValue().toString).toBoolean
 
-    val schemaValidateEnable = if (schemaEvolutionEnable.toBoolean && parameters.getOrDefault(DataSourceWriteOptions.RECONCILE_SCHEMA.key(), "false").toBoolean) {
-      // force disable schema validate, now we support schema evolution, no need to do validate
-      "false"
+    // Force disable schema validate, now we support schema evolution, no need to do validate
+    val schemaValidationEnabledOpt = if (schemaEvolutionEnabled.toBoolean && schemaReconciliationEnabled) {
+      Some(false)
     } else  {
-      parameters.getOrDefault(HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key(), "true")
+      None
     }
+
     // correct internalSchema, internalSchema should contain hoodie metadata columns.
     val correctInternalSchema = internalSchemaOpt.map { internalSchema =>
       if (internalSchema.findField(HoodieRecord.RECORD_KEY_METADATA_FIELD) == null && writeSchemaOpt.isDefined) {
@@ -535,9 +538,12 @@ object HoodieSparkSqlWriter {
         internalSchema
       }
     }
-    parameters ++ Map(HoodieWriteConfig.INTERNAL_SCHEMA_STRING.key() -> SerDeHelper.toJson(correctInternalSchema.getOrElse(null)),
-      HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.key() -> schemaEvolutionEnable,
-      HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key()  -> schemaValidateEnable)
+
+    parameters ++ Map(
+      HoodieWriteConfig.INTERNAL_SCHEMA_STRING.key() -> SerDeHelper.toJson(correctInternalSchema.orNull),
+      HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.key() -> schemaEvolutionEnabled
+    ) ++
+      schemaValidationEnabledOpt.map(HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key() -> _.toString).toSeq
   }
 
   private def reconcileSchemasLegacy(tableSchema: Schema, newSchema: Schema): (Schema, Boolean) = {
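
The Scala change above only adds hoodie.avro.schema.validate to the write parameters
when it must be forced off (schema evolution together with schema reconciliation);
otherwise the key is left unset so the ConfigProperty default ("false" after this
change) applies. A self-contained sketch of that merge pattern, using simplified
stand-in keys rather than the real ConfigProperty constants:

    object SchemaParamsSketch {
      def main(args: Array[String]): Unit = {
        // Stand-ins for the writer inputs: the user-supplied parameters and whether
        // an InternalSchema was resolved (i.e. schema evolution is in effect).
        val parameters = Map("reconcile.schema" -> "true")
        val schemaEvolutionEnabled = true

        val schemaReconciliationEnabled =
          parameters.getOrElse("reconcile.schema", "false").toBoolean

        // Force validation off only when evolution and reconciliation are both on;
        // otherwise emit no entry so the config default takes effect.
        val schemaValidationEnabledOpt: Option[Boolean] =
          if (schemaEvolutionEnabled && schemaReconciliationEnabled) Some(false) else None

        val merged = parameters ++
          Map("schema.evolution.enable" -> schemaEvolutionEnabled.toString) ++
          schemaValidationEnabledOpt.map(v => "avro.schema.validate" -> v.toString).toSeq

        // "avro.schema.validate" appears in the output only when the override is defined.
        println(merged)
      }
    }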