You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by yi...@apache.org on 2023/02/02 18:23:35 UTC
[hudi] branch release-0.13.0 updated: [0.13.0 Only] Disable default Avro schema validation (#7802)
This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch release-0.13.0
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/release-0.13.0 by this push:
new 3bcbdd09629 [0.13.0 Only] Disable default Avro schema validation (#7802)
3bcbdd09629 is described below
commit 3bcbdd096295b27ea19b5c116d02f44c793e6389
Author: Alexey Kudinkin <al...@gmail.com>
AuthorDate: Thu Feb 2 10:23:29 2023 -0800
[0.13.0 Only] Disable default Avro schema validation (#7802)
Disabling execution of Avro schema validation by default.
---
.../org/apache/hudi/config/HoodieWriteConfig.java | 2 +-
.../org/apache/hudi/HoodieSparkSqlWriter.scala | 22 ++++++++++++++--------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index f56defe7eac..7e29c7dcbae 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -224,7 +224,7 @@ public class HoodieWriteConfig extends HoodieConfig {
public static final ConfigProperty<String> AVRO_SCHEMA_VALIDATE_ENABLE = ConfigProperty
.key("hoodie.avro.schema.validate")
- .defaultValue("true")
+ .defaultValue("false")
.withDocumentation("Validate the schema used for the write against the latest schema, for backwards compatibility.");
public static final ConfigProperty<String> SCHEMA_ALLOW_AUTO_EVOLUTION_COLUMN_DROP = ConfigProperty
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
index 764f9474ee0..c62d5a491e0 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -518,14 +518,17 @@ object HoodieSparkSqlWriter {
}
def addSchemaEvolutionParameters(parameters: Map[String, String], internalSchemaOpt: Option[InternalSchema], writeSchemaOpt: Option[Schema] = None): Map[String, String] = {
- val schemaEvolutionEnable = if (internalSchemaOpt.isDefined) "true" else "false"
+ val schemaEvolutionEnabled = if (internalSchemaOpt.isDefined) "true" else "false"
+ val schemaReconciliationEnabled = parameters.getOrDefault(DataSourceWriteOptions.RECONCILE_SCHEMA.key(),
+ DataSourceWriteOptions.RECONCILE_SCHEMA.defaultValue().toString).toBoolean
- val schemaValidateEnable = if (schemaEvolutionEnable.toBoolean && parameters.getOrDefault(DataSourceWriteOptions.RECONCILE_SCHEMA.key(), "false").toBoolean) {
- // force disable schema validate, now we support schema evolution, no need to do validate
- "false"
+ // Force disable schema validate, now we support schema evolution, no need to do validate
+ val schemaValidationEnabledOpt = if (schemaEvolutionEnabled.toBoolean && schemaReconciliationEnabled) {
+ Some(false)
} else {
- parameters.getOrDefault(HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key(), "true")
+ None
}
+
// correct internalSchema, internalSchema should contain hoodie metadata columns.
val correctInternalSchema = internalSchemaOpt.map { internalSchema =>
if (internalSchema.findField(HoodieRecord.RECORD_KEY_METADATA_FIELD) == null && writeSchemaOpt.isDefined) {
@@ -535,9 +538,12 @@ object HoodieSparkSqlWriter {
internalSchema
}
}
- parameters ++ Map(HoodieWriteConfig.INTERNAL_SCHEMA_STRING.key() -> SerDeHelper.toJson(correctInternalSchema.getOrElse(null)),
- HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.key() -> schemaEvolutionEnable,
- HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key() -> schemaValidateEnable)
+
+ parameters ++ Map(
+ HoodieWriteConfig.INTERNAL_SCHEMA_STRING.key() -> SerDeHelper.toJson(correctInternalSchema.orNull),
+ HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.key() -> schemaEvolutionEnabled
+ ) ++
+ schemaValidationEnabledOpt.map(HoodieWriteConfig.AVRO_SCHEMA_VALIDATE_ENABLE.key() -> _.toString).toSeq
}
private def reconcileSchemasLegacy(tableSchema: Schema, newSchema: Schema): (Schema, Boolean) = {