You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by yi...@apache.org on 2023/03/22 00:17:48 UTC

[hudi] branch master updated: [HUDI-5781] Refactor SQL transformer configs to use HoodieConfig and ConfigProperty (#8155)

This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 749a93ba269 [HUDI-5781] Refactor SQL transformer configs to use HoodieConfig and ConfigProperty (#8155)
749a93ba269 is described below

commit 749a93ba269f2f7648be10d61b4f479dd5518d3e
Author: Lokesh Jain <lj...@apache.org>
AuthorDate: Wed Mar 22 05:47:40 2023 +0530

    [HUDI-5781] Refactor SQL transformer configs to use HoodieConfig and ConfigProperty (#8155)
    
    This commit moves the configs for SqlFileBasedTransformer and SqlQueryBasedTransformer to SqlTransformerConfig, which extends HoodieConfig and uses ConfigProperty, so that these configs are surfaced when the configuration documentation for the Hudi website is generated.
    
    Co-authored-by: Y Ethan Guo <et...@gmail.com>
---
 .../apache/hudi/common/config/ConfigGroups.java    |  3 +-
 .../utilities/config/SqlTransformerConfig.java     | 46 ++++++++++++++++++++++
 .../transform/SqlFileBasedTransformer.java         | 16 +++-----
 .../transform/SqlQueryBasedTransformer.java        | 13 ++----
 4 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java
index 43647da88eb..bd4589b82b1 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigGroups.java
@@ -37,7 +37,8 @@ public class ConfigGroups {
     METRICS("Metrics Configs"),
     RECORD_PAYLOAD("Record Payload Config"),
     KAFKA_CONNECT("Kafka Connect Configs"),
-    AWS("Amazon Web Services Configs");
+    AWS("Amazon Web Services Configs"),
+    DELTA_STREAMER("DeltaStreamer Configs");
 
     public final String name;
 
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/SqlTransformerConfig.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/SqlTransformerConfig.java
new file mode 100644
index 00000000000..c1509a0edcf
--- /dev/null
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/config/SqlTransformerConfig.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.utilities.config;
+
+import org.apache.hudi.common.config.ConfigClassProperty;
+import org.apache.hudi.common.config.ConfigGroups;
+import org.apache.hudi.common.config.ConfigProperty;
+import org.apache.hudi.common.config.HoodieConfig;
+
+import javax.annotation.concurrent.Immutable;
+
+/**
+ * DeltaStreamer SQL Transformer Configs
+ */
+@Immutable
+@ConfigClassProperty(name = "DeltaStreamer SQL Transformer Configs",
+    groupName = ConfigGroups.Names.DELTA_STREAMER,
+    description = "Configurations controlling the behavior of SQL transformer in Deltastreamer.")
+public class SqlTransformerConfig extends HoodieConfig {
+  public static final ConfigProperty<String> TRANSFORMER_SQL_FILE = ConfigProperty
+      .key("hoodie.deltastreamer.transformer.sql.file")
+      .noDefaultValue()
+      .withDocumentation("File with a SQL script to be executed during write");
+
+  public static final ConfigProperty<String> TRANSFORMER_SQL = ConfigProperty
+      .key("hoodie.deltastreamer.transformer.sql")
+      .noDefaultValue()
+      .withDocumentation("SQL Query to be executed during write");
+}
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlFileBasedTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlFileBasedTransformer.java
index a53b50431c8..598e3f138af 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlFileBasedTransformer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlFileBasedTransformer.java
@@ -18,11 +18,13 @@
 
 package org.apache.hudi.utilities.transform;
 
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hudi.common.config.TypedProperties;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.utilities.config.SqlTransformerConfig;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -66,10 +68,10 @@ public class SqlFileBasedTransformer implements Transformer {
       final Dataset<Row> rowDataset,
       final TypedProperties props) {
 
-    final String sqlFile = props.getString(Config.TRANSFORMER_SQL_FILE);
+    final String sqlFile = props.getString(SqlTransformerConfig.TRANSFORMER_SQL_FILE.key());
     if (null == sqlFile) {
       throw new IllegalArgumentException(
-          "Missing required configuration : (" + Config.TRANSFORMER_SQL_FILE + ")");
+          "Missing required configuration : (" + SqlTransformerConfig.TRANSFORMER_SQL_FILE.key() + ")");
     }
 
     final FileSystem fs = FSUtils.getFs(sqlFile, jsc.hadoopConfiguration(), true);
@@ -99,10 +101,4 @@ public class SqlFileBasedTransformer implements Transformer {
       sparkSession.catalog().dropTempView(tmpTable);
     }
   }
-
-  /** Configs supported. */
-  private static class Config {
-
-    private static final String TRANSFORMER_SQL_FILE = "hoodie.deltastreamer.transformer.sql.file";
-  }
 }
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java
index e39ca746314..dc4d0a70220 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/SqlQueryBasedTransformer.java
@@ -19,6 +19,7 @@
 package org.apache.hudi.utilities.transform;
 
 import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.utilities.config.SqlTransformerConfig;
 
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
@@ -41,20 +42,12 @@ public class SqlQueryBasedTransformer implements Transformer {
   private static final String SRC_PATTERN = "<SRC>";
   private static final String TMP_TABLE = "HOODIE_SRC_TMP_TABLE_";
 
-  /**
-   * Configs supported.
-   */
-  static class Config {
-
-    private static final String TRANSFORMER_SQL = "hoodie.deltastreamer.transformer.sql";
-  }
-
   @Override
   public Dataset<Row> apply(JavaSparkContext jsc, SparkSession sparkSession, Dataset<Row> rowDataset,
       TypedProperties properties) {
-    String transformerSQL = properties.getString(Config.TRANSFORMER_SQL);
+    String transformerSQL = properties.getString(SqlTransformerConfig.TRANSFORMER_SQL.key());
     if (null == transformerSQL) {
-      throw new IllegalArgumentException("Missing configuration : (" + Config.TRANSFORMER_SQL + ")");
+      throw new IllegalArgumentException("Missing configuration : (" + SqlTransformerConfig.TRANSFORMER_SQL.key() + ")");
     }
 
     // tmp table name doesn't like dashes