You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by "rmahindra123 (via GitHub)" <gi...@apache.org> on 2023/04/21 05:52:35 UTC

[GitHub] [hudi] rmahindra123 commented on a diff in pull request #8514: [HUDI-6113] Support multiple transformers using the same config keys in DeltaStreamer

rmahindra123 commented on code in PR #8514:
URL: https://github.com/apache/hudi/pull/8514#discussion_r1173328676


##########
hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java:
##########
@@ -19,24 +19,61 @@
 package org.apache.hudi.utilities.transform;
 
 import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.util.ReflectionUtils;
+import org.apache.hudi.common.util.StringUtils;
 
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
 
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.stream.Collectors;
 
 /**
  * A {@link Transformer} to chain other {@link Transformer}s and apply sequentially.
  */
 public class ChainedTransformer implements Transformer {
 
-  private List<Transformer> transformers;
+  // Delimiter used to separate class name and the property key suffix. The suffix comes first.
+  private static final String TRANSFORMER_CLASS_NAME_KEY_SUFFIX_DELIMITER = ":";
+
+  private final List<Transformer> transformers;
+  private final Map<Transformer, String> transformerToPropKeySuffix;
 
   public ChainedTransformer(List<Transformer> transformers) {
     this.transformers = transformers;
+    this.transformerToPropKeySuffix = new HashMap<>(transformers.size());
+    for (Transformer transformer : this.transformers) {
+      transformerToPropKeySuffix.put(transformer, "");
+    }
+  }
+
+  public ChainedTransformer(List<String> configuredTransformers, int... ignore) {

Review Comment:
   Where is this constructor being called from?
   Also, please add Javadoc for this constructor.



##########
hudi-utilities/src/main/java/org/apache/hudi/utilities/transform/ChainedTransformer.java:
##########
@@ -19,24 +19,61 @@
 package org.apache.hudi.utilities.transform;
 
 import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.util.ReflectionUtils;
+import org.apache.hudi.common.util.StringUtils;
 
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Row;
 import org.apache.spark.sql.SparkSession;
 
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.stream.Collectors;
 
 /**
  * A {@link Transformer} to chain other {@link Transformer}s and apply sequentially.
  */
 public class ChainedTransformer implements Transformer {
 
-  private List<Transformer> transformers;
+  // Delimiter used to separate class name and the property key suffix. The suffix comes first.
+  private static final String TRANSFORMER_CLASS_NAME_KEY_SUFFIX_DELIMITER = ":";
+
+  private final List<Transformer> transformers;
+  private final Map<Transformer, String> transformerToPropKeySuffix;
 
   public ChainedTransformer(List<Transformer> transformers) {
     this.transformers = transformers;
+    this.transformerToPropKeySuffix = new HashMap<>(transformers.size());
+    for (Transformer transformer : this.transformers) {
+      transformerToPropKeySuffix.put(transformer, "");
+    }
+  }
+
+  public ChainedTransformer(List<String> configuredTransformers, int... ignore) {

Review Comment:
   Where is this constructor being called from?
   Also, please add Javadoc for this constructor.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org