You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by le...@apache.org on 2020/03/19 21:23:22 UTC

[incubator-gobblin] branch master updated: [GOBBLIN-1089] Refactor policyChecker for extensibility

This is an automated email from the ASF dual-hosted git repository.

lesun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new fcf62d1  [GOBBLIN-1089] Refactor policyChecker for extensibility
fcf62d1 is described below

commit fcf62d1848985ca93a5891ec454de12186b05cc7
Author: Lei Sun <au...@gmail.com>
AuthorDate: Thu Mar 19 14:23:07 2020 -0700

    [GOBBLIN-1089] Refactor policyChecker for extensibility
    
    Refactor policyChecker for extensibility
    
    Make RowLevelPolicyChecker configurable
    
    Make policy list visible to derived class
    
    Address the comments
    
    Closes #2930 from autumnust/policyCheckerRefactor
---
 .../qualitychecker/row/RowLevelPolicyChecker.java  | 39 ++++++++++++++--------
 .../row/RowLevelPolicyCheckerBuilder.java          |  8 ++++-
 .../row/RowLevelQualityCheckerTest.java            | 38 +++++++++++++++++++++
 3 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/row/RowLevelPolicyChecker.java b/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/row/RowLevelPolicyChecker.java
index 9fa6111..08d81fe 100644
--- a/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/row/RowLevelPolicyChecker.java
+++ b/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/row/RowLevelPolicyChecker.java
@@ -58,6 +58,7 @@ public class RowLevelPolicyChecker<S, D> implements Closeable, FinalState, Recor
     return this.list.stream().noneMatch(x -> x.getType().equals(RowLevelPolicy.Type.ERR_FILE)) || this.allowSpeculativeExecWhenWriteErrFile;
   }
 
+  @Getter
   private final List<RowLevelPolicy> list;
   private final String stateId;
   private final FileSystem fs;
@@ -102,25 +103,35 @@ public class RowLevelPolicyChecker<S, D> implements Closeable, FinalState, Recor
     for (RowLevelPolicy p : this.list) {
       RowLevelPolicy.Result result = p.executePolicy(record);
       results.put(p, result);
+      if (!checkResult(result, p, record)) {
+        return false;
+      }
+    }
+    return true;
+  }
 
-      if (result.equals(RowLevelPolicy.Result.FAILED)) {
-        if (p.getType().equals(RowLevelPolicy.Type.FAIL)) {
-          throw new RuntimeException("RowLevelPolicy " + p + " failed on record " + record);
-        } else if (p.getType().equals(RowLevelPolicy.Type.ERR_FILE)) {
-          if (this.sampler.acceptNext()) {
-            if (!this.errFileOpen) {
-              this.writer.open(getErrFilePath(p));
-              this.writer.write(record);
-            } else {
-              this.writer.write(record);
-            }
-            this.errFileOpen = true;
+  /**
+   * Handle the result of {@link RowLevelPolicy#executePolicy(Object)}
+   */
+  protected boolean checkResult(RowLevelPolicy.Result checkResult, RowLevelPolicy p, Object record) throws IOException {
+    boolean result = true;
+    if (checkResult.equals(RowLevelPolicy.Result.FAILED)) {
+      if (p.getType().equals(RowLevelPolicy.Type.FAIL)) {
+        throw new RuntimeException("RowLevelPolicy " + p + " failed on record " + record);
+      } else if (p.getType().equals(RowLevelPolicy.Type.ERR_FILE)) {
+        if (this.sampler.acceptNext()) {
+          if (!this.errFileOpen) {
+            this.writer.open(getErrFilePath(p));
+            this.writer.write(record);
+          } else {
+            this.writer.write(record);
           }
+          this.errFileOpen = true;
         }
-        return false;
       }
+      result = false;
     }
-    return true;
+    return result;
   }
 
   Path getErrFilePath(RowLevelPolicy policy) {
diff --git a/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/row/RowLevelPolicyCheckerBuilder.java b/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/row/RowLevelPolicyCheckerBuilder.java
index 47f54101..8a4fa30 100644
--- a/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/row/RowLevelPolicyCheckerBuilder.java
+++ b/gobblin-core/src/main/java/org/apache/gobblin/qualitychecker/row/RowLevelPolicyCheckerBuilder.java
@@ -22,6 +22,7 @@ import java.lang.reflect.Constructor;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.gobblin.util.reflection.GobblinConstructorUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -34,6 +35,9 @@ import org.apache.gobblin.util.WriterUtils;
 
 
 public class RowLevelPolicyCheckerBuilder {
+  public static final String ROW_LEVEL_POLICY_CHECKER_TYPE = "rowLevelPolicyCheckerType";
+  public static final String DEFAULT_ROW_LEVEL_POLICY_CHECKER_TYPE = RowLevelPolicyChecker.class.getName();
+
   private final State state;
   private final int index;
 
@@ -84,7 +88,9 @@ public class RowLevelPolicyCheckerBuilder {
 
   public RowLevelPolicyChecker build()
       throws Exception {
-    return new RowLevelPolicyChecker(createPolicyList(), this.state.getId(),
+    String klazz = this.state.contains(ROW_LEVEL_POLICY_CHECKER_TYPE)
+        ? this.state.getProp(ROW_LEVEL_POLICY_CHECKER_TYPE) : DEFAULT_ROW_LEVEL_POLICY_CHECKER_TYPE;
+    return GobblinConstructorUtils.invokeConstructor(RowLevelPolicyChecker.class, klazz, createPolicyList(), this.state.getId(),
         WriterUtils.getWriterFS(this.state, 1, 0), this.state);
   }
 }
diff --git a/gobblin-core/src/test/java/org/apache/gobblin/qualitychecker/row/RowLevelQualityCheckerTest.java b/gobblin-core/src/test/java/org/apache/gobblin/qualitychecker/row/RowLevelQualityCheckerTest.java
index 48eb818..2725e06 100644
--- a/gobblin-core/src/test/java/org/apache/gobblin/qualitychecker/row/RowLevelQualityCheckerTest.java
+++ b/gobblin-core/src/test/java/org/apache/gobblin/qualitychecker/row/RowLevelQualityCheckerTest.java
@@ -22,7 +22,10 @@ import org.apache.gobblin.configuration.State;
 import org.apache.gobblin.qualitychecker.TestConstants;
 import org.apache.gobblin.qualitychecker.TestRowLevelPolicy;
 import java.io.File;
+import java.io.Flushable;
+import java.io.IOException;
 import java.net.URI;
+import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -31,6 +34,9 @@ import org.apache.avro.file.FileReader;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.DatumReader;
+import org.apache.gobblin.records.ControlMessageHandler;
+import org.apache.gobblin.records.FlushControlMessageHandler;
+import org.apache.gobblin.stream.FlushControlMessage;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -38,6 +44,7 @@ import org.testng.Assert;
 import org.testng.annotations.Test;
 
 import static org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker.ALLOW_SPECULATIVE_EXECUTION_WITH_ERR_FILE_POLICY;
+import static org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckerBuilder.ROW_LEVEL_POLICY_CHECKER_TYPE;
 
 
 @Test(groups = {"gobblin.qualitychecker"})
@@ -60,6 +67,16 @@ public class RowLevelQualityCheckerTest {
     }
   }
 
+  // Verify rowPolicyChecker is configurable.
+  public void testRowPolicyCheckerBuilder() throws Exception {
+    State state = new State();
+    state.setProp(ROW_LEVEL_POLICY_CHECKER_TYPE,
+        "org.apache.gobblin.qualitychecker.row.RowLevelQualityCheckerTest$TestRowLevelPolicyChecker");
+    RowLevelPolicyChecker checker = RowLevelPolicyCheckerBuilderFactory.newPolicyCheckerBuilder(state, 0).build();
+    Assert.assertTrue(checker instanceof TestRowLevelPolicyChecker);
+    Assert.assertTrue(checker.getMessageHandler() instanceof FlushControlMessageHandler);
+  }
+
   public void testFileNameWithTimestamp() throws Exception {
     State state = new State();
     state.setProp(ConfigurationKeys.ROW_LEVEL_POLICY_LIST, "org.apache.gobblin.qualitychecker.TestRowLevelPolicy");
@@ -121,4 +138,25 @@ public class RowLevelQualityCheckerTest {
     FileReader<GenericRecord> fileReader = DataFileReader.openReader(new File(TestConstants.TEST_FILE_NAME), reader);
     return fileReader;
   }
+
+  /**
+   * An extension of {@link RowLevelPolicyChecker} just for verifying class type when specifying derived class
+   * from configuration.
+   */
+  public static class TestRowLevelPolicyChecker extends RowLevelPolicyChecker {
+    public TestRowLevelPolicyChecker(List list, String stateId, FileSystem fs, State state) {
+      super(list, stateId, fs, state);
+    }
+
+    @Override
+    protected ControlMessageHandler getMessageHandler() {
+      return new FlushControlMessageHandler(new Flushable() {
+        @Override
+        public void flush()
+            throws IOException {
+          // do nothing
+        }
+      });
+    }
+  }
 }