You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by ku...@apache.org on 2020/03/11 20:31:39 UTC

[incubator-gobblin] branch master updated: [GOBBLIN-1079][Gobblin-1079] set extract.is.full property

This is an automated email from the ASF dual-hosted git repository.

kuyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 6418dcf  [GOBBLIN-1079][Gobblin-1079] set extract.is.full property
6418dcf is described below

commit 6418dcfbb928aaade047ef51366595957d7bdf81
Author: Arjun <ab...@linkedin.com>
AuthorDate: Wed Mar 11 13:31:24 2020 -0700

    [GOBBLIN-1079][Gobblin-1079] set extract.is.full property
    
    Dear Gobblin maintainers,
    
    Please accept this PR. I understand that it will
    not be reviewed until I have checked off all the
    steps below!
    yukuai518  please review
    
    ### JIRA
    - [x] My PR addresses the following
    [GOBBLIN-1079](https://issues.apache.org/jira/browse/GOBBLIN-1079)
    issue and references it in the PR title.
    
    ### Description
    - [x] Here are some details about my PR, including
    screenshots (if applicable):
    A new pull job pulls from a file-based source (S3) with the
    snapshot_only extract type, but the job still writes its
    output as _append instead of _full.
    This PR copies the extract.is.full property into the
    extract state so that the output path is calculated correctly.
    
    ### Tests
    - [x] My PR adds the following unit tests __OR__
    does not need testing for this extremely good
    reason:
    trivial changes
    
    ### Commits
    - [x] My commits all reference JIRA issues in
    their subject lines, and I have squashed multiple
    commits if they address the same issue. In
    addition, my commits follow the guidelines from
    "[How to write a good git commit
    message](http://chris.beams.io/posts/git-commit/)":
        1. Subject is separated from body by a blank line
        2. Subject is limited to 50 characters
        3. Subject does not end with a period
        4. Subject uses the imperative mood ("add", not
    "adding")
        5. Body wraps at 72 characters
        6. Body explains "what" and "why", not "how"
    
    Closes #2918 from arjun4084346/extractIsFullFix
---
 .../java/org/apache/gobblin/configuration/ConfigurationKeys.java    | 1 +
 .../apache/gobblin/source/extractor/filebased/FileBasedSource.java  | 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java
index e8c04c2..6075725 100644
--- a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java
+++ b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java
@@ -296,6 +296,7 @@ public class ConfigurationKeys {
   public static final String EXTRACT_TABLE_NAME_KEY = "extract.table.name";
   public static final String EXTRACT_EXTRACT_ID_KEY = "extract.extract.id";
   public static final String EXTRACT_IS_FULL_KEY = "extract.is.full";
+  public static final String DEFAULT_EXTRACT_IS_FULL = "false";
   public static final String EXTRACT_FULL_RUN_TIME_KEY = "extract.full.run.time";
   public static final String EXTRACT_PRIMARY_KEY_FIELDS_KEY = "extract.primary.key.fields";
   public static final String EXTRACT_DELTA_FIELDS_KEY = "extract.delta.fields";
diff --git a/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/filebased/FileBasedSource.java b/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/filebased/FileBasedSource.java
index 1e5bdc3..941935c 100644
--- a/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/filebased/FileBasedSource.java
+++ b/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/filebased/FileBasedSource.java
@@ -24,8 +24,6 @@ import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
@@ -50,9 +48,9 @@ import org.apache.gobblin.dataset.DatasetDescriptor;
 import org.apache.gobblin.metrics.event.lineage.LineageInfo;
 import org.apache.gobblin.source.extractor.extract.AbstractSource;
 import org.apache.gobblin.source.workunit.Extract;
+import org.apache.gobblin.source.workunit.Extract.TableType;
 import org.apache.gobblin.source.workunit.MultiWorkUnit;
 import org.apache.gobblin.source.workunit.WorkUnit;
-import org.apache.gobblin.source.workunit.Extract.TableType;
 
 
 /**
@@ -192,6 +190,8 @@ public abstract class FileBasedSource<S, D> extends AbstractSource<S, D> {
         SourceState extractState = new SourceState();
         extractState.setProp(ConfigurationKeys.EXTRACT_ID_TIME_ZONE,
                 state.getProp(ConfigurationKeys.EXTRACT_ID_TIME_ZONE, ConfigurationKeys.DEFAULT_EXTRACT_ID_TIME_ZONE));
+        extractState.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY,
+                state.getProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, ConfigurationKeys.DEFAULT_EXTRACT_IS_FULL));
         Extract extract = new Extract(extractState, tableType, nameSpaceName, extractTableName);
 
         WorkUnit workUnit = WorkUnit.create(extract);