You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by ku...@apache.org on 2020/03/11 20:31:39 UTC
[incubator-gobblin] branch master updated:
[GOBBLIN-1079][Gobblin-1079] set extract.is.full property
This is an automated email from the ASF dual-hosted git repository.
kuyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 6418dcf [GOBBLIN-1079][Gobblin-1079] set extract.is.full property
6418dcf is described below
commit 6418dcfbb928aaade047ef51366595957d7bdf81
Author: Arjun <ab...@linkedin.com>
AuthorDate: Wed Mar 11 13:31:24 2020 -0700
[GOBBLIN-1079][Gobblin-1079] set extract.is.full property
Dear Gobblin maintainers,
Please accept this PR. I understand that it will
not be reviewed until I have checked off all the
steps below!
yukuai518 please review
### JIRA
- [x] My PR addresses the following
[Gobblin-1079](https://issues.apache.org/jira/browse/GOBBLIN/)
issues and references them in the PR title.
### Description
- [x] Here are some details about my PR, including
screenshots (if applicable):
A new pull job pulls from the file-based source S3
with the snapshot_only extract type, but the job
still writes its output as _append instead of _full.
This PR passes the extract.is.full property through
to the extract state (defaulting to "false") so that
the output path is calculated correctly.
### Tests
- [x] My PR adds the following unit tests __OR__
does not need testing for this extremely good
reason:
trivial changes
### Commits
- [x] My commits all reference JIRA issues in
their subject lines, and I have squashed multiple
commits if they address the same issue. In
addition, my commits follow the guidelines from
"[How to write a good git commit message](http://chris.beams.io/posts/git-commit/)":
1. Subject is separated from body by a blank line
2. Subject is limited to 50 characters
3. Subject does not end with a period
4. Subject uses the imperative mood ("add", not
"adding")
5. Body wraps at 72 characters
6. Body explains "what" and "why", not "how"
Closes #2918 from arjun4084346/extractIsFullFix
---
.../java/org/apache/gobblin/configuration/ConfigurationKeys.java | 1 +
.../apache/gobblin/source/extractor/filebased/FileBasedSource.java | 6 +++---
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java
index e8c04c2..6075725 100644
--- a/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java
+++ b/gobblin-api/src/main/java/org/apache/gobblin/configuration/ConfigurationKeys.java
@@ -296,6 +296,7 @@ public class ConfigurationKeys {
public static final String EXTRACT_TABLE_NAME_KEY = "extract.table.name";
public static final String EXTRACT_EXTRACT_ID_KEY = "extract.extract.id";
public static final String EXTRACT_IS_FULL_KEY = "extract.is.full";
+ public static final String DEFAULT_EXTRACT_IS_FULL = "false";
public static final String EXTRACT_FULL_RUN_TIME_KEY = "extract.full.run.time";
public static final String EXTRACT_PRIMARY_KEY_FIELDS_KEY = "extract.primary.key.fields";
public static final String EXTRACT_DELTA_FIELDS_KEY = "extract.delta.fields";
diff --git a/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/filebased/FileBasedSource.java b/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/filebased/FileBasedSource.java
index 1e5bdc3..941935c 100644
--- a/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/filebased/FileBasedSource.java
+++ b/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/filebased/FileBasedSource.java
@@ -24,8 +24,6 @@ import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import java.util.Set;
@@ -50,9 +48,9 @@ import org.apache.gobblin.dataset.DatasetDescriptor;
import org.apache.gobblin.metrics.event.lineage.LineageInfo;
import org.apache.gobblin.source.extractor.extract.AbstractSource;
import org.apache.gobblin.source.workunit.Extract;
+import org.apache.gobblin.source.workunit.Extract.TableType;
import org.apache.gobblin.source.workunit.MultiWorkUnit;
import org.apache.gobblin.source.workunit.WorkUnit;
-import org.apache.gobblin.source.workunit.Extract.TableType;
/**
@@ -192,6 +190,8 @@ public abstract class FileBasedSource<S, D> extends AbstractSource<S, D> {
SourceState extractState = new SourceState();
extractState.setProp(ConfigurationKeys.EXTRACT_ID_TIME_ZONE,
state.getProp(ConfigurationKeys.EXTRACT_ID_TIME_ZONE, ConfigurationKeys.DEFAULT_EXTRACT_ID_TIME_ZONE));
+ extractState.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY,
+ state.getProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, ConfigurationKeys.DEFAULT_EXTRACT_IS_FULL));
Extract extract = new Extract(extractState, tableType, nameSpaceName, extractTableName);
WorkUnit workUnit = WorkUnit.create(extract);