You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by zi...@apache.org on 2021/10/18 19:02:49 UTC
[gobblin] branch master updated: [GOBBLIN-1559] Support wildcard for input paths (#3410)
This is an automated email from the ASF dual-hosted git repository.
zihanli58 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git
The following commit(s) were added to refs/heads/master by this push:
new 0d8503e [GOBBLIN-1559] Support wildcard for input paths (#3410)
0d8503e is described below
commit 0d8503e0e10d3cfdcc757c997a51b3e95a598c50
Author: umustafi <um...@gmail.com>
AuthorDate: Mon Oct 18 11:02:26 2021 -0700
[GOBBLIN-1559] Support wildcard for input paths (#3410)
* [GOBBLIN-1559] Support wildcard for input paths
* [GOBBLIN-1559] Support wildcard for input paths
* remove new check and allow 'other' to be glob
* go back to adding special case for exact match of this & other
Co-authored-by: Urmi Mustafi <um...@umustafi-mn1.linkedin.biz>
---
.../service/modules/dataset/FSDatasetDescriptor.java | 8 ++++++++
.../service/modules/dataset/FSDatasetDescriptorTest.java | 14 ++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptor.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptor.java
index 57f23b1..996fa96 100644
--- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptor.java
+++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptor.java
@@ -89,6 +89,12 @@ public class FSDatasetDescriptor extends BaseDatasetDescriptor implements Datase
protected boolean isPathContaining(DatasetDescriptor other) {
String otherPath = other.getPath();
String otherSubPaths = ((FSDatasetDescriptor) other).getSubPaths();
+
+ // This allows the special case where "other" is a glob, but is also an exact match with "this" path.
+ if (getPath().equals(otherPath)) {
+ return true;
+ }
+
if (otherSubPaths != null) {
List<String> subPaths = Splitter.on(",").splitToList(StringUtils.stripEnd(StringUtils.stripStart(otherSubPaths, "{"), "}"));
for (String subPath : subPaths) {
@@ -117,9 +123,11 @@ public class FSDatasetDescriptor extends BaseDatasetDescriptor implements Datase
if (DatasetDescriptorConfigKeys.DATASET_DESCRIPTOR_CONFIG_ANY.equals(this.getPath())) {
return true;
}
+
if (PathUtils.isGlob(new Path(otherPath))) {
return false;
}
+
GlobPattern globPattern = new GlobPattern(this.getPath());
return globPattern.matches(otherPath);
}
diff --git a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptorTest.java b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptorTest.java
index 88e3759..daafe56 100644
--- a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptorTest.java
+++ b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptorTest.java
@@ -83,6 +83,20 @@ public class FSDatasetDescriptorTest {
}
@Test
+ public void testContainsMatchingPaths() throws IOException {
+ // Paths that match exactly should be accepted, and that should allow glob patterns as input paths for the self serve edges
+ Config config1 = ConfigFactory.empty().withValue(DatasetDescriptorConfigKeys.PATH_KEY, ConfigValueFactory.fromAnyRef("/a/b/c/*"))
+ .withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, ConfigValueFactory.fromAnyRef("hdfs"));
+ FSDatasetDescriptor descriptor1 = new FSDatasetDescriptor(config1);
+
+ Config config2 = ConfigFactory.empty().withValue(DatasetDescriptorConfigKeys.PATH_KEY, ConfigValueFactory.fromAnyRef("/a/b/c/*"))
+ .withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, ConfigValueFactory.fromAnyRef("hdfs"));
+
+ FSDatasetDescriptor descriptor2 = new FSDatasetDescriptor(config2);
+ Assert.assertTrue(descriptor1.contains(descriptor2));
+ }
+
+ @Test
public void testEquals() throws IOException {
Config config1 = ConfigFactory.empty().withValue(DatasetDescriptorConfigKeys.PATH_KEY, ConfigValueFactory.fromAnyRef("/a/b/c/*"))
.withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, ConfigValueFactory.fromAnyRef("hdfs"));