You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@gobblin.apache.org by zi...@apache.org on 2021/10/18 19:02:49 UTC

[gobblin] branch master updated: [GOBBLIN-1559] Support wildcard for input paths (#3410)

This is an automated email from the ASF dual-hosted git repository.

zihanli58 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 0d8503e  [GOBBLIN-1559] Support wildcard for input paths (#3410)
0d8503e is described below

commit 0d8503e0e10d3cfdcc757c997a51b3e95a598c50
Author: umustafi <um...@gmail.com>
AuthorDate: Mon Oct 18 11:02:26 2021 -0700

    [GOBBLIN-1559] Support wildcard for input paths (#3410)
    
    * [GOBBLIN-1559] Support wildcard for input paths
    
    * [GOBBLIN-1559] Support wildcard for input paths
    
    * remove new check and allow 'other' to be glob
    
    * go back to adding special case for exact match of this & other
    
    Co-authored-by: Urmi Mustafi <um...@umustafi-mn1.linkedin.biz>
---
 .../service/modules/dataset/FSDatasetDescriptor.java       |  8 ++++++++
 .../service/modules/dataset/FSDatasetDescriptorTest.java   | 14 ++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptor.java b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptor.java
index 57f23b1..996fa96 100644
--- a/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptor.java
+++ b/gobblin-service/src/main/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptor.java
@@ -89,6 +89,12 @@ public class FSDatasetDescriptor extends BaseDatasetDescriptor implements Datase
   protected boolean isPathContaining(DatasetDescriptor other) {
     String otherPath = other.getPath();
     String otherSubPaths = ((FSDatasetDescriptor) other).getSubPaths();
+
+    // Special case: if the path of "other" is exactly equal to this descriptor's path, it is contained, even when that path is a glob.
+    if (getPath().equals(otherPath)) {
+      return true;
+    }
+
     if (otherSubPaths != null) {
       List<String> subPaths = Splitter.on(",").splitToList(StringUtils.stripEnd(StringUtils.stripStart(otherSubPaths, "{"), "}"));
       for (String subPath : subPaths) {
@@ -117,9 +123,11 @@ public class FSDatasetDescriptor extends BaseDatasetDescriptor implements Datase
     if (DatasetDescriptorConfigKeys.DATASET_DESCRIPTOR_CONFIG_ANY.equals(this.getPath())) {
       return true;
     }
+
     if (PathUtils.isGlob(new Path(otherPath))) {
       return false;
     }
+
     GlobPattern globPattern = new GlobPattern(this.getPath());
     return globPattern.matches(otherPath);
   }
diff --git a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptorTest.java b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptorTest.java
index 88e3759..daafe56 100644
--- a/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptorTest.java
+++ b/gobblin-service/src/test/java/org/apache/gobblin/service/modules/dataset/FSDatasetDescriptorTest.java
@@ -83,6 +83,20 @@ public class FSDatasetDescriptorTest {
   }
 
   @Test
+  public void testContainsMatchingPaths() throws IOException {
+    // Identical paths must be accepted even when they are glob patterns, so that glob patterns can be used as input paths for self-serve edges.
+    Config config1 = ConfigFactory.empty().withValue(DatasetDescriptorConfigKeys.PATH_KEY, ConfigValueFactory.fromAnyRef("/a/b/c/*"))
+        .withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, ConfigValueFactory.fromAnyRef("hdfs"));
+    FSDatasetDescriptor descriptor1 = new FSDatasetDescriptor(config1);
+
+    Config config2 = ConfigFactory.empty().withValue(DatasetDescriptorConfigKeys.PATH_KEY, ConfigValueFactory.fromAnyRef("/a/b/c/*"))
+        .withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, ConfigValueFactory.fromAnyRef("hdfs"));
+
+    FSDatasetDescriptor descriptor2 = new FSDatasetDescriptor(config2);
+    Assert.assertTrue(descriptor1.contains(descriptor2));
+  }
+
+  @Test
   public void testEquals() throws IOException {
     Config config1 = ConfigFactory.empty().withValue(DatasetDescriptorConfigKeys.PATH_KEY, ConfigValueFactory.fromAnyRef("/a/b/c/*"))
         .withValue(DatasetDescriptorConfigKeys.PLATFORM_KEY, ConfigValueFactory.fromAnyRef("hdfs"));