You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by to...@apache.org on 2008/09/05 12:41:44 UTC

svn commit: r692407 - in /hadoop/core/trunk: CHANGES.txt src/core/org/apache/hadoop/fs/FileSystem.java src/core/org/apache/hadoop/fs/GlobExpander.java src/test/org/apache/hadoop/fs/TestGlobExpander.java src/test/org/apache/hadoop/fs/TestGlobPaths.java

Author: tomwhite
Date: Fri Sep  5 03:41:43 2008
New Revision: 692407

URL: http://svn.apache.org/viewvc?rev=692407&view=rev
Log:
HADOOP-3498. File globbing alternation should be able to span path components.

Added:
    hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java
    hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java
Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
    hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=692407&r1=692406&r2=692407&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Fri Sep  5 03:41:43 2008
@@ -267,6 +267,9 @@
     HADOOP-3943. Remove unnecessary synchronization in 
     NetworkTopology.pseudoSortByDistance. (hairong via omalley)
 
+    HADOOP-3498. File globbing alternation should be able to span path
+    components. (tomwhite)
+
   OPTIMIZATIONS
 
     HADOOP-3556. Removed lock contention in MD5Hash by changing the 

Modified: hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java?rev=692407&r1=692406&r2=692407&view=diff
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java (original)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/FileSystem.java Fri Sep  5 03:41:43 2008
@@ -843,7 +843,7 @@
   public FileStatus[] globStatus(Path pathPattern) throws IOException {
     return globStatus(pathPattern, DEFAULT_FILTER);
   }
-
+  
   /**
    * Return an array of FileStatus objects whose path names match pathPattern
    * and is accepted by the user-supplied path filter. Results are sorted by
@@ -860,6 +860,24 @@
    */
   public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
       throws IOException {
+    String filename = pathPattern.toUri().getPath();
+    List<String> filePatterns = GlobExpander.expand(filename); // split curly groups that contain "/" into separate patterns
+    if (filePatterns.size() == 1) { // NOTE(review): also true when a one-alternative group such as "{a/b}" expands to a single pattern
+      return globStatusInternal(pathPattern, filter); // globs the original, unexpanded pathPattern
+    } else {
+      List<FileStatus> results = new ArrayList<FileStatus>(); // matches are appended per pattern, in expansion order; no sort here
+      for (String filePattern : filePatterns) { // NOTE(review): patterns derive from toUri().getPath() only - confirm dropping the rest of the URI is intended
+        FileStatus[] files = globStatusInternal(new Path(filePattern), filter);
+        for (FileStatus file : files) {
+          results.add(file);
+        }
+      }
+      return results.toArray(new FileStatus[results.size()]);
+    }
+  }
+
+  private FileStatus[] globStatusInternal(Path pathPattern, PathFilter filter)
+      throws IOException {
     Path[] parents = new Path[1];
     int level = 0;
     String filename = pathPattern.toUri().getPath();

Added: hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java?rev=692407&view=auto
==============================================================================
--- hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java (added)
+++ hadoop/core/trunk/src/core/org/apache/hadoop/fs/GlobExpander.java Fri Sep  5 03:41:43 2008
@@ -0,0 +1,166 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+class GlobExpander {
+  
+  static class StringWithOffset { // a pattern string paired with the index at which scanning should start
+    String string; // the (possibly partially expanded) file pattern
+    int offset; // passed to leftmostOuterCurlyContainingSlash as the first index to examine
+    public StringWithOffset(String string, int offset) {
+      super();
+      this.string = string;
+      this.offset = offset;
+    }
+  }
+  
+  /**
+   * Expands <code>filePattern</code> into a list of file patterns in which
+   * no curly bracket pair contains an unescaped slash character ("/").
+   * Curly bracket pairs that contain no slash are left unexpanded.
+   * @param filePattern the glob pattern to expand
+   * @return expanded file patterns, in left-to-right alternative order
+   * @throws IOException if a backslash is not followed by a character
+   */
+  public static List<String> expand(String filePattern) throws IOException {
+    List<String> fullyExpanded = new ArrayList<String>();
+    List<StringWithOffset> toExpand = new ArrayList<StringWithOffset>(); // worklist, always consumed from the front
+    toExpand.add(new StringWithOffset(filePattern, 0));
+    while (!toExpand.isEmpty()) {
+      StringWithOffset path = toExpand.remove(0);
+      List<StringWithOffset> expanded = expandLeftmost(path);
+      if (expanded == null) { // null: no curly pair containing a slash remains in this pattern
+        fullyExpanded.add(path.string);
+      } else {
+        toExpand.addAll(0, expanded); // put alternatives at the front so they are finished before later entries
+      }
+    }
+    return fullyExpanded;
+  }
+  
+  /**
+   * Expand the leftmost outer curly bracket pair containing a
+   * slash character ("/"), searching from the given offset.
+   * @param filePatternWithOffset the pattern and the index to search from
+   * @return one pattern per alternative, or null if no such pair exists
+   * @throws IOException if a backslash is not followed by a character
+   */
+  private static List<StringWithOffset> expandLeftmost(StringWithOffset
+      filePatternWithOffset) throws IOException {
+    
+    String filePattern = filePatternWithOffset.string;
+    int leftmost = leftmostOuterCurlyContainingSlash(filePattern,
+        filePatternWithOffset.offset);
+    if (leftmost == -1) {
+      return null; // nothing to expand
+    }
+    int curlyOpen = 0; // nesting depth of open curly brackets
+    StringBuilder prefix = new StringBuilder(filePattern.substring(0, leftmost)); // text before the bracket, copied unchanged
+    StringBuilder suffix = new StringBuilder(); // text after the matching closing bracket
+    List<String> alts = new ArrayList<String>(); // completed top-level alternatives
+    StringBuilder alt = new StringBuilder(); // alternative currently being collected
+    StringBuilder cur = prefix; // buffer receiving characters; switches to alt, then to suffix
+    for (int i = leftmost; i < filePattern.length(); i++) {
+      char c = filePattern.charAt(i);
+      if (cur == suffix) { // past the closing bracket: copy verbatim, backslashes included
+        cur.append(c);
+      } else if (c == '\\') {
+        i++; // move to the escaped character
+        if (i >= filePattern.length()) {
+          throw new IOException("Illegal file pattern: "
+              + "An escaped character does not present for glob "
+              + filePattern + " at " + i);
+        }
+        c = filePattern.charAt(i);
+        cur.append(c); // only the escaped character is kept; the backslash itself is not copied
+      } else if (c == '{') {
+        if (curlyOpen++ == 0) { // outermost bracket: start collecting the first alternative
+          alt.setLength(0);
+          cur = alt;
+        } else {
+          cur.append(c); // nested bracket stays as literal text inside the alternative
+        }
+
+      } else if (c == '}' && curlyOpen > 0) {
+        if (--curlyOpen == 0) { // outermost bracket closed: finish the last alternative
+          alts.add(alt.toString());
+          alt.setLength(0);
+          cur = suffix;
+        } else {
+          cur.append(c);
+        }
+      } else if (c == ',') {
+        if (curlyOpen == 1) { // a top-level comma separates alternatives
+          alts.add(alt.toString());
+          alt.setLength(0);
+        } else {
+          cur.append(c); // comma inside a nested bracket is kept as-is
+        }
+      } else {
+        cur.append(c);
+      }
+    }
+    List<StringWithOffset> exp = new ArrayList<StringWithOffset>();
+    for (String string : alts) {
+      exp.add(new StringWithOffset(prefix + string + suffix, prefix.length())); // next scan resumes where the expanded bracket began
+    }
+    return exp;
+  }
+  
+  /**
+   * Finds the index of the leftmost outermost opening curly bracket whose
+   * pair contains an unescaped slash ("/"), scanning from <code>offset</code>.
+   * @param filePattern the pattern to scan
+   * @param offset the index at which to start scanning
+   * @return the index of that opening curly bracket, or -1 if there is none
+   * @throws IOException if a backslash is not followed by a character
+   */
+  private static int leftmostOuterCurlyContainingSlash(String filePattern,
+      int offset) throws IOException {
+    int curlyOpen = 0; // nesting depth of open curly brackets
+    int leftmost = -1; // index of the most recent outermost '{'
+    boolean seenSlash = false; // set once a '/' is seen while inside any bracket
+    for (int i = offset; i < filePattern.length(); i++) {
+      char c = filePattern.charAt(i);
+      if (c == '\\') {
+        i++; // skip the escaped character so it is never treated as a bracket or slash
+        if (i >= filePattern.length()) {
+          throw new IOException("Illegal file pattern: "
+              + "An escaped character does not present for glob "
+              + filePattern + " at " + i);
+        }
+      } else if (c == '{') {
+        if (curlyOpen++ == 0) {
+          leftmost = i;
+        }
+      } else if (c == '}' && curlyOpen > 0) { // a '}' with no open bracket is ignored
+        if (--curlyOpen == 0 && leftmost != -1 && seenSlash) {
+          return leftmost;
+        }
+      } else if (c == '/' && curlyOpen > 0) {
+        seenSlash = true;
+      }
+    }
+    return -1; // an unclosed bracket never qualifies
+  }
+
+}

Added: hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java?rev=692407&view=auto
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java (added)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobExpander.java Fri Sep  5 03:41:43 2008
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import java.io.IOException;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+public class TestGlobExpander extends TestCase { // unit tests for GlobExpander.expand
+
+  public void testExpansionIsIdentical() throws IOException { // patterns that expand() must return unchanged as a single entry
+    checkExpansionIsIdentical("");
+    checkExpansionIsIdentical("/}");
+    checkExpansionIsIdentical("/}{a,b}");
+    checkExpansionIsIdentical("{/");
+    checkExpansionIsIdentical("{a}");
+    checkExpansionIsIdentical("{a,b}/{b,c}");
+    checkExpansionIsIdentical("p\\{a/b,c/d\\}s");
+    checkExpansionIsIdentical("p{a\\/b,c\\/d}s");
+  }
+
+  public void testExpansion() throws IOException { // each call: input pattern followed by its expected expansions, in order
+    checkExpansion("{a/b}", "a/b");
+    checkExpansion("/}{a/b}", "/}a/b");
+    checkExpansion("p{a/b,c/d}s", "pa/bs", "pc/ds");
+    checkExpansion("{a/b,c/d,{e,f}}", "a/b", "c/d", "{e,f}");
+    checkExpansion("{a/b,c/d}{e,f}", "a/b{e,f}", "c/d{e,f}");
+    checkExpansion("{a,b}/{b,{c/d,e/f}}", "{a,b}/b", "{a,b}/c/d", "{a,b}/e/f");
+    checkExpansion("{a,b}/{c/\\d}", "{a,b}/c/d");
+  }
+
+  private void checkExpansionIsIdentical(String filePattern) throws IOException { // expects exactly one expansion equal to the input
+    checkExpansion(filePattern, filePattern);
+  }
+
+  private void checkExpansion(String filePattern, String... expectedExpansions) // compares count first, then each entry by position
+      throws IOException {
+    List<String> actualExpansions = GlobExpander.expand(filePattern);
+    assertEquals("Different number of expansions", expectedExpansions.length,
+        actualExpansions.size());
+    for (int i = 0; i < expectedExpansions.length; i++) {
+      assertEquals("Expansion of " + filePattern, expectedExpansions[i],
+          actualExpansions.get(i));
+    }
+  }
+}

Modified: hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java?rev=692407&r1=692406&r2=692407&view=diff
==============================================================================
--- hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java (original)
+++ hadoop/core/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java Fri Sep  5 03:41:43 2008
@@ -266,6 +266,28 @@
     } finally {
       cleanupDFS();
     }
+    // cross-component curlies
+    try {
+      files = new String[] { USER_DIR+"/a/b", USER_DIR+"/a/d",
+                             USER_DIR+"/c/b", USER_DIR+"/c/d" };
+      matchedPath = prepareTesting(USER_DIR+"/{a/b,c/d}", files);
+      assertEquals(matchedPath.length, 2);
+      assertEquals(matchedPath[0], path[0]);
+      assertEquals(matchedPath[1], path[3]);
+    } finally {
+      cleanupDFS();
+    }
+    // cross-component absolute curlies
+    try {
+      files = new String[] { "/a/b", "/a/d",
+                             "/c/b", "/c/d" };
+      matchedPath = prepareTesting("{/a/b,/c/d}", files);
+      assertEquals(matchedPath.length, 2);
+      assertEquals(matchedPath[0], path[0]);
+      assertEquals(matchedPath[1], path[3]);
+    } finally {
+      cleanupDFS();
+    }
     try {
       // test standalone }
       files = new String[] {USER_DIR+"/}bc", USER_DIR+"/}c"};