You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2007/10/05 19:57:54 UTC

svn commit: r582366 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/fs/FileSystem.java src/test/org/apache/hadoop/fs/TestGlobPaths.java

Author: dhruba
Date: Fri Oct  5 10:57:53 2007
New Revision: 582366

URL: http://svn.apache.org/viewvc?rev=582366&view=rev
Log:
HADOOP-1968. FileSystem supports wildcard input syntax "{ }".
(Hairong Kuang via dhruba)


Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=582366&r1=582365&r2=582366&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Oct  5 10:57:53 2007
@@ -81,6 +81,9 @@
     factories. Provide a StandardSocketFactory, and a SocksSocketFactory to
     allow the use of SOCKS proxies. (taton).
 
+    HADOOP-1968. FileSystem supports wildcard input syntax "{ }".
+    (Hairong Kuang via dhruba)
+
   OPTIMIZATIONS
 
     HADOOP-1910.  Reduce the number of RPCs that DistributedFileSystem.create()

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java?rev=582366&r1=582365&r2=582366&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java Fri Oct  5 10:57:53 2007
@@ -574,6 +574,14 @@
    *    <dt> <tt> \<i>c</i> </tt>
    *    <dd> Removes (escapes) any special meaning of character <i>c</i>.
    *
+   *    <p>
+   *    <dt> <tt> {ab,cd} </tt>
+   *    <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
+   *    
+   *    <p>
+   *    <dt> <tt> {ab,c{de,fh}} </tt>
+   *    <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt>
+   *
    *   </dl>
    *  </dd>
    * </dl>
@@ -652,11 +660,18 @@
       setRegex(filePattern);
     }
       
+    private boolean isJavaRegexSpecialChar(char pChar) {
+      return pChar == '.' || pChar == '$' || pChar == '(' || pChar == ')' ||
+             pChar == '|' || pChar == '+';
+    }
     void setRegex(String filePattern) throws IOException {
       int len;
       int setOpen;
+      int curlyOpen;
       boolean setRange;
-      StringBuffer fileRegex = new StringBuffer();
+      boolean expectGroup;
+
+      StringBuilder fileRegex = new StringBuilder();
 
       // Validate the pattern
       len = filePattern.length();
@@ -665,7 +680,9 @@
 
       setOpen = 0;
       setRange = false;
-        
+      curlyOpen = 0;
+      expectGroup = false;
+
       for (int i = 0; i < len; i++) {
         char pCh;
           
@@ -677,7 +694,7 @@
           if (i >= len)
             error("An escaped character does not present", filePattern, i);
           pCh = filePattern.charAt(i);
-        } else if (pCh == '.') {
+        } else if (isJavaRegexSpecialChar(pCh)) {
           fileRegex.append(PAT_ESCAPE);
         } else if (pCh == '*') {
           fileRegex.append(PAT_ANY);
@@ -685,6 +702,21 @@
         } else if (pCh == '?') {
           pCh = PAT_ANY;
           hasPattern = true;
+        } else if (pCh == '{') {
+          fileRegex.append('(');
+          pCh = '(';
+          curlyOpen++;
+        } else if (pCh == ',' && curlyOpen > 0) {
+          fileRegex.append(")|");
+          pCh = '(';
+          expectGroup = true;
+        } else if (pCh == '}' && curlyOpen > 0) {
+          // End of a group
+          if (expectGroup)
+            error("Unexpected end of a group", filePattern, i);
+          curlyOpen--;
+          fileRegex.append(")");
+          pCh = ')';
         } else if (pCh == '[' && setOpen == 0) {
           setOpen++;
           hasPattern = true;
@@ -704,15 +736,17 @@
           // Normal character, or the end of a character set range
           setOpen++;
           setRange = false;
+        } else if (curlyOpen > 0) {
+          expectGroup = false;
         }
         fileRegex.append(pCh);
       }
         
       // Check for a well-formed pattern
-      if (setOpen > 0 || setRange) {
+      if (setOpen > 0 || setRange || curlyOpen > 0) {
         // Incomplete character set or character range
-        error("Expecting set closure character or end of range", filePattern,
-              len);
+        error("Expecting set closure character or end of range, or }", 
+            filePattern, len);
       }
       regex = Pattern.compile(fileRegex.toString());
     }

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java?rev=582366&r1=582365&r2=582366&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java Fri Oct  5 10:57:53 2007
@@ -43,30 +43,29 @@
   }
   
   protected void tearDown() throws Exception {
-    dfsCluster.shutdown();
+    if(dfsCluster!=null) {
+      dfsCluster.shutdown();
+    }
   }
   
-  public void testGlob() {
-    try {
-      pTestLiteral();
-      pTestAny();
-      pTestClosure();
-      pTestSet();
-      pTestRange();
-      pTestSetExcl();
-      pTestCombination();
-      pTestRelativePath();
-    } catch(IOException e) {
-      e.printStackTrace();
-    } 
+  public void testGlob() throws Exception {
+    //pTestEscape(); // need to wait until HADOOP-1995 is fixed
+    pTestJavaRegexSpecialChars();
+    pTestCurlyBracket();
+    pTestLiteral();
+    pTestAny();
+    pTestClosure();
+    pTestSet();
+    pTestRange();
+    pTestSetExcl();
+    pTestCombination();
+    pTestRelativePath();
   }
   
   private void pTestLiteral() throws IOException {
     try {
-      String [] files = new String[2];
-      files[0] = USER_DIR+"/a2c";
-      files[1] = USER_DIR+"/ab\\[c.d";
-      Path[] matchedPath = prepareTesting(USER_DIR+"/ab\\[c.d", files);
+      String [] files = new String[] {USER_DIR+"/a2c", USER_DIR+"/abc.d"};
+      Path[] matchedPath = prepareTesting(USER_DIR+"/abc.d", files);
       assertEquals(matchedPath.length, 1);
       assertEquals(matchedPath[0], path[1]);
     } finally {
@@ -74,13 +73,21 @@
     }
   }
   
+  private void pTestEscape() throws IOException {
+    try {
+      String [] files = new String[] {USER_DIR+"/ab\\[c.d"};
+      Path[] matchedPath = prepareTesting(USER_DIR+"/ab\\[c.d", files);
+      assertEquals(matchedPath.length, 1);
+      assertEquals(matchedPath[0], path[0]);
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
   private void pTestAny() throws IOException {
     try {
-      String [] files = new String[4];
-      files[0] = USER_DIR+"/abc";
-      files[1] = USER_DIR+"/a2c";
-      files[2] = USER_DIR+"/a.c";
-      files[3] = USER_DIR+"/abcd";
+      String [] files = new String[] { USER_DIR+"/abc", USER_DIR+"/a2c",
+                                       USER_DIR+"/a.c", USER_DIR+"/abcd"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/a?c", files);
       assertEquals(matchedPath.length, 3);
       assertEquals(matchedPath[0], path[2]);
@@ -99,11 +106,8 @@
   
   private void pTestClosure1() throws IOException {
     try {
-      String [] files = new String[4];
-      files[0] = USER_DIR+"/a";
-      files[1] = USER_DIR+"/abc";
-      files[2] = USER_DIR+"/abc.p";
-      files[3] = USER_DIR+"/bacd";
+      String [] files = new String[] {USER_DIR+"/a", USER_DIR+"/abc",
+                                      USER_DIR+"/abc.p", USER_DIR+"/bacd"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/a*", files);
       assertEquals(matchedPath.length, 3);
       assertEquals(matchedPath[0], path[0]);
@@ -116,11 +120,8 @@
   
   private void pTestClosure2() throws IOException {
     try {
-      String [] files = new String[4];
-      files[0] = USER_DIR+"/a.";
-      files[1] = USER_DIR+"/a.txt";
-      files[2] = USER_DIR+"/a.old.java";
-      files[3] = USER_DIR+"/.java";
+      String [] files = new String[] {USER_DIR+"/a.", USER_DIR+"/a.txt",
+                                     USER_DIR+"/a.old.java", USER_DIR+"/.java"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/a.*", files);
       assertEquals(matchedPath.length, 3);
       assertEquals(matchedPath[0], path[0]);
@@ -133,11 +134,8 @@
   
   private void pTestClosure3() throws IOException {
     try {    
-      String [] files = new String[4];
-      files[0] = USER_DIR+"/a.txt.x";
-      files[1] = USER_DIR+"/ax";
-      files[2] = USER_DIR+"/ab37x";
-      files[3] = USER_DIR+"/bacd";
+      String [] files = new String[] {USER_DIR+"/a.txt.x", USER_DIR+"/ax",
+                                      USER_DIR+"/ab37x", USER_DIR+"/bacd"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/a*x", files);
       assertEquals(matchedPath.length, 3);
       assertEquals(matchedPath[0], path[0]);
@@ -150,11 +148,8 @@
   
   private void pTestSet() throws IOException {
     try {    
-      String [] files = new String[4];
-      files[0] = USER_DIR+"/a.c";
-      files[1] = USER_DIR+"/a.cpp";
-      files[2] = USER_DIR+"/a.hlp";
-      files[3] = USER_DIR+"/a.hxy";
+      String [] files = new String[] {USER_DIR+"/a.c", USER_DIR+"/a.cpp",
+                                      USER_DIR+"/a.hlp", USER_DIR+"/a.hxy"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/a.[ch]??", files);
       assertEquals(matchedPath.length, 3);
       assertEquals(matchedPath[0], path[1]);
@@ -167,11 +162,8 @@
   
   private void pTestRange() throws IOException {
     try {    
-      String [] files = new String[4];
-      files[0] = USER_DIR+"/a.d";
-      files[1] = USER_DIR+"/a.e";
-      files[2] = USER_DIR+"/a.f";
-      files[3] = USER_DIR+"/a.h";
+      String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e",
+                                      USER_DIR+"/a.f", USER_DIR+"/a.h"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/a.[d-fm]", files);
       assertEquals(matchedPath.length, 3);
       assertEquals(matchedPath[0], path[0]);
@@ -184,11 +176,8 @@
   
   private void pTestSetExcl() throws IOException {
     try {    
-      String [] files = new String[4];
-      files[0] = USER_DIR+"/a.d";
-      files[1] = USER_DIR+"/a.e";
-      files[2] = USER_DIR+"/a.0";
-      files[3] = USER_DIR+"/a.h";
+      String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e",
+                                      USER_DIR+"/a.0", USER_DIR+"/a.h"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/a.[^a-cg-z0-9]", files);
       assertEquals(matchedPath.length, 2);
       assertEquals(matchedPath[0], path[0]);
@@ -200,15 +189,11 @@
 
   private void pTestCombination() throws IOException {
     try {    
-      String [] files = new String[4];
-      files[0] = "/user/aa/a.c";
-      files[1] = "/user/bb/a.cpp";
-      files[2] = "/user1/cc/b.hlp";
-      files[3] = "/user/dd/a.hxy";
-      Path[] matchedPath = prepareTesting("/use?/*/a.[ch]??", files);
-      assertEquals(matchedPath.length, 2);
-      assertEquals(matchedPath[0], path[1]);
-      assertEquals(matchedPath[1], path[3]);
+      String [] files = new String[] {"/user/aa/a.c", "/user/bb/a.cpp",
+                                      "/user1/cc/b.hlp", "/user/dd/a.hxy"};
+      Path[] matchedPath = prepareTesting("/use?/*/a.[ch]{lp,xy}", files);
+      assertEquals(matchedPath.length, 1);
+      assertEquals(matchedPath[0], path[3]);
     } finally {
       cleanupDFS();
     }
@@ -216,11 +201,7 @@
   
   private void pTestRelativePath() throws IOException {
     try {
-      String [] files = new String[4];
-      files[0] = "a";
-      files[1] = "abc";
-      files[2] = "abc.p";
-      files[3] = "bacd";
+      String [] files = new String[] {"a", "abc", "abc.p", "bacd"};
       Path[] matchedPath = prepareTesting("a*", files);
       assertEquals(matchedPath.length, 3);
       assertEquals(matchedPath[0], new Path(USER_DIR, path[0]));
@@ -231,15 +212,91 @@
     }
   }
   
+  /* Test {xx,yy} */
+  private void pTestCurlyBracket() throws IOException {
+    Path[] matchedPath;
+    String [] files;
+    try {
+      files = new String[] { USER_DIR+"/a.abcxx", USER_DIR+"/a.abxy",
+                             USER_DIR+"/a.hlp", USER_DIR+"/a.jhyy"};
+      matchedPath = prepareTesting(USER_DIR+"/a.{abc,jh}??", files);
+      assertEquals(matchedPath.length, 2);
+      assertEquals(matchedPath[0], path[0]);
+      assertEquals(matchedPath[1], path[3]);
+    } finally {
+      cleanupDFS();
+    }
+    // nested curlies
+    try {
+      files = new String[] { USER_DIR+"/a.abcxx", USER_DIR+"/a.abdxy",
+                             USER_DIR+"/a.hlp", USER_DIR+"/a.jhyy" };
+      matchedPath = prepareTesting(USER_DIR+"/a.{ab{c,d},jh}??", files);
+      assertEquals(matchedPath.length, 3);
+      assertEquals(matchedPath[0], path[0]);
+      assertEquals(matchedPath[1], path[1]);
+      assertEquals(matchedPath[2], path[3]);
+    } finally {
+      cleanupDFS();
+    }
+    try {
+      // test standalone }
+      files = new String[] {USER_DIR+"/}bc"};
+      matchedPath = prepareTesting(USER_DIR+"/}{a,b}c", files);
+      assertEquals(matchedPath.length, 1);
+      // test {b}
+      matchedPath = prepareTesting(USER_DIR+"/}{b}c", files);
+      assertEquals(matchedPath.length, 1);
+      // test {}
+      matchedPath = prepareTesting(USER_DIR+"}{}bc", files);
+      assertEquals(matchedPath.length, 1);
+
+      // test ill-formed curly
+      boolean hasException = false;
+      try {
+        prepareTesting(USER_DIR+"}{b,}c", files);
+      } catch (IOException e) {
+        assertTrue(e.getMessage().startsWith("Illegal file pattern:") );
+        hasException = true;
+      }
+      assertTrue(hasException);
+      hasException = false;
+      try {
+        prepareTesting(USER_DIR+"}{bc", files);
+      } catch (IOException e) {
+        assertTrue(e.getMessage().startsWith("Illegal file pattern:") );
+        hasException = true;
+      }
+      assertTrue(hasException);
+    } finally {
+      cleanupDFS();
+    }
+  }
+  
+  /* test that a path name can contain Java regex special characters */
+  private void pTestJavaRegexSpecialChars() throws IOException {
+    try {
+      String[] files = new String[] {USER_DIR+"/($.|+)bc", USER_DIR+"/abc"};
+      Path[] matchedPath = prepareTesting(USER_DIR+"/($.|+)*", files);
+      assertEquals(matchedPath.length, 1);
+      assertEquals(matchedPath[0], path[0]);
+    } finally {
+      cleanupDFS();
+    }
+
+  }
   private Path[] prepareTesting(String pattern, String[] files)
     throws IOException {
     for(int i=0; i<Math.min(NUM_OF_PATHS, files.length); i++) {
-      path[i] = new Path(files[i]);
+      path[i] = new Path(files[i]).makeQualified(fs);
       if (!fs.mkdirs(path[i])) {
         throw new IOException("Mkdirs failed to create " + path[i].toString());
       }
     }
-    return fs.globPaths(new Path(pattern));
+    Path[] globResults = fs.globPaths(new Path(pattern));
+    for(int i=0; i<globResults.length; i++) {
+      globResults[i] = globResults[i].makeQualified(fs);
+    }
+    return globResults;
   }
   
   private void cleanupDFS() throws IOException {