You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain-text copy.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2007/10/05 19:57:54 UTC
svn commit: r582366 - in /lucene/hadoop/trunk: CHANGES.txt
src/java/org/apache/hadoop/fs/FileSystem.java
src/test/org/apache/hadoop/fs/TestGlobPaths.java
Author: dhruba
Date: Fri Oct 5 10:57:53 2007
New Revision: 582366
URL: http://svn.apache.org/viewvc?rev=582366&view=rev
Log:
HADOOP-1968. FileSystem supports wildcard input syntax "{ }".
(Hairong Kuang via dhruba)
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=582366&r1=582365&r2=582366&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Oct 5 10:57:53 2007
@@ -81,6 +81,9 @@
factories. Provide a StandardSocketFactory, and a SocksSocketFactory to
allow the use of SOCKS proxies. (taton).
+ HADOOP-1968. FileSystem supports wildcard input syntax "{ }".
+ (Hairong Kuang via dhruba)
+
OPTIMIZATIONS
HADOOP-1910. Reduce the number of RPCs that DistributedFileSystem.create()
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java?rev=582366&r1=582365&r2=582366&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileSystem.java Fri Oct 5 10:57:53 2007
@@ -574,6 +574,14 @@
* <dt> <tt> \<i>c</i> </tt>
* <dd> Removes (escapes) any special meaning of character <i>c</i>.
*
+ * <p>
+ * <dt> <tt> {ab,cd} </tt>
+ * <dd> Matches a string from the string set <tt>{<i>ab, cd</i>} </tt>
+ *
+ * <p>
+ * <dt> <tt> {ab,c{de,fh}} </tt>
+ * <dd> Matches a string from the string set <tt>{<i>ab, cde, cfh</i>}</tt>
+ *
* </dl>
* </dd>
* </dl>
@@ -652,11 +660,18 @@
setRegex(filePattern);
}
+ private boolean isJavaRegexSpecialChar(char pChar) {
+ return pChar == '.' || pChar == '$' || pChar == '(' || pChar == ')' ||
+ pChar == '|' || pChar == '+';
+ }
void setRegex(String filePattern) throws IOException {
int len;
int setOpen;
+ int curlyOpen;
boolean setRange;
- StringBuffer fileRegex = new StringBuffer();
+ boolean expectGroup;
+
+ StringBuilder fileRegex = new StringBuilder();
// Validate the pattern
len = filePattern.length();
@@ -665,7 +680,9 @@
setOpen = 0;
setRange = false;
-
+ curlyOpen = 0;
+ expectGroup = false;
+
for (int i = 0; i < len; i++) {
char pCh;
@@ -677,7 +694,7 @@
if (i >= len)
error("An escaped character does not present", filePattern, i);
pCh = filePattern.charAt(i);
- } else if (pCh == '.') {
+ } else if (isJavaRegexSpecialChar(pCh)) {
fileRegex.append(PAT_ESCAPE);
} else if (pCh == '*') {
fileRegex.append(PAT_ANY);
@@ -685,6 +702,21 @@
} else if (pCh == '?') {
pCh = PAT_ANY;
hasPattern = true;
+ } else if (pCh == '{') {
+ fileRegex.append('(');
+ pCh = '(';
+ curlyOpen++;
+ } else if (pCh == ',' && curlyOpen > 0) {
+ fileRegex.append(")|");
+ pCh = '(';
+ expectGroup = true;
+ } else if (pCh == '}' && curlyOpen > 0) {
+ // End of a group
+ if (expectGroup)
+ error("Unexpected end of a group", filePattern, i);
+ curlyOpen--;
+ fileRegex.append(")");
+ pCh = ')';
} else if (pCh == '[' && setOpen == 0) {
setOpen++;
hasPattern = true;
@@ -704,15 +736,17 @@
// Normal character, or the end of a character set range
setOpen++;
setRange = false;
+ } else if (curlyOpen > 0) {
+ expectGroup = false;
}
fileRegex.append(pCh);
}
// Check for a well-formed pattern
- if (setOpen > 0 || setRange) {
+ if (setOpen > 0 || setRange || curlyOpen > 0) {
// Incomplete character set or character range
- error("Expecting set closure character or end of range", filePattern,
- len);
+ error("Expecting set closure character or end of range, or }",
+ filePattern, len);
}
regex = Pattern.compile(fileRegex.toString());
}
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java?rev=582366&r1=582365&r2=582366&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/fs/TestGlobPaths.java Fri Oct 5 10:57:53 2007
@@ -43,30 +43,29 @@
}
protected void tearDown() throws Exception {
- dfsCluster.shutdown();
+ if(dfsCluster!=null) {
+ dfsCluster.shutdown();
+ }
}
- public void testGlob() {
- try {
- pTestLiteral();
- pTestAny();
- pTestClosure();
- pTestSet();
- pTestRange();
- pTestSetExcl();
- pTestCombination();
- pTestRelativePath();
- } catch(IOException e) {
- e.printStackTrace();
- }
+ public void testGlob() throws Exception {
+ //pTestEscape(); // need to wait until HADOOP-1995 is fixed
+ pTestJavaRegexSpecialChars();
+ pTestCurlyBracket();
+ pTestLiteral();
+ pTestAny();
+ pTestClosure();
+ pTestSet();
+ pTestRange();
+ pTestSetExcl();
+ pTestCombination();
+ pTestRelativePath();
}
private void pTestLiteral() throws IOException {
try {
- String [] files = new String[2];
- files[0] = USER_DIR+"/a2c";
- files[1] = USER_DIR+"/ab\\[c.d";
- Path[] matchedPath = prepareTesting(USER_DIR+"/ab\\[c.d", files);
+ String [] files = new String[] {USER_DIR+"/a2c", USER_DIR+"/abc.d"};
+ Path[] matchedPath = prepareTesting(USER_DIR+"/abc.d", files);
assertEquals(matchedPath.length, 1);
assertEquals(matchedPath[0], path[1]);
} finally {
@@ -74,13 +73,21 @@
}
}
+ private void pTestEscape() throws IOException {
+ try {
+ String [] files = new String[] {USER_DIR+"/ab\\[c.d"};
+ Path[] matchedPath = prepareTesting(USER_DIR+"/ab\\[c.d", files);
+ assertEquals(matchedPath.length, 1);
+ assertEquals(matchedPath[0], path[0]);
+ } finally {
+ cleanupDFS();
+ }
+ }
+
private void pTestAny() throws IOException {
try {
- String [] files = new String[4];
- files[0] = USER_DIR+"/abc";
- files[1] = USER_DIR+"/a2c";
- files[2] = USER_DIR+"/a.c";
- files[3] = USER_DIR+"/abcd";
+ String [] files = new String[] { USER_DIR+"/abc", USER_DIR+"/a2c",
+ USER_DIR+"/a.c", USER_DIR+"/abcd"};
Path[] matchedPath = prepareTesting(USER_DIR+"/a?c", files);
assertEquals(matchedPath.length, 3);
assertEquals(matchedPath[0], path[2]);
@@ -99,11 +106,8 @@
private void pTestClosure1() throws IOException {
try {
- String [] files = new String[4];
- files[0] = USER_DIR+"/a";
- files[1] = USER_DIR+"/abc";
- files[2] = USER_DIR+"/abc.p";
- files[3] = USER_DIR+"/bacd";
+ String [] files = new String[] {USER_DIR+"/a", USER_DIR+"/abc",
+ USER_DIR+"/abc.p", USER_DIR+"/bacd"};
Path[] matchedPath = prepareTesting(USER_DIR+"/a*", files);
assertEquals(matchedPath.length, 3);
assertEquals(matchedPath[0], path[0]);
@@ -116,11 +120,8 @@
private void pTestClosure2() throws IOException {
try {
- String [] files = new String[4];
- files[0] = USER_DIR+"/a.";
- files[1] = USER_DIR+"/a.txt";
- files[2] = USER_DIR+"/a.old.java";
- files[3] = USER_DIR+"/.java";
+ String [] files = new String[] {USER_DIR+"/a.", USER_DIR+"/a.txt",
+ USER_DIR+"/a.old.java", USER_DIR+"/.java"};
Path[] matchedPath = prepareTesting(USER_DIR+"/a.*", files);
assertEquals(matchedPath.length, 3);
assertEquals(matchedPath[0], path[0]);
@@ -133,11 +134,8 @@
private void pTestClosure3() throws IOException {
try {
- String [] files = new String[4];
- files[0] = USER_DIR+"/a.txt.x";
- files[1] = USER_DIR+"/ax";
- files[2] = USER_DIR+"/ab37x";
- files[3] = USER_DIR+"/bacd";
+ String [] files = new String[] {USER_DIR+"/a.txt.x", USER_DIR+"/ax",
+ USER_DIR+"/ab37x", USER_DIR+"/bacd"};
Path[] matchedPath = prepareTesting(USER_DIR+"/a*x", files);
assertEquals(matchedPath.length, 3);
assertEquals(matchedPath[0], path[0]);
@@ -150,11 +148,8 @@
private void pTestSet() throws IOException {
try {
- String [] files = new String[4];
- files[0] = USER_DIR+"/a.c";
- files[1] = USER_DIR+"/a.cpp";
- files[2] = USER_DIR+"/a.hlp";
- files[3] = USER_DIR+"/a.hxy";
+ String [] files = new String[] {USER_DIR+"/a.c", USER_DIR+"/a.cpp",
+ USER_DIR+"/a.hlp", USER_DIR+"/a.hxy"};
Path[] matchedPath = prepareTesting(USER_DIR+"/a.[ch]??", files);
assertEquals(matchedPath.length, 3);
assertEquals(matchedPath[0], path[1]);
@@ -167,11 +162,8 @@
private void pTestRange() throws IOException {
try {
- String [] files = new String[4];
- files[0] = USER_DIR+"/a.d";
- files[1] = USER_DIR+"/a.e";
- files[2] = USER_DIR+"/a.f";
- files[3] = USER_DIR+"/a.h";
+ String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e",
+ USER_DIR+"/a.f", USER_DIR+"/a.h"};
Path[] matchedPath = prepareTesting(USER_DIR+"/a.[d-fm]", files);
assertEquals(matchedPath.length, 3);
assertEquals(matchedPath[0], path[0]);
@@ -184,11 +176,8 @@
private void pTestSetExcl() throws IOException {
try {
- String [] files = new String[4];
- files[0] = USER_DIR+"/a.d";
- files[1] = USER_DIR+"/a.e";
- files[2] = USER_DIR+"/a.0";
- files[3] = USER_DIR+"/a.h";
+ String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e",
+ USER_DIR+"/a.0", USER_DIR+"/a.h"};
Path[] matchedPath = prepareTesting(USER_DIR+"/a.[^a-cg-z0-9]", files);
assertEquals(matchedPath.length, 2);
assertEquals(matchedPath[0], path[0]);
@@ -200,15 +189,11 @@
private void pTestCombination() throws IOException {
try {
- String [] files = new String[4];
- files[0] = "/user/aa/a.c";
- files[1] = "/user/bb/a.cpp";
- files[2] = "/user1/cc/b.hlp";
- files[3] = "/user/dd/a.hxy";
- Path[] matchedPath = prepareTesting("/use?/*/a.[ch]??", files);
- assertEquals(matchedPath.length, 2);
- assertEquals(matchedPath[0], path[1]);
- assertEquals(matchedPath[1], path[3]);
+ String [] files = new String[] {"/user/aa/a.c", "/user/bb/a.cpp",
+ "/user1/cc/b.hlp", "/user/dd/a.hxy"};
+ Path[] matchedPath = prepareTesting("/use?/*/a.[ch]{lp,xy}", files);
+ assertEquals(matchedPath.length, 1);
+ assertEquals(matchedPath[0], path[3]);
} finally {
cleanupDFS();
}
@@ -216,11 +201,7 @@
private void pTestRelativePath() throws IOException {
try {
- String [] files = new String[4];
- files[0] = "a";
- files[1] = "abc";
- files[2] = "abc.p";
- files[3] = "bacd";
+ String [] files = new String[] {"a", "abc", "abc.p", "bacd"};
Path[] matchedPath = prepareTesting("a*", files);
assertEquals(matchedPath.length, 3);
assertEquals(matchedPath[0], new Path(USER_DIR, path[0]));
@@ -231,15 +212,91 @@
}
}
+ /* Test {xx,yy} */
+ private void pTestCurlyBracket() throws IOException {
+ Path[] matchedPath;
+ String [] files;
+ try {
+ files = new String[] { USER_DIR+"/a.abcxx", USER_DIR+"/a.abxy",
+ USER_DIR+"/a.hlp", USER_DIR+"/a.jhyy"};
+ matchedPath = prepareTesting(USER_DIR+"/a.{abc,jh}??", files);
+ assertEquals(matchedPath.length, 2);
+ assertEquals(matchedPath[0], path[0]);
+ assertEquals(matchedPath[1], path[3]);
+ } finally {
+ cleanupDFS();
+ }
+ // nested curlies
+ try {
+ files = new String[] { USER_DIR+"/a.abcxx", USER_DIR+"/a.abdxy",
+ USER_DIR+"/a.hlp", USER_DIR+"/a.jhyy" };
+ matchedPath = prepareTesting(USER_DIR+"/a.{ab{c,d},jh}??", files);
+ assertEquals(matchedPath.length, 3);
+ assertEquals(matchedPath[0], path[0]);
+ assertEquals(matchedPath[1], path[1]);
+ assertEquals(matchedPath[2], path[3]);
+ } finally {
+ cleanupDFS();
+ }
+ try {
+ // test standalone }
+ files = new String[] {USER_DIR+"/}bc"};
+ matchedPath = prepareTesting(USER_DIR+"/}{a,b}c", files);
+ assertEquals(matchedPath.length, 1);
+ // test {b}
+ matchedPath = prepareTesting(USER_DIR+"/}{b}c", files);
+ assertEquals(matchedPath.length, 1);
+ // test {}
+ matchedPath = prepareTesting(USER_DIR+"}{}bc", files);
+ assertEquals(matchedPath.length, 1);
+
+ // test ill-formed curly
+ boolean hasException = false;
+ try {
+ prepareTesting(USER_DIR+"}{b,}c", files);
+ } catch (IOException e) {
+ assertTrue(e.getMessage().startsWith("Illegal file pattern:") );
+ hasException = true;
+ }
+ assertTrue(hasException);
+ hasException = false;
+ try {
+ prepareTesting(USER_DIR+"}{bc", files);
+ } catch (IOException e) {
+ assertTrue(e.getMessage().startsWith("Illegal file pattern:") );
+ hasException = true;
+ }
+ assertTrue(hasException);
+ } finally {
+ cleanupDFS();
+ }
+ }
+
+ /* test that a path name can contain Java regex special characters */
+ private void pTestJavaRegexSpecialChars() throws IOException {
+ try {
+ String[] files = new String[] {USER_DIR+"/($.|+)bc", USER_DIR+"/abc"};
+ Path[] matchedPath = prepareTesting(USER_DIR+"/($.|+)*", files);
+ assertEquals(matchedPath.length, 1);
+ assertEquals(matchedPath[0], path[0]);
+ } finally {
+ cleanupDFS();
+ }
+
+ }
private Path[] prepareTesting(String pattern, String[] files)
throws IOException {
for(int i=0; i<Math.min(NUM_OF_PATHS, files.length); i++) {
- path[i] = new Path(files[i]);
+ path[i] = new Path(files[i]).makeQualified(fs);
if (!fs.mkdirs(path[i])) {
throw new IOException("Mkdirs failed to create " + path[i].toString());
}
}
- return fs.globPaths(new Path(pattern));
+ Path[] globResults = fs.globPaths(new Path(pattern));
+ for(int i=0; i<globResults.length; i++) {
+ globResults[i] = globResults[i].makeQualified(fs);
+ }
+ return globResults;
}
private void cleanupDFS() throws IOException {