You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by jl...@apache.org on 2012/10/12 22:00:39 UTC
svn commit: r1397709 -
/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java
Author: jlowe
Date: Fri Oct 12 20:00:39 2012
New Revision: 1397709
URL: http://svn.apache.org/viewvc?rev=1397709&view=rev
Log:
HADOOP-8906. paths with multiple globs are unreliable. Contributed by Daryn Sharp.
Modified:
hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java
Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java?rev=1397709&r1=1397708&r2=1397709&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java Fri Oct 12 20:00:39 2012
@@ -17,52 +17,409 @@
*/
package org.apache.hadoop.fs;
+import static org.junit.Assert.*;
+
import java.io.IOException;
import java.util.regex.Pattern;
+import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.*;
-import junit.framework.TestCase;
+public class TestGlobPaths {
-public class TestGlobPaths extends TestCase {
-
static class RegexPathFilter implements PathFilter {
-
+ // Regular expression that a path's string form must match to be accepted.
private final String regex;
public RegexPathFilter(String regex) {
this.regex = regex;
}
+ @Override
public boolean accept(Path path) {
return path.toString().matches(regex); // String.matches tests the whole string, not a substring
}
}
-
+
static private MiniDFSCluster dfsCluster;
static private FileSystem fs;
static final private int NUM_OF_PATHS = 4;
- static final String USER_DIR = "/user/"+System.getProperty("user.name");
+ static private String USER_DIR;
private Path[] path = new Path[NUM_OF_PATHS];
-
- protected void setUp() throws Exception {
- try {
- Configuration conf = new HdfsConfiguration();
- dfsCluster = new MiniDFSCluster.Builder(conf).build();
- fs = FileSystem.get(conf);
- } catch (IOException e) {
- e.printStackTrace();
- }
+
+ /**
+ * Starts one MiniDFSCluster that is shared by every test in this class,
+ * obtains the FileSystem for the same configuration and records the path of
+ * that file system's home directory in USER_DIR.
+ *
+ * @throws Exception if the cluster or the file system cannot be set up
+ */
+ @BeforeClass
+ public static void setUp() throws Exception {
+ Configuration conf = new HdfsConfiguration();
+ dfsCluster = new MiniDFSCluster.Builder(conf).build();
+ fs = FileSystem.get(conf);
+ // URI.getPath() already returns a String, so no toString() is needed.
+ USER_DIR = fs.getHomeDirectory().toUri().getPath();
}
- protected void tearDown() throws Exception {
+ @AfterClass // JUnit runs this once, after all tests in the class have finished
+ public static void tearDown() throws Exception {
if(dfsCluster!=null) { // nothing to shut down if no cluster was ever assigned
dfsCluster.shutdown();
}
}
+ /**
+ * Creates a small directory tree under USER_DIR and checks what globStatus
+ * returns for patterns that contain glob characters in more than one path
+ * component, first without a filter and then with an accept-all and a
+ * reject-all PathFilter.
+ * As asserted below, a pattern without glob characters that names a missing
+ * path is expected to yield null, while a glob pattern that matches nothing
+ * is expected to yield an empty, non-null result.
+ */
+ @Test
+ public void testMultiGlob() throws IOException {
+ FileStatus[] status;
+ /*
+ * Layout created below (paths are relative to USER_DIR):
+ * /dir1/subdir1
+ * /dir1/subdir1/f1
+ * /dir1/subdir1/f2
+ * /dir1/subdir2/f1
+ * /dir2/subdir1
+ * /dir2/subdir2
+ * /dir2/subdir2/f1
+ * /dir3/f1
+ * /dir3/f2(dir)
+ * /dir3/subdir2(file)
+ * /dir3/subdir3
+ * /dir3/subdir3/f1
+ * /dir3/subdir3/f1/f1
+ * /dir3/subdir3/f3
+ * /dir4
+ */
+
+ Path d1 = new Path(USER_DIR, "dir1");
+ Path d11 = new Path(d1, "subdir1");
+ Path d12 = new Path(d1, "subdir2");
+
+ Path f111 = new Path(d11, "f1");
+ fs.createNewFile(f111);
+ Path f112 = new Path(d11, "f2");
+ fs.createNewFile(f112);
+ Path f121 = new Path(d12, "f1");
+ fs.createNewFile(f121);
+
+ Path d2 = new Path(USER_DIR, "dir2");
+ Path d21 = new Path(d2, "subdir1");
+ fs.mkdirs(d21);
+ Path d22 = new Path(d2, "subdir2");
+ Path f221 = new Path(d22, "f1");
+ fs.createNewFile(f221);
+
+ Path d3 = new Path(USER_DIR, "dir3");
+ Path f31 = new Path(d3, "f1");
+ fs.createNewFile(f31);
+ Path d32 = new Path(d3, "f2");
+ fs.mkdirs(d32);
+ Path f32 = new Path(d3, "subdir2"); // a plain file whose name looks like a subdir
+ fs.createNewFile(f32);
+ Path d33 = new Path(d3, "subdir3");
+ Path f333 = new Path(d33, "f3");
+ fs.createNewFile(f333);
+ Path d331 = new Path(d33, "f1");
+ Path f3311 = new Path(d331, "f1");
+ fs.createNewFile(f3311);
+ Path d4 = new Path(USER_DIR, "dir4");
+ fs.mkdirs(d4);
+
+ /*
+ * basic
+ */
+ Path root = new Path(USER_DIR);
+ status = fs.globStatus(root);
+ checkStatus(status, root);
+
+ status = fs.globStatus(new Path(USER_DIR, "x"));
+ assertNull(status);
+
+ status = fs.globStatus(new Path("x"));
+ assertNull(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "x/x"));
+ assertNull(status);
+
+ status = fs.globStatus(new Path("x/x"));
+ assertNull(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "*"));
+ checkStatus(status, d1, d2, d3, d4);
+
+ status = fs.globStatus(new Path("*"));
+ checkStatus(status, d1, d2, d3, d4);
+
+ status = fs.globStatus(new Path(USER_DIR, "*/x"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("*/x"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "x/*"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("x/*"));
+ checkStatus(status);
+
+ // make sure the full pattern is scanned, giving an empty result, instead of
+ // bailing out early with null when a leading component does not exist
+ status = fs.globStatus(new Path(USER_DIR, "x/x/x/*"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("x/x/x/*"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "*/*"));
+ checkStatus(status, d11, d12, d21, d22, f31, d32, f32, d33);
+
+ status = fs.globStatus(new Path("*/*"));
+ checkStatus(status, d11, d12, d21, d22, f31, d32, f32, d33);
+
+ /*
+ * one level deep
+ */
+ status = fs.globStatus(new Path(USER_DIR, "dir*/*"));
+ checkStatus(status, d11, d12, d21, d22, f31, d32, f32, d33);
+
+ status = fs.globStatus(new Path("dir*/*"));
+ checkStatus(status, d11, d12, d21, d22, f31, d32, f32, d33);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*"));
+ checkStatus(status, d11, d12, d21, d22, f32, d33);
+
+ status = fs.globStatus(new Path("dir*/subdir*"));
+ checkStatus(status, d11, d12, d21, d22, f32, d33);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/f*"));
+ checkStatus(status, f31, d32);
+
+ status = fs.globStatus(new Path("dir*/f*"));
+ checkStatus(status, f31, d32);
+
+ /*
+ * subdir1 globs
+ */
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1"));
+ checkStatus(status, d11, d21);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/*"));
+ checkStatus(status, f111, f112);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/*/*"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/x"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/x*"));
+ checkStatus(status);
+
+ /*
+ * subdir2 globs
+ */
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir2"));
+ checkStatus(status, d12, d22, f32);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir2/*"));
+ checkStatus(status, f121, f221);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir2/*/*"));
+ checkStatus(status);
+
+ /*
+ * subdir3 globs
+ */
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir3"));
+ checkStatus(status, d33);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir3/*"));
+ checkStatus(status, d331, f333);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir3/*/*"));
+ checkStatus(status, f3311);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir3/*/*/*"));
+ checkStatus(status);
+
+ /*
+ * file1 single dir globs
+ */
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f1"));
+ checkStatus(status, f111);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f1*"));
+ checkStatus(status, f111);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f1/*"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f1*/*"));
+ checkStatus(status);
+
+ /*
+ * file1 multi-dir globs
+ */
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1"));
+ checkStatus(status, f111, f121, f221, d331);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*"));
+ checkStatus(status, f111, f121, f221, d331);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1/*"));
+ checkStatus(status, f3311);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*/*"));
+ checkStatus(status, f3311);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*/*"));
+ checkStatus(status, f3311);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*/x"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*/*/*"));
+ checkStatus(status);
+
+ /*
+ * file glob multiple files
+ */
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*"));
+ checkStatus(status, d11, d12, d21, d22, f32, d33);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/*"));
+ checkStatus(status, f111, f112, f121, f221, d331, f333);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f*"));
+ checkStatus(status, f111, f112, f121, f221, d331, f333);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f*/*"));
+ checkStatus(status, f3311);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/*/f1"));
+ checkStatus(status, f3311);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/*/*"));
+ checkStatus(status, f3311);
+
+
+ // doesn't exist
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f3"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f3*"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("{x}"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("{x,y}"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("dir*/{x,y}"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("dir*/{f1,y}"));
+ checkStatus(status, f31);
+
+ status = fs.globStatus(new Path("{x,y}"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("/{x/x,y/y}"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("{x/x,y/y}"));
+ checkStatus(status);
+
+ status = fs.globStatus(new Path(Path.CUR_DIR));
+ checkStatus(status, new Path(USER_DIR));
+
+ status = fs.globStatus(new Path(USER_DIR+"{/dir1}"));
+ checkStatus(status, d1);
+
+ status = fs.globStatus(new Path(USER_DIR+"{/dir*}"));
+ checkStatus(status, d1, d2, d3, d4);
+
+ /*
+ * true filter
+ */
+
+ PathFilter trueFilter = new PathFilter() {
+ @Override
+ public boolean accept(Path path) {
+ return true;
+ }
+ };
+
+ status = fs.globStatus(new Path(Path.SEPARATOR), trueFilter);
+ checkStatus(status, new Path(Path.SEPARATOR));
+
+ status = fs.globStatus(new Path(Path.CUR_DIR), trueFilter);
+ checkStatus(status, new Path(USER_DIR));
+
+ status = fs.globStatus(d1, trueFilter);
+ checkStatus(status, d1);
+
+ status = fs.globStatus(new Path(USER_DIR), trueFilter);
+ checkStatus(status, new Path(USER_DIR));
+
+ status = fs.globStatus(new Path(USER_DIR, "*"), trueFilter);
+ checkStatus(status, d1, d2, d3, d4);
+
+ status = fs.globStatus(new Path("/x/*"), trueFilter);
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("/x"), trueFilter);
+ assertNull(status);
+
+ status = fs.globStatus(new Path("/x/x"), trueFilter);
+ assertNull(status);
+
+ /*
+ * false filter
+ */
+ PathFilter falseFilter = new PathFilter() {
+ @Override
+ public boolean accept(Path path) {
+ return false;
+ }
+ };
+
+ status = fs.globStatus(new Path(Path.SEPARATOR), falseFilter);
+ assertNull(status);
+
+ status = fs.globStatus(new Path(Path.CUR_DIR), falseFilter);
+ assertNull(status);
+
+ status = fs.globStatus(new Path(USER_DIR), falseFilter);
+ assertNull(status);
+
+ status = fs.globStatus(new Path(USER_DIR, "*"), falseFilter);
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("/x/*"), falseFilter);
+ checkStatus(status);
+
+ status = fs.globStatus(new Path("/x"), falseFilter);
+ assertNull(status);
+
+ status = fs.globStatus(new Path("/x/x"), falseFilter);
+ assertNull(status);
+ }
+
+ /**
+ * Asserts that a glob result is non-null and lists exactly the expected
+ * paths, in the given order. Both sides are joined with newlines and
+ * compared as one string, so a failure message shows the two lists in full.
+ *
+ * @param status result of a globStatus call; must not be null
+ * @param expectedMatches the paths expected in the result, in order; pass
+ * none to require an empty result
+ */
+ private void checkStatus(FileStatus[] status, Path ... expectedMatches) {
+ assertNotNull(status);
+ String[] actualPaths = new String[status.length];
+ int next = 0;
+ for (FileStatus entry : status) {
+ actualPaths[next++] = getPathFromStatus(entry);
+ }
+ String actualListing = StringUtils.join(actualPaths, "\n");
+ String expectedListing = StringUtils.join(expectedMatches, "\n");
+ assertEquals(expectedListing, actualListing);
+ }
+
+ /**
+ * Returns only the path component of the status entry's URI.
+ *
+ * @param status the entry whose path is wanted
+ * @return the value of getPath() on the URI of the entry's Path
+ */
+ private String getPathFromStatus(FileStatus status) {
+ Path location = status.getPath();
+ return location.toUri().getPath();
+ }
+
+ @Test
public void testPathFilter() throws IOException {
try {
String[] files = new String[] { USER_DIR + "/a", USER_DIR + "/a/b" };
@@ -75,6 +432,7 @@ public class TestGlobPaths extends TestC
}
}
+ @Test
public void testPathFilterWithFixedLastComponent() throws IOException {
try {
String[] files = new String[] { USER_DIR + "/a", USER_DIR + "/a/b",
@@ -88,21 +446,8 @@ public class TestGlobPaths extends TestC
}
}
- public void testGlob() throws Exception {
- //pTestEscape(); // need to wait until HADOOP-1995 is fixed
- pTestJavaRegexSpecialChars();
- pTestCurlyBracket();
- pTestLiteral();
- pTestAny();
- pTestClosure();
- pTestSet();
- pTestRange();
- pTestSetExcl();
- pTestCombination();
- pTestRelativePath();
- }
-
- private void pTestLiteral() throws IOException {
+ @Test
+ public void pTestLiteral() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/a2c", USER_DIR+"/abc.d"};
Path[] matchedPath = prepareTesting(USER_DIR+"/abc.d", files);
@@ -113,7 +458,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestEscape() throws IOException {
+ @Test
+ public void pTestEscape() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/ab\\[c.d"};
Path[] matchedPath = prepareTesting(USER_DIR+"/ab\\[c.d", files);
@@ -124,7 +470,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestAny() throws IOException {
+ @Test
+ public void pTestAny() throws IOException {
try {
String [] files = new String[] { USER_DIR+"/abc", USER_DIR+"/a2c",
USER_DIR+"/a.c", USER_DIR+"/abcd"};
@@ -138,15 +485,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestClosure() throws IOException {
- pTestClosure1();
- pTestClosure2();
- pTestClosure3();
- pTestClosure4();
- pTestClosure5();
- }
-
- private void pTestClosure1() throws IOException {
+ @Test
+ public void pTestClosure1() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/a", USER_DIR+"/abc",
USER_DIR+"/abc.p", USER_DIR+"/bacd"};
@@ -160,7 +500,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestClosure2() throws IOException {
+ @Test
+ public void pTestClosure2() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/a.", USER_DIR+"/a.txt",
USER_DIR+"/a.old.java", USER_DIR+"/.java"};
@@ -174,7 +515,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestClosure3() throws IOException {
+ @Test
+ public void pTestClosure3() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/a.txt.x", USER_DIR+"/ax",
USER_DIR+"/ab37x", USER_DIR+"/bacd"};
@@ -188,7 +530,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestClosure4() throws IOException {
+ @Test
+ public void pTestClosure4() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/dir1/file1",
USER_DIR+"/dir2/file2",
@@ -202,7 +545,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestClosure5() throws IOException {
+ @Test
+ public void pTestClosure5() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/dir1/file1",
USER_DIR+"/file1"};
@@ -214,7 +558,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestSet() throws IOException {
+ @Test
+ public void pTestSet() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/a.c", USER_DIR+"/a.cpp",
USER_DIR+"/a.hlp", USER_DIR+"/a.hxy"};
@@ -228,7 +573,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestRange() throws IOException {
+ @Test
+ public void pTestRange() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e",
USER_DIR+"/a.f", USER_DIR+"/a.h"};
@@ -242,7 +588,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestSetExcl() throws IOException {
+ @Test
+ public void pTestSetExcl() throws IOException {
try {
String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e",
USER_DIR+"/a.0", USER_DIR+"/a.h"};
@@ -255,7 +602,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestCombination() throws IOException {
+ @Test
+ public void pTestCombination() throws IOException {
try {
String [] files = new String[] {"/user/aa/a.c", "/user/bb/a.cpp",
"/user1/cc/b.hlp", "/user/dd/a.hxy"};
@@ -267,7 +615,8 @@ public class TestGlobPaths extends TestC
}
}
- private void pTestRelativePath() throws IOException {
+ @Test
+ public void pTestRelativePath() throws IOException {
try {
String [] files = new String[] {"a", "abc", "abc.p", "bacd"};
Path[] matchedPath = prepareTesting("a*", files);
@@ -281,7 +630,8 @@ public class TestGlobPaths extends TestC
}
/* Test {xx,yy} */
- private void pTestCurlyBracket() throws IOException {
+ @Test
+ public void pTestCurlyBracket() throws IOException {
Path[] matchedPath;
String [] files;
try {
@@ -380,7 +730,8 @@ public class TestGlobPaths extends TestC
}
/* test that a path name can contain Java regex special characters */
- private void pTestJavaRegexSpecialChars() throws IOException {
+ @Test
+ public void pTestJavaRegexSpecialChars() throws IOException {
try {
String[] files = new String[] {USER_DIR+"/($.|+)bc", USER_DIR+"/abc"};
Path[] matchedPath = prepareTesting(USER_DIR+"/($.|+)*", files);
@@ -391,6 +742,7 @@ public class TestGlobPaths extends TestC
}
}
+
private Path[] prepareTesting(String pattern, String[] files)
throws IOException {
for(int i=0; i<Math.min(NUM_OF_PATHS, files.length); i++) {
@@ -427,8 +779,9 @@ public class TestGlobPaths extends TestC
return globResults;
}
- private void cleanupDFS() throws IOException {
- fs.delete(new Path("/user"), true);
+ @After
+ public void cleanupDFS() throws IOException {
+ fs.delete(new Path(USER_DIR), true);
}
}