You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by jl...@apache.org on 2012/10/12 22:00:39 UTC

svn commit: r1397709 - /hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java

Author: jlowe
Date: Fri Oct 12 20:00:39 2012
New Revision: 1397709

URL: http://svn.apache.org/viewvc?rev=1397709&view=rev
Log:
HADOOP-8906. paths with multiple globs are unreliable. Contributed by Daryn Sharp.

Modified:
    hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java

Modified: hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java?rev=1397709&r1=1397708&r2=1397709&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java (original)
+++ hadoop/common/branches/branch-0.23/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java Fri Oct 12 20:00:39 2012
@@ -17,52 +17,409 @@
  */
 package org.apache.hadoop.fs;
 
+import static org.junit.Assert.*;
+
 import java.io.IOException;
 import java.util.regex.Pattern;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.*;
 
-import junit.framework.TestCase;
+public class TestGlobPaths {
 
-public class TestGlobPaths extends TestCase {
-  
   static class RegexPathFilter implements PathFilter {
-    
+
     private final String regex;
     public RegexPathFilter(String regex) {
       this.regex = regex;
     }
 
+    @Override
     public boolean accept(Path path) {
       return path.toString().matches(regex);
     }
 
   }
-  
+
   static private MiniDFSCluster dfsCluster;
   static private FileSystem fs;
   static final private int NUM_OF_PATHS = 4;
-  static final String USER_DIR = "/user/"+System.getProperty("user.name");
+  static private String USER_DIR;
   private Path[] path = new Path[NUM_OF_PATHS];
-  
-  protected void setUp() throws Exception {
-    try {
-      Configuration conf = new HdfsConfiguration();
-      dfsCluster = new MiniDFSCluster.Builder(conf).build();
-      fs = FileSystem.get(conf);
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    dfsCluster = new MiniDFSCluster.Builder(conf).build();
+    fs = FileSystem.get(conf);
+    USER_DIR = fs.getHomeDirectory().toUri().getPath().toString();
   }
   
-  protected void tearDown() throws Exception {
+  @AfterClass
+  public static void tearDown() throws Exception {
     if(dfsCluster!=null) {
       dfsCluster.shutdown();
     }
   }
+
+  @Test
+  public void testMultiGlob() throws IOException {
+    FileStatus[] status;
+    /*
+     *  /dir1/subdir1
+     *  /dir1/subdir1/f1
+     *  /dir1/subdir1/f2
+     *  /dir1/subdir2/f1
+     *  /dir2/subdir1
+     *  /dir2/subdir2
+     *  /dir2/subdir2/f1
+     *  /dir3
+     *  /dir3/f1(file)
+     *  /dir3/f2(dir)
+     *  /dir3/subdir2(file)
+     *  /dir3/subdir3
+     *  /dir3/subdir3/f1
+     *  /dir3/subdir3/f1/f1
+     *  /dir3/subdir3/f3
+     *  /dir4
+     */
+
+    Path d1 = new Path(USER_DIR, "dir1");
+    Path d11 = new Path(d1, "subdir1");
+    Path d12 = new Path(d1, "subdir2");
+    
+    Path f111 = new Path(d11, "f1");
+    fs.createNewFile(f111);
+    Path f112 = new Path(d11, "f2");
+    fs.createNewFile(f112);
+    Path f121 = new Path(d12, "f1");
+    fs.createNewFile(f121);
+    
+    Path d2 = new Path(USER_DIR, "dir2");
+    Path d21 = new Path(d2, "subdir1");
+    fs.mkdirs(d21);
+    Path d22 = new Path(d2, "subdir2");
+    Path f221 = new Path(d22, "f1");
+    fs.createNewFile(f221);
+
+    Path d3 = new Path(USER_DIR, "dir3");
+    Path f31 = new Path(d3, "f1");
+    fs.createNewFile(f31);
+    Path d32 = new Path(d3, "f2");
+    fs.mkdirs(d32);
+    Path f32 = new Path(d3, "subdir2"); // a file, despite the subdir-like name
+    fs.createNewFile(f32);
+    Path d33 = new Path(d3, "subdir3");
+    Path f333 = new Path(d33, "f3");
+    fs.createNewFile(f333);
+    Path d331 = new Path(d33, "f1");
+    Path f3311 = new Path(d331, "f1");
+    fs.createNewFile(f3311);
+    Path d4 = new Path(USER_DIR, "dir4");
+    fs.mkdirs(d4);
+
+    /*
+     * basic 
+     */
+    Path root = new Path(USER_DIR);
+    status = fs.globStatus(root);
+    checkStatus(status, root);
+    
+    status = fs.globStatus(new Path(USER_DIR, "x"));
+    assertNull(status);
+
+    status = fs.globStatus(new Path("x"));
+    assertNull(status);
+
+    status = fs.globStatus(new Path(USER_DIR, "x/x"));
+    assertNull(status);
+
+    status = fs.globStatus(new Path("x/x"));
+    assertNull(status);
+
+    status = fs.globStatus(new Path(USER_DIR, "*"));
+    checkStatus(status, d1, d2, d3, d4);
+
+    status = fs.globStatus(new Path("*"));
+    checkStatus(status, d1, d2, d3, d4);
+
+    status = fs.globStatus(new Path(USER_DIR, "*/x"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("*/x"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path(USER_DIR, "x/*"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("x/*"));
+    checkStatus(status);
+
+    // make sure the full pattern is scanned (empty result) instead of bailing out early with null
+    status = fs.globStatus(new Path(USER_DIR, "x/x/x/*"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("x/x/x/*"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path(USER_DIR, "*/*"));
+    checkStatus(status, d11, d12, d21, d22, f31, d32, f32, d33);
+
+    status = fs.globStatus(new Path("*/*"));
+    checkStatus(status, d11, d12, d21, d22, f31, d32, f32, d33);
+
+    /*
+     * one level deep
+     */
+    status = fs.globStatus(new Path(USER_DIR, "dir*/*"));
+    checkStatus(status, d11, d12, d21, d22, f31, d32, f32, d33);
+
+    status = fs.globStatus(new Path("dir*/*"));
+    checkStatus(status, d11, d12, d21, d22, f31, d32, f32, d33);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*"));
+    checkStatus(status, d11, d12, d21, d22, f32, d33);
+
+    status = fs.globStatus(new Path("dir*/subdir*"));
+    checkStatus(status, d11, d12, d21, d22, f32, d33);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/f*"));
+    checkStatus(status, f31, d32);
+
+    status = fs.globStatus(new Path("dir*/f*"));
+    checkStatus(status, f31, d32);
+
+    /*
+     * subdir1 globs
+     */
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1"));
+    checkStatus(status, d11, d21);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/*"));
+    checkStatus(status, f111, f112);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/*/*"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/x"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/x*"));
+    checkStatus(status);
+
+    /*
+     * subdir2 globs
+     */
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir2"));
+    checkStatus(status, d12, d22, f32);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir2/*"));
+    checkStatus(status, f121, f221);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir2/*/*"));
+    checkStatus(status);
+
+    /*
+     * subdir3 globs
+     */
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir3"));
+    checkStatus(status, d33);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir3/*"));
+    checkStatus(status, d331, f333); 
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir3/*/*"));
+    checkStatus(status, f3311);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir3/*/*/*"));
+    checkStatus(status);
+
+    /*
+     * file1 single dir globs
+     */    
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f1"));
+    checkStatus(status, f111);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f1*"));
+    checkStatus(status, f111);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f1/*"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f1*/*"));
+    checkStatus(status);
+
+    /*
+     * file1 multi-dir globs
+     */
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1"));
+    checkStatus(status, f111, f121, f221, d331);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*"));
+    checkStatus(status, f111, f121, f221, d331);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1/*"));
+    checkStatus(status, f3311);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*/*"));
+    checkStatus(status, f3311);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*/*"));
+    checkStatus(status, f3311);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*/x"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f1*/*/*"));
+    checkStatus(status);
+
+    /*
+     *  file glob multiple files
+     */
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*"));
+    checkStatus(status, d11, d12, d21, d22, f32, d33);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/*"));
+    checkStatus(status, f111, f112, f121, f221, d331, f333); 
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f*"));
+    checkStatus(status, f111, f112, f121, f221, d331, f333);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/f*/*"));
+    checkStatus(status, f3311);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/*/f1"));
+    checkStatus(status, f3311); 
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir*/*/*"));
+    checkStatus(status, f3311); 
+
+
+    // doesn't exist
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f3"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path(USER_DIR, "dir*/subdir1/f3*"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("{x}"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("{x,y}"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("dir*/{x,y}"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("dir*/{f1,y}"));
+    checkStatus(status, f31);
+
+    status = fs.globStatus(new Path("{x,y}"));
+    checkStatus(status);
+    
+    status = fs.globStatus(new Path("/{x/x,y/y}"));
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("{x/x,y/y}"));
+    checkStatus(status);
+    
+    status = fs.globStatus(new Path(Path.CUR_DIR));
+    checkStatus(status, new Path(USER_DIR));
+
+    status = fs.globStatus(new Path(USER_DIR+"{/dir1}"));
+    checkStatus(status, d1);
+
+    status = fs.globStatus(new Path(USER_DIR+"{/dir*}"));
+    checkStatus(status, d1, d2, d3, d4);
+
+    /* 
+     * true filter
+     */
+
+    PathFilter trueFilter = new PathFilter() {
+      @Override
+      public boolean accept(Path path) {
+        return true;
+      }
+    };
+
+    status = fs.globStatus(new Path(Path.SEPARATOR), trueFilter);
+    checkStatus(status, new Path(Path.SEPARATOR));
+    
+    status = fs.globStatus(new Path(Path.CUR_DIR), trueFilter);
+    checkStatus(status, new Path(USER_DIR));    
+
+    status = fs.globStatus(d1, trueFilter);
+    checkStatus(status, d1);
+
+    status = fs.globStatus(new Path(USER_DIR), trueFilter);
+    checkStatus(status, new Path(USER_DIR));
+
+    status = fs.globStatus(new Path(USER_DIR, "*"), trueFilter);
+    checkStatus(status, d1, d2, d3, d4);
+
+    status = fs.globStatus(new Path("/x/*"), trueFilter);
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("/x"), trueFilter);
+    assertNull(status);
+
+    status = fs.globStatus(new Path("/x/x"), trueFilter);
+    assertNull(status);
+    
+    /*
+     * false filter
+     */
+    PathFilter falseFilter = new PathFilter() {
+      @Override
+      public boolean accept(Path path) {
+        return false;
+      }
+    };
+
+    status = fs.globStatus(new Path(Path.SEPARATOR), falseFilter);
+    assertNull(status);
+    
+    status = fs.globStatus(new Path(Path.CUR_DIR), falseFilter);
+    assertNull(status);    
+    
+    status = fs.globStatus(new Path(USER_DIR), falseFilter);
+    assertNull(status);
+    
+    status = fs.globStatus(new Path(USER_DIR, "*"), falseFilter);
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("/x/*"), falseFilter);
+    checkStatus(status);
+
+    status = fs.globStatus(new Path("/x"), falseFilter);
+    assertNull(status);
+
+    status = fs.globStatus(new Path("/x/x"), falseFilter);
+    assertNull(status);
+  }
+  
+  private void checkStatus(FileStatus[] status, Path ... expectedMatches) {
+    assertNotNull(status);
+    String[] paths = new String[status.length];
+    for (int i=0; i < status.length; i++) {
+      paths[i] = getPathFromStatus(status[i]);
+    }
+    String got = StringUtils.join(paths, "\n");
+    String expected = StringUtils.join(expectedMatches, "\n");
+    assertEquals(expected, got);
+  }
+
+  private String getPathFromStatus(FileStatus status) {
+    return status.getPath().toUri().getPath();
+  }
   
+  
+  @Test
   public void testPathFilter() throws IOException {
     try {
       String[] files = new String[] { USER_DIR + "/a", USER_DIR + "/a/b" };
@@ -75,6 +432,7 @@ public class TestGlobPaths extends TestC
     }
   }
   
+  @Test
   public void testPathFilterWithFixedLastComponent() throws IOException {
     try {
       String[] files = new String[] { USER_DIR + "/a", USER_DIR + "/a/b",
@@ -88,21 +446,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  public void testGlob() throws Exception {
-    //pTestEscape(); // need to wait until HADOOP-1995 is fixed
-    pTestJavaRegexSpecialChars();
-    pTestCurlyBracket();
-    pTestLiteral();
-    pTestAny();
-    pTestClosure();
-    pTestSet();
-    pTestRange();
-    pTestSetExcl();
-    pTestCombination();
-    pTestRelativePath();
-  }
-  
-  private void pTestLiteral() throws IOException {
+  @Test
+  public void pTestLiteral() throws IOException {
     try {
       String [] files = new String[] {USER_DIR+"/a2c", USER_DIR+"/abc.d"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/abc.d", files);
@@ -113,7 +458,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  private void pTestEscape() throws IOException {
+  @Test
+  public void pTestEscape() throws IOException {
     try {
       String [] files = new String[] {USER_DIR+"/ab\\[c.d"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/ab\\[c.d", files);
@@ -124,7 +470,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  private void pTestAny() throws IOException {
+  @Test
+  public void pTestAny() throws IOException {
     try {
       String [] files = new String[] { USER_DIR+"/abc", USER_DIR+"/a2c",
                                        USER_DIR+"/a.c", USER_DIR+"/abcd"};
@@ -138,15 +485,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  private void pTestClosure() throws IOException {
-    pTestClosure1();
-    pTestClosure2();
-    pTestClosure3();
-    pTestClosure4();
-    pTestClosure5();
-  }
-  
-  private void pTestClosure1() throws IOException {
+  @Test
+  public void pTestClosure1() throws IOException {
     try {
       String [] files = new String[] {USER_DIR+"/a", USER_DIR+"/abc",
                                       USER_DIR+"/abc.p", USER_DIR+"/bacd"};
@@ -160,7 +500,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  private void pTestClosure2() throws IOException {
+  @Test
+  public void pTestClosure2() throws IOException {
     try {
       String [] files = new String[] {USER_DIR+"/a.", USER_DIR+"/a.txt",
                                      USER_DIR+"/a.old.java", USER_DIR+"/.java"};
@@ -174,7 +515,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  private void pTestClosure3() throws IOException {
+  @Test
+  public void pTestClosure3() throws IOException {
     try {    
       String [] files = new String[] {USER_DIR+"/a.txt.x", USER_DIR+"/ax",
                                       USER_DIR+"/ab37x", USER_DIR+"/bacd"};
@@ -188,7 +530,8 @@ public class TestGlobPaths extends TestC
     } 
   }
 
-  private void pTestClosure4() throws IOException {
+  @Test
+  public void pTestClosure4() throws IOException {
     try {
       String [] files = new String[] {USER_DIR+"/dir1/file1", 
                                       USER_DIR+"/dir2/file2", 
@@ -202,7 +545,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  private void pTestClosure5() throws IOException {
+  @Test
+  public void pTestClosure5() throws IOException {
     try {
       String [] files = new String[] {USER_DIR+"/dir1/file1", 
                                       USER_DIR+"/file1"};
@@ -214,7 +558,8 @@ public class TestGlobPaths extends TestC
     }
   }
 
-  private void pTestSet() throws IOException {
+  @Test
+  public void pTestSet() throws IOException {
     try {    
       String [] files = new String[] {USER_DIR+"/a.c", USER_DIR+"/a.cpp",
                                       USER_DIR+"/a.hlp", USER_DIR+"/a.hxy"};
@@ -228,7 +573,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  private void pTestRange() throws IOException {
+  @Test
+  public void pTestRange() throws IOException {
     try {    
       String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e",
                                       USER_DIR+"/a.f", USER_DIR+"/a.h"};
@@ -242,7 +588,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  private void pTestSetExcl() throws IOException {
+  @Test
+  public void pTestSetExcl() throws IOException {
     try {    
       String [] files = new String[] {USER_DIR+"/a.d", USER_DIR+"/a.e",
                                       USER_DIR+"/a.0", USER_DIR+"/a.h"};
@@ -255,7 +602,8 @@ public class TestGlobPaths extends TestC
     }
   }
 
-  private void pTestCombination() throws IOException {
+  @Test
+  public void pTestCombination() throws IOException {
     try {    
       String [] files = new String[] {"/user/aa/a.c", "/user/bb/a.cpp",
                                       "/user1/cc/b.hlp", "/user/dd/a.hxy"};
@@ -267,7 +615,8 @@ public class TestGlobPaths extends TestC
     }
   }
   
-  private void pTestRelativePath() throws IOException {
+  @Test
+  public void pTestRelativePath() throws IOException {
     try {
       String [] files = new String[] {"a", "abc", "abc.p", "bacd"};
       Path[] matchedPath = prepareTesting("a*", files);
@@ -281,7 +630,8 @@ public class TestGlobPaths extends TestC
   }
   
   /* Test {xx,yy} */
-  private void pTestCurlyBracket() throws IOException {
+  @Test
+  public void pTestCurlyBracket() throws IOException {
     Path[] matchedPath;
     String [] files;
     try {
@@ -380,7 +730,8 @@ public class TestGlobPaths extends TestC
   }
   
   /* test that a path name can contain Java regex special characters */
-  private void pTestJavaRegexSpecialChars() throws IOException {
+  @Test
+  public void pTestJavaRegexSpecialChars() throws IOException {
     try {
       String[] files = new String[] {USER_DIR+"/($.|+)bc", USER_DIR+"/abc"};
       Path[] matchedPath = prepareTesting(USER_DIR+"/($.|+)*", files);
@@ -391,6 +742,7 @@ public class TestGlobPaths extends TestC
     }
 
   }
+  
   private Path[] prepareTesting(String pattern, String[] files)
     throws IOException {
     for(int i=0; i<Math.min(NUM_OF_PATHS, files.length); i++) {
@@ -427,8 +779,9 @@ public class TestGlobPaths extends TestC
     return globResults;
   }
   
-  private void cleanupDFS() throws IOException {
-    fs.delete(new Path("/user"), true);
+  @After
+  public void cleanupDFS() throws IOException {
+    fs.delete(new Path(USER_DIR), true);
   }
   
 }