You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cn...@apache.org on 2014/11/19 02:10:14 UTC

hadoop git commit: HADOOP-11201. Hadoop Archives should support globs resolving to files. Contributed by Gera Shegalov.

Repository: hadoop
Updated Branches:
  refs/heads/trunk 9e81be011 -> 79301e80d


HADOOP-11201. Hadoop Archives should support globs resolving to files. Contributed by Gera Shegalov.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/79301e80
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/79301e80
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/79301e80

Branch: refs/heads/trunk
Commit: 79301e80d7510f055c01a06970bb409607a4197c
Parents: 9e81be0
Author: cnauroth <cn...@apache.org>
Authored: Tue Nov 18 17:05:48 2014 -0800
Committer: cnauroth <cn...@apache.org>
Committed: Tue Nov 18 17:05:48 2014 -0800

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |  3 +
 .../org/apache/hadoop/tools/HadoopArchives.java | 17 ++---
 .../apache/hadoop/tools/TestHadoopArchives.java | 77 +++++++++++++++++---
 3 files changed, 75 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/79301e80/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 013353a..a15a8c1 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -452,6 +452,9 @@ Release 2.7.0 - UNRELEASED
 
     HADOOP-11312. Fix unit tests to not use uppercase key names. (wang)
 
+    HADOOP-11201. Hadoop Archives should support globs resolving to files.
+    (Gera Shegalov via cnauroth)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/79301e80/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
index e53576d..aa30277 100644
--- a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
+++ b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
@@ -101,7 +101,7 @@ public class HadoopArchives implements Tool {
   short repl = 10;
 
   private static final String usage = "archive"
-  + " -archiveName NAME -p <parent path> [-r <replication factor>]" +
+  + " -archiveName <NAME>.har -p <parent path> [-r <replication factor>]" +
       "<src>* <dest>" +
   "\n";
   
@@ -348,15 +348,10 @@ public class HadoopArchives implements Tool {
    */
   private void writeTopLevelDirs(SequenceFile.Writer srcWriter, 
       List<Path> paths, Path parentPath) throws IOException {
-    //add all the directories 
-    List<Path> justDirs = new ArrayList<Path>();
+    // extract paths from absolute URIs
+    List<Path> justPaths = new ArrayList<Path>();
     for (Path p: paths) {
-      if (!p.getFileSystem(getConf()).isFile(p)) {
-        justDirs.add(new Path(p.toUri().getPath()));
-      }
-      else {
-        justDirs.add(new Path(p.getParent().toUri().getPath()));
-      }
+      justPaths.add(new Path(p.toUri().getPath()));
     }
     /* find all the common parents of paths that are valid archive
      * paths. The below is done so that we do not add a common path
@@ -372,7 +367,7 @@ public class HadoopArchives implements Tool {
     Path root = new Path(Path.SEPARATOR);
     for (int i = parentPath.depth(); i < deepest.depth(); i++) {
       List<Path> parents = new ArrayList<Path>();
-      for (Path p: justDirs) {
+      for (Path p: justPaths) {
         if (p.compareTo(root) == 0){
           //do nothing
         }
@@ -392,7 +387,7 @@ public class HadoopArchives implements Tool {
           }
         }
       }
-      justDirs = parents;
+      justPaths = parents;
     }
     Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
     for (Map.Entry<String, HashSet<String>> entry : keyVals) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/79301e80/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
index e7eef3f..3fa5919 100644
--- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
+++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
@@ -203,9 +203,58 @@ public class TestHadoopArchives {
     Assert.assertEquals(originalPaths, harPaths);
   }
 
-  private static List<String> lsr(final FsShell shell, String dir)
-      throws Exception {
-    System.out.println("lsr root=" + dir);
+  @Test
+  public void testSingleFile() throws Exception {
+    final Path sub1 = new Path(inputPath, "dir1");
+    fs.mkdirs(sub1);
+    String singleFileName = "a";
+    createFile(inputPath, fs, sub1.getName(), singleFileName);
+    final FsShell shell = new FsShell(conf);
+
+    final List<String> originalPaths = lsr(shell, sub1.toString());
+    System.out.println("originalPaths: " + originalPaths);
+
+    // make the archive:
+    final String fullHarPathStr = makeArchive(sub1, singleFileName);
+
+    // compare results:
+    final List<String> harPaths = lsr(shell, fullHarPathStr);
+    Assert.assertEquals(originalPaths, harPaths);
+  }
+
+  @Test
+  public void testGlobFiles() throws Exception {
+    final Path sub1 = new Path(inputPath, "dir1");
+    final Path sub2 = new Path(inputPath, "dir2");
+    fs.mkdirs(sub1);
+    String fileName = "a";
+    createFile(inputPath, fs, sub1.getName(), fileName);
+    createFile(inputPath, fs, sub2.getName(), fileName);
+    createFile(inputPath, fs, sub1.getName(), "b"); // not part of result
+
+    final String glob =  "dir{1,2}/a";
+    final FsShell shell = new FsShell(conf);
+    final List<String> originalPaths = lsr(shell, inputPath.toString(),
+        inputPath + "/" + glob);
+    System.out.println("originalPaths: " + originalPaths);
+
+    // make the archive:
+    final String fullHarPathStr = makeArchive(inputPath, glob);
+
+    // compare results:
+    final List<String> harPaths = lsr(shell, fullHarPathStr,
+        fullHarPathStr + "/" + glob);
+    Assert.assertEquals(originalPaths, harPaths);
+  }
+
+  private static List<String> lsr(final FsShell shell, String rootDir) throws Exception {
+    return lsr(shell, rootDir, null);
+  }
+
+  private static List<String> lsr(final FsShell shell, String rootDir,
+      String glob) throws Exception {
+    final String dir = glob == null ? rootDir : glob;
+    System.out.println("lsr root=" + rootDir);
     final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
     final PrintStream out = new PrintStream(bytes);
     final PrintStream oldOut = System.out;
@@ -222,9 +271,9 @@ public class TestHadoopArchives {
       System.setErr(oldErr);
     }
     System.out.println("lsr results:\n" + results);
-    String dirname = dir;
-    if (dir.lastIndexOf(Path.SEPARATOR) != -1) {
-      dirname = dir.substring(dir.lastIndexOf(Path.SEPARATOR));
+    String dirname = rootDir;
+    if (rootDir.lastIndexOf(Path.SEPARATOR) != -1) {
+      dirname = rootDir.substring(rootDir.lastIndexOf(Path.SEPARATOR));
     }
 
     final List<String> paths = new ArrayList<String>();
@@ -621,13 +670,19 @@ public class TestHadoopArchives {
     return bb;
   }
 
+
+  private String makeArchive() throws Exception {
+    return makeArchive(inputPath, null);
+  }
+
   /*
    * Run the HadoopArchives tool to create an archive on the 
    * given file system.
    */
-  private String makeArchive() throws Exception {
-    final String inputPathStr = inputPath.toUri().getPath();
-    System.out.println("inputPathStr = " + inputPathStr);
+  private String makeArchive(Path parentPath, String relGlob) throws Exception {
+    final String parentPathStr = parentPath.toUri().getPath();
+    final String relPathGlob = relGlob == null ? "*" : relGlob;
+    System.out.println("parentPathStr = " + parentPathStr);
 
     final URI uri = fs.getUri();
     final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
@@ -635,8 +690,8 @@ public class TestHadoopArchives {
 
     final String harName = "foo.har";
     final String fullHarPathStr = prefix + harName;
-    final String[] args = { "-archiveName", harName, "-p", inputPathStr, "*",
-        archivePath.toString() };
+    final String[] args = { "-archiveName", harName, "-p", parentPathStr,
+        relPathGlob, archivePath.toString() };
     System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
         HADOOP_ARCHIVES_JAR);
     final HadoopArchives har = new HadoopArchives(conf);