You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by cn...@apache.org on 2014/11/19 02:10:14 UTC
hadoop git commit: HADOOP-11201. Hadoop Archives should support globs
resolving to files. Contributed by Gera Shegalov.
Repository: hadoop
Updated Branches:
refs/heads/trunk 9e81be011 -> 79301e80d
HADOOP-11201. Hadoop Archives should support globs resolving to files. Contributed by Gera Shegalov.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/79301e80
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/79301e80
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/79301e80
Branch: refs/heads/trunk
Commit: 79301e80d7510f055c01a06970bb409607a4197c
Parents: 9e81be0
Author: cnauroth <cn...@apache.org>
Authored: Tue Nov 18 17:05:48 2014 -0800
Committer: cnauroth <cn...@apache.org>
Committed: Tue Nov 18 17:05:48 2014 -0800
----------------------------------------------------------------------
hadoop-common-project/hadoop-common/CHANGES.txt | 3 +
.../org/apache/hadoop/tools/HadoopArchives.java | 17 ++---
.../apache/hadoop/tools/TestHadoopArchives.java | 77 +++++++++++++++++---
3 files changed, 75 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/79301e80/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 013353a..a15a8c1 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -452,6 +452,9 @@ Release 2.7.0 - UNRELEASED
HADOOP-11312. Fix unit tests to not use uppercase key names. (wang)
+ HADOOP-11201. Hadoop Archives should support globs resolving to files.
+ (Gera Shegalov via cnauroth)
+
Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES
http://git-wip-us.apache.org/repos/asf/hadoop/blob/79301e80/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
index e53576d..aa30277 100644
--- a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
+++ b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java
@@ -101,7 +101,7 @@ public class HadoopArchives implements Tool {
short repl = 10;
private static final String usage = "archive"
- + " -archiveName NAME -p <parent path> [-r <replication factor>]" +
+ + " -archiveName <NAME>.har -p <parent path> [-r <replication factor>]" +
"<src>* <dest>" +
"\n";
@@ -348,15 +348,10 @@ public class HadoopArchives implements Tool {
*/
private void writeTopLevelDirs(SequenceFile.Writer srcWriter,
List<Path> paths, Path parentPath) throws IOException {
- //add all the directories
- List<Path> justDirs = new ArrayList<Path>();
+ // extract paths from absolute URI's
+ List<Path> justPaths = new ArrayList<Path>();
for (Path p: paths) {
- if (!p.getFileSystem(getConf()).isFile(p)) {
- justDirs.add(new Path(p.toUri().getPath()));
- }
- else {
- justDirs.add(new Path(p.getParent().toUri().getPath()));
- }
+ justPaths.add(new Path(p.toUri().getPath()));
}
/* find all the common parents of paths that are valid archive
* paths. The below is done so that we do not add a common path
@@ -372,7 +367,7 @@ public class HadoopArchives implements Tool {
Path root = new Path(Path.SEPARATOR);
for (int i = parentPath.depth(); i < deepest.depth(); i++) {
List<Path> parents = new ArrayList<Path>();
- for (Path p: justDirs) {
+ for (Path p: justPaths) {
if (p.compareTo(root) == 0){
//do nothing
}
@@ -392,7 +387,7 @@ public class HadoopArchives implements Tool {
}
}
}
- justDirs = parents;
+ justPaths = parents;
}
Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
for (Map.Entry<String, HashSet<String>> entry : keyVals) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/79301e80/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
index e7eef3f..3fa5919 100644
--- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
+++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java
@@ -203,9 +203,58 @@ public class TestHadoopArchives {
Assert.assertEquals(originalPaths, harPaths);
}
- private static List<String> lsr(final FsShell shell, String dir)
- throws Exception {
- System.out.println("lsr root=" + dir);
+ @Test
+ public void testSingleFile() throws Exception {
+ final Path sub1 = new Path(inputPath, "dir1");
+ fs.mkdirs(sub1);
+ String singleFileName = "a";
+ createFile(inputPath, fs, sub1.getName(), singleFileName);
+ final FsShell shell = new FsShell(conf);
+
+ final List<String> originalPaths = lsr(shell, sub1.toString());
+ System.out.println("originalPaths: " + originalPaths);
+
+ // make the archive:
+ final String fullHarPathStr = makeArchive(sub1, singleFileName);
+
+ // compare results:
+ final List<String> harPaths = lsr(shell, fullHarPathStr);
+ Assert.assertEquals(originalPaths, harPaths);
+ }
+
+ @Test
+ public void testGlobFiles() throws Exception {
+ final Path sub1 = new Path(inputPath, "dir1");
+ final Path sub2 = new Path(inputPath, "dir2");
+ fs.mkdirs(sub1);
+ String fileName = "a";
+ createFile(inputPath, fs, sub1.getName(), fileName);
+ createFile(inputPath, fs, sub2.getName(), fileName);
+ createFile(inputPath, fs, sub1.getName(), "b"); // not part of result
+
+ final String glob = "dir{1,2}/a";
+ final FsShell shell = new FsShell(conf);
+ final List<String> originalPaths = lsr(shell, inputPath.toString(),
+ inputPath + "/" + glob);
+ System.out.println("originalPaths: " + originalPaths);
+
+ // make the archive:
+ final String fullHarPathStr = makeArchive(inputPath, glob);
+
+ // compare results:
+ final List<String> harPaths = lsr(shell, fullHarPathStr,
+ fullHarPathStr + "/" + glob);
+ Assert.assertEquals(originalPaths, harPaths);
+ }
+
+ private static List<String> lsr(final FsShell shell, String rootDir) throws Exception {
+ return lsr(shell, rootDir, null);
+ }
+
+ private static List<String> lsr(final FsShell shell, String rootDir,
+ String glob) throws Exception {
+ final String dir = glob == null ? rootDir : glob;
+ System.out.println("lsr root=" + rootDir);
final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
final PrintStream out = new PrintStream(bytes);
final PrintStream oldOut = System.out;
@@ -222,9 +271,9 @@ public class TestHadoopArchives {
System.setErr(oldErr);
}
System.out.println("lsr results:\n" + results);
- String dirname = dir;
- if (dir.lastIndexOf(Path.SEPARATOR) != -1) {
- dirname = dir.substring(dir.lastIndexOf(Path.SEPARATOR));
+ String dirname = rootDir;
+ if (rootDir.lastIndexOf(Path.SEPARATOR) != -1) {
+ dirname = rootDir.substring(rootDir.lastIndexOf(Path.SEPARATOR));
}
final List<String> paths = new ArrayList<String>();
@@ -621,13 +670,19 @@ public class TestHadoopArchives {
return bb;
}
+
+ private String makeArchive() throws Exception {
+ return makeArchive(inputPath, null);
+ }
+
/*
* Run the HadoopArchives tool to create an archive on the
* given file system.
*/
- private String makeArchive() throws Exception {
- final String inputPathStr = inputPath.toUri().getPath();
- System.out.println("inputPathStr = " + inputPathStr);
+ private String makeArchive(Path parentPath, String relGlob) throws Exception {
+ final String parentPathStr = parentPath.toUri().getPath();
+ final String relPathGlob = relGlob == null ? "*" : relGlob;
+ System.out.println("parentPathStr = " + parentPathStr);
final URI uri = fs.getUri();
final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
@@ -635,8 +690,8 @@ public class TestHadoopArchives {
final String harName = "foo.har";
final String fullHarPathStr = prefix + harName;
- final String[] args = { "-archiveName", harName, "-p", inputPathStr, "*",
- archivePath.toString() };
+ final String[] args = { "-archiveName", harName, "-p", parentPathStr,
+ relPathGlob, archivePath.toString() };
System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
HADOOP_ARCHIVES_JAR);
final HadoopArchives har = new HadoopArchives(conf);