You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by as...@apache.org on 2015/12/23 00:51:28 UTC
[29/50] [abbrv] hadoop git commit: HADOOP-12657. Add an option to skip
newline on empty files with getMerge -nl. Contributed by Kanaka Kumar Avvaru.
HADOOP-12657. Add an option to skip newline on empty files with getMerge -nl. Contributed by Kanaka Kumar Avvaru.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/061c05cc
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/061c05cc
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/061c05cc
Branch: refs/heads/yarn-2877
Commit: 061c05cc05ff6257b14c5c4f25cbcec2d184cda7
Parents: bd5e207
Author: Akira Ajisaka <aa...@apache.org>
Authored: Fri Dec 18 13:58:28 2015 +0900
Committer: Akira Ajisaka <aa...@apache.org>
Committed: Fri Dec 18 13:58:28 2015 +0900
----------------------------------------------------------------------
hadoop-common-project/hadoop-common/CHANGES.txt | 3 ++
.../apache/hadoop/fs/shell/CopyCommands.java | 40 ++++++++++++--------
.../src/site/markdown/FileSystemShell.md | 1 +
.../org/apache/hadoop/fs/TestFsShellCopy.java | 24 ++++++++++--
.../src/test/resources/testConf.xml | 7 +++-
5 files changed, 55 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/061c05cc/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 9bd4d6e..6263f74 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -688,6 +688,9 @@ Release 2.8.0 - UNRELEASED
HADOOP-10300. Allowed deferred sending of call responses. (Daryn Sharp via
yliu)
+ HADOOP-12657. Add a option to skip newline on empty files with getMerge -nl.
+ (Kanaka Kumar Avvaru via aajisaka)
+
IMPROVEMENTS
HADOOP-12458. Retries is typoed to spell Retires in parts of
http://git-wip-us.apache.org/repos/asf/hadoop/blob/061c05cc/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
index c4e42c9..e2fad75 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
@@ -53,24 +53,29 @@ class CopyCommands {
/** merge multiple files together */
public static class Merge extends FsCommand {
public static final String NAME = "getmerge";
- public static final String USAGE = "[-nl] <src> <localdst>";
+ public static final String USAGE = "[-nl] [-skip-empty-file] "
+ + "<src> <localdst>";
public static final String DESCRIPTION =
- "Get all the files in the directories that " +
- "match the source file pattern and merge and sort them to only " +
- "one file on local fs. <src> is kept.\n" +
- "-nl: Add a newline character at the end of each file.";
+ "Get all the files in the directories that "
+ + "match the source file pattern and merge and sort them to only "
+ + "one file on local fs. <src> is kept.\n"
+ + "-nl: Add a newline character at the end of each file.\n"
+ + "-skip-empty-file: Do not add new line character for empty file.";
protected PathData dst = null;
protected String delimiter = null;
+ private boolean skipEmptyFileDelimiter;
protected List<PathData> srcs = null;
@Override
protected void processOptions(LinkedList<String> args) throws IOException {
try {
- CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "nl");
+ CommandFormat cf = new CommandFormat(2, Integer.MAX_VALUE, "nl",
+ "skip-empty-file");
cf.parse(args);
delimiter = cf.getOpt("nl") ? "\n" : null;
+ skipEmptyFileDelimiter = cf.getOpt("skip-empty-file");
dst = new PathData(new URI(args.removeLast()), getConf());
if (dst.exists && dst.stat.isDirectory()) {
@@ -92,21 +97,26 @@ class CopyCommands {
FSDataOutputStream out = dst.fs.create(dst.path);
try {
for (PathData src : srcs) {
- FSDataInputStream in = src.fs.open(src.path);
- try {
- IOUtils.copyBytes(in, out, getConf(), false);
- if (delimiter != null) {
- out.write(delimiter.getBytes("UTF-8"));
+ if (src.stat.getLen() != 0) {
+ try (FSDataInputStream in = src.fs.open(src.path)) {
+ IOUtils.copyBytes(in, out, getConf(), false);
+ writeDelimiter(out);
}
- } finally {
- in.close();
+ } else if (!skipEmptyFileDelimiter) {
+ writeDelimiter(out);
}
}
} finally {
out.close();
- }
+ }
}
-
+
+ private void writeDelimiter(FSDataOutputStream out) throws IOException {
+ if (delimiter != null) {
+ out.write(delimiter.getBytes("UTF-8"));
+ }
+ }
+
@Override
protected void processNonexistentPath(PathData item) throws IOException {
exitCode = 1; // flag that a path is bad
http://git-wip-us.apache.org/repos/asf/hadoop/blob/061c05cc/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
index e243dea..d32156e 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
@@ -375,6 +375,7 @@ getmerge
Usage: `hadoop fs -getmerge [-nl] <src> <localdst>`
Takes a source directory and a destination file as input and concatenates files in src into the destination local file. Optionally -nl can be set to enable adding a newline character (LF) at the end of each file.
+-skip-empty-file can be used to avoid unwanted newline characters in case of empty files.
Examples:
http://git-wip-us.apache.org/repos/asf/hadoop/blob/061c05cc/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
index 1d626f9..6b5de74 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShellCopy.java
@@ -318,6 +318,7 @@ public class TestFsShellCopy {
Path f1 = new Path(root, "f1");
Path f2 = new Path(root, "f2");
Path f3 = new Path(root, "f3");
+ Path empty = new Path(root, "empty");
Path fnf = new Path(root, "fnf");
Path d = new Path(root, "dir");
Path df1 = new Path(d, "df1");
@@ -325,7 +326,8 @@ public class TestFsShellCopy {
Path df3 = new Path(d, "df3");
createFile(f1, f2, f3, df1, df2, df3);
-
+ createEmptyFile(empty);
+
int exit;
// one file, kind of silly
exit = shell.run(new String[]{
@@ -366,6 +368,13 @@ public class TestFsShellCopy {
assertEquals(0, exit);
assertEquals("f1\nf2\n", readFile("out"));
+ exit = shell.run(new String[]{
+ "-getmerge", "-nl", "-skip-empty-file",
+ f1.toString(), f2.toString(), empty.toString(),
+ "out" });
+ assertEquals(0, exit);
+ assertEquals("f1\nf2\n", readFile("out"));
+
// glob three files
shell.run(new String[]{
"-getmerge", "-nl",
@@ -374,13 +383,13 @@ public class TestFsShellCopy {
assertEquals(0, exit);
assertEquals("f1\nf2\nf3\n", readFile("out"));
- // directory with 3 files, should skip subdir
+ // directory with 1 empty + 3 non empty files, should skip subdir
shell.run(new String[]{
"-getmerge", "-nl",
root.toString(),
"out" });
assertEquals(0, exit);
- assertEquals("f1\nf2\nf3\n", readFile("out"));
+ assertEquals("\nf1\nf2\nf3\n", readFile("out"));
// subdir
shell.run(new String[]{
@@ -538,7 +547,14 @@ public class TestFsShellCopy {
out.close();
}
}
-
+
+ private void createEmptyFile(Path ... paths) throws IOException {
+ for (Path path : paths) {
+ FSDataOutputStream out = lfs.create(path);
+ out.close();
+ }
+ }
+
private String readFile(String out) throws IOException {
Path path = new Path(out);
FileStatus stat = lfs.getFileStatus(path);
http://git-wip-us.apache.org/repos/asf/hadoop/blob/061c05cc/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
index fd71034..79ab282 100644
--- a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
+++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
@@ -601,7 +601,7 @@
<comparators>
<comparator>
<type>RegexpComparator</type>
- <expected-output>^-getmerge \[-nl\] <src> <localdst> :\s*</expected-output>
+ <expected-output>^-getmerge \[-nl\] \[-skip-empty-file\] <src> <localdst> :\s*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
@@ -615,6 +615,11 @@
<type>RegexpComparator</type>
<expected-output>^( |\t)*-nl\s+Add a newline character at the end of each file.( )*</expected-output>
</comparator>
+ <comparator>
+ <type>RegexpComparator</type>
+ <expected-output>^( |\t)*-skip-empty-file\s+Do not add new line character for empty file.( )*</expected-output>
+ </comparator>
+
</comparators>
</test>