You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by st...@apache.org on 2020/10/08 09:36:54 UTC
[hadoop] branch branch-3.3 updated: HADOOP-17021. Add concat fs
command (#1993)
This is an automated email from the ASF dual-hosted git repository.
stevel pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 44ff4c1 HADOOP-17021. Add concat fs command (#1993)
44ff4c1 is described below
commit 44ff4c10587029db88021890a412975081077905
Author: Jinglun <be...@outlook.com>
AuthorDate: Thu Oct 8 17:36:07 2020 +0800
HADOOP-17021. Add concat fs command (#1993)
Contributed by Jinglun
Change-Id: Ia10ad2205ed0f3594c391ee78f7df4c3c31c796d
---
.../java/org/apache/hadoop/fs/shell/Concat.java | 91 +++++++++++
.../java/org/apache/hadoop/fs/shell/FsCommand.java | 1 +
.../src/site/markdown/FileSystemShell.md | 13 ++
.../apache/hadoop/fs/shell/TestFsShellConcat.java | 167 +++++++++++++++++++++
4 files changed, 272 insertions(+)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Concat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Concat.java
new file mode 100644
index 0000000..5afafaf
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Concat.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.shell;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.LinkedList;
+
+import com.google.common.annotations.VisibleForTesting;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIOException;
+
+/**
+ * Concat the given files.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class Concat extends FsCommand {
+ // Hook for CommandFactory: exposes this class under the "-concat" shell flag.
+ public static void registerCommands(CommandFactory factory) {
+ factory.addClass(Concat.class, "-concat");
+ }
+
+ public static final String NAME = "concat";
+ public static final String USAGE = "<target path> <src path> <src path> ...";
+ public static final String DESCRIPTION = "Concatenate existing source files"
+ + " into the target file. Target file and source files should be in the"
+ + " same directory.";
+ // Injected by setTestFs() so unit tests can substitute a mock FileSystem
+ // for the one resolved from the target path; null in production use.
+ private static FileSystem testFs; // test only.
+
+ // Validates the argument list (first arg is the target, the rest are
+ // sources; all must exist and be regular files) and delegates to
+ // FileSystem#concat. Throws IOException on bad arity,
+ // FileNotFoundException on a missing/non-file path, and PathIOException
+ // when the destination filesystem does not implement concat.
+ @Override
+ protected void processArguments(LinkedList<PathData> args)
+ throws IOException {
+ if (args.size() < 1) {
+ throw new IOException("Target path not specified. " + USAGE);
+ }
+ // args still contains the target here, so size < 3 means fewer than
+ // two source paths were supplied.
+ if (args.size() < 3) {
+ throw new IOException(
+ "The number of source paths is less than 2. " + USAGE);
+ }
+ PathData target = args.removeFirst();
+ // NOTE: srcList is an alias of args (same list object), not a copy;
+ // the loop below reads args.size(), which is therefore equivalent.
+ LinkedList<PathData> srcList = args;
+ if (!target.exists || !target.stat.isFile()) {
+ throw new FileNotFoundException(String
+ .format("Target path %s does not exist or is" + " not file.",
+ target.path));
+ }
+ Path[] srcArray = new Path[srcList.size()];
+ for (int i = 0; i < args.size(); i++) {
+ PathData src = srcList.get(i);
+ if (!src.exists || !src.stat.isFile()) {
+ throw new FileNotFoundException(
+ String.format("%s does not exist or is not file.", src.path));
+ }
+ srcArray[i] = src.path;
+ }
+ // Prefer the test-injected filesystem when present (see setTestFs).
+ FileSystem fs = target.fs;
+ if (testFs != null) {
+ fs = testFs;
+ }
+ try {
+ fs.concat(target.path, srcArray);
+ } catch (UnsupportedOperationException exception) {
+ // Surface a path-aware error (with the scheme) instead of the raw
+ // UnsupportedOperationException; the cause is preserved.
+ throw new PathIOException("Dest filesystem '" + fs.getUri().getScheme()
+ + "' doesn't support concat.", exception);
+ }
+ }
+
+ // Test-only injection point; static, so it affects all subsequent
+ // invocations until reset.
+ @VisibleForTesting
+ static void setTestFs(FileSystem fs) {
+ testFs = fs;
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java
index 784bbf3..9cafbb0 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java
@@ -70,6 +70,7 @@ abstract public class FsCommand extends Command {
factory.registerCommands(Truncate.class);
factory.registerCommands(SnapshotCommands.class);
factory.registerCommands(XAttrCommands.class);
+ factory.registerCommands(Concat.class);
}
protected FsCommand() {}
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
index 54438ce..09a4cc2 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/FileSystemShell.md
@@ -810,6 +810,18 @@ Example:
* `hadoop fs -truncate 55 /user/hadoop/file1 /user/hadoop/file2`
* `hadoop fs -truncate -w 127 hdfs://nn1.example.com/user/hadoop/file1`
+concat
+--------
+
+Usage: `hadoop fs -concat <target file> <source files>`
+
+Concatenate existing source files into the target file. Target file and source
+files should be in the same directory.
+
+Example:
+
+* `hadoop fs -concat hdfs://cluster/user/hadoop/target-file hdfs://cluster/user/hadoop/file-0 hdfs://cluster/user/hadoop/file-1`
+
usage
-----
@@ -1089,6 +1101,7 @@ actually fail.
| `setfattr` | generally unsupported permissions model |
| `setrep`| has no effect |
| `truncate` | generally unsupported |
+| `concat` | generally unsupported |
Different object store clients *may* support these commands: do consult the
documentation and test against the target store.
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestFsShellConcat.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestFsShellConcat.java
new file mode 100644
index 0000000..a2c4d3a
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestFsShellConcat.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.shell;
+
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.util.Random;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.assertj.core.api.Assertions;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FsShell;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.AbstractHadoopTestBase;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Test Concat.
+ */
+public class TestFsShellConcat extends AbstractHadoopTestBase {
+
+ private static Configuration conf;
+ private static FsShell shell;
+ private static LocalFileSystem lfs;
+ // Per-test temp directory on the local filesystem; wiped in before().
+ private static Path testRootDir;
+ // Pre-created empty concat target inside testRootDir.
+ private static Path dstPath;
+
+ // Creates a clean temp dir, an empty target file, and ten source files
+ // named file-00..file-09, each holding a single random byte
+ // (OutputStream.write(int) writes only the low-order byte).
+ @Before
+ public void before() throws IOException {
+ conf = new Configuration();
+ shell = new FsShell(conf);
+ lfs = FileSystem.getLocal(conf);
+ testRootDir = lfs.makeQualified(new Path(GenericTestUtils.getTempPath(
+ "testFsShellCopy")));
+
+ lfs.delete(testRootDir, true);
+ lfs.mkdirs(testRootDir);
+ lfs.setWorkingDirectory(testRootDir);
+ dstPath = new Path(testRootDir, "dstFile");
+ lfs.create(dstPath).close();
+
+ Random random = new Random();
+ for (int i = 0; i < 10; i++) {
+ OutputStream out =
+ lfs.create(new Path(testRootDir, String.format("file-%02d", i)));
+ out.write(random.nextInt());
+ out.close();
+ }
+ }
+
+ // Happy path: "-concat" with a glob of sources appends all source bytes
+ // to the target and removes the sources, leaving one file behind.
+ @Test
+ public void testConcat() throws Exception {
+ // Read concatenated files to build the expected file content.
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ for (int i = 0; i < 10; i++) {
+ try (InputStream in = lfs
+ .open(new Path(testRootDir, String.format("file-%02d", i)))) {
+ IOUtils.copyBytes(in, out, 1024);
+ }
+ }
+ byte[] expectContent = out.toByteArray();
+
+ // Do concat.
+ // LocalFileSystem has no real concat, so route FileSystem#concat to
+ // mockConcat(), which simulates it on the local filesystem.
+ FileSystem mockFs = Mockito.mock(FileSystem.class);
+ Mockito.doAnswer(invocation -> {
+ Object[] args = invocation.getArguments();
+ Path target = (Path)args[0];
+ Path[] src = (Path[]) args[1];
+ mockConcat(target, src);
+ return null;
+ }).when(mockFs).concat(any(Path.class), any(Path[].class));
+ Concat.setTestFs(mockFs);
+ shellRun(0, "-concat", dstPath.toString(), testRootDir+"/file-*");
+
+ // Verify concat result.
+ ContractTestUtils
+ .assertPathExists(lfs, "The target file doesn't exist.", dstPath);
+ // Only the target should remain; mockConcat deleted the sources.
+ Assertions.assertThat(lfs.listStatus(testRootDir).length).isEqualTo(1);
+ assertEquals(expectContent.length, lfs.getFileStatus(dstPath).getLen());
+ out = new ByteArrayOutputStream();
+ try (InputStream in = lfs.open(dstPath)) {
+ IOUtils.copyBytes(in, out, 1024);
+ }
+ // Verify content.
+ byte[] concatedContent = out.toByteArray();
+ assertEquals(expectContent.length, concatedContent.length);
+ ContractTestUtils.compareByteArrays(expectContent, concatedContent,
+ expectContent.length);
+ }
+
+ // When the destination filesystem throws UnsupportedOperationException,
+ // the shell must exit non-zero and print the "doesn't support concat"
+ // message (scheme taken from the mocked URI "mockfs:///").
+ @Test
+ public void testUnsupportedFs() throws Exception {
+ FileSystem mockFs = Mockito.mock(FileSystem.class);
+ Mockito.doThrow(
+ new UnsupportedOperationException("Mock unsupported exception."))
+ .when(mockFs).concat(any(Path.class), any(Path[].class));
+ Mockito.doAnswer(invocationOnMock -> new URI("mockfs:///")).when(mockFs)
+ .getUri();
+ Concat.setTestFs(mockFs);
+ // Capture stderr so the shell's error output can be asserted on.
+ final ByteArrayOutputStream err = new ByteArrayOutputStream();
+ PrintStream oldErr = System.err;
+ System.setErr(new PrintStream(err));
+ try {
+ shellRun(1, "-concat", dstPath.toString(), testRootDir + "/file-*");
+ } finally {
+ System.setErr(oldErr);
+ }
+ // Echo the captured output for diagnosis after restoring stderr.
+ System.err.print(err.toString());
+ String expectedErrMsg = "Dest filesystem 'mockfs' doesn't support concat";
+ Assertions.assertThat(err.toString().contains(expectedErrMsg))
+ .withFailMessage("The err message should contain \"" + expectedErrMsg
+ + "\" message.").isTrue();
+ }
+
+ // Runs the FsShell with the given args and asserts its exit code is n.
+ private void shellRun(int n, String... args) {
+ assertEquals(n, shell.run(args));
+ }
+
+ /**
+ * Simple simulation of concat.
+ */
+ // Renames the target aside, recreates it, copies the original target
+ // bytes back followed by each source's bytes in order, deleting the
+ * backup and each source as it goes.
+ private void mockConcat(Path target, Path[] srcArray) throws IOException {
+ Path tmp = new Path(target.getParent(), target.getName() + ".bak");
+ lfs.rename(target, tmp);
+ try (OutputStream out = lfs.create(target)) {
+ try (InputStream in = lfs.open(tmp)) {
+ IOUtils.copyBytes(in, out, 1024);
+ }
+ lfs.delete(tmp, true);
+ for (int i = 0; i < srcArray.length; i++) {
+ try (InputStream iin = lfs.open(srcArray[i])) {
+ IOUtils.copyBytes(iin, out, 1024);
+ }
+ lfs.delete(srcArray[i], true);
+ }
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org