You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by wa...@apache.org on 2017/09/13 00:35:36 UTC
hadoop git commit: HDFS-12222. Document and test BlockLocation for
erasure-coded files. Contributed by Huafeng Wang.
Repository: hadoop
Updated Branches:
refs/heads/trunk a1c398e0d -> f4b626746
HDFS-12222. Document and test BlockLocation for erasure-coded files. Contributed by Huafeng Wang.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f4b62674
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f4b62674
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f4b62674
Branch: refs/heads/trunk
Commit: f4b6267465d139bfdaf75e25761672eaf61d8a11
Parents: a1c398e
Author: Andrew Wang <wa...@apache.org>
Authored: Tue Sep 12 17:35:30 2017 -0700
Committer: Andrew Wang <wa...@apache.org>
Committed: Tue Sep 12 17:35:30 2017 -0700
----------------------------------------------------------------------
.../apache/hadoop/fs/AbstractFileSystem.java | 5 +
.../org/apache/hadoop/fs/BlockLocation.java | 28 +++
.../java/org/apache/hadoop/fs/FileContext.java | 33 +++-
.../java/org/apache/hadoop/fs/FileSystem.java | 32 +++-
.../org/apache/hadoop/fs/LocatedFileStatus.java | 7 +
.../main/java/org/apache/hadoop/fs/Hdfs.java | 15 ++
.../java/org/apache/hadoop/hdfs/DFSClient.java | 5 +
.../hadoop/hdfs/DistributedFileSystem.java | 14 ++
.../hdfs/protocol/HdfsLocatedFileStatus.java | 12 +-
.../hadoop/hdfs/TestDistributedFileSystem.java | 1 -
.../TestDistributedFileSystemWithECFile.java | 186 +++++++++++++++++++
11 files changed, 333 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
index df14ee8..cf484ca 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
@@ -928,6 +928,11 @@ public abstract class AbstractFileSystem {
* The specification of this method matches that of
* {@link FileContext#listLocatedStatus(Path)} except that Path f
* must be for this file system.
+ *
+ * In the HDFS implementation, the BlockLocations of a returned
+ * LocatedFileStatus have different formats for replicated and erasure-coded
+ * files. Please refer to
+ * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} for details.
*/
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f)
throws AccessControlException, FileNotFoundException,
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java
index 591febf..4dae233 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java
@@ -28,6 +28,34 @@ import org.apache.hadoop.util.StringInterner;
* Represents the network location of a block, information about the hosts
* that contain block replicas, and other block metadata (E.g. the file
* offset associated with the block, length, whether it is corrupt, etc).
+ *
+ * For a single BlockLocation, it will have different meanings for replicated
+ * and erasure coded files.
+ *
+ * If the file is 3-replicated, the offset and length of a BlockLocation are
+ * absolute values within the file, and the hosts are the 3 datanodes that
+ * hold the replicas. Here is an example:
+ * <pre>
+ * BlockLocation(offset: 0, length: BLOCK_SIZE,
+ * hosts: {"host1:9866", "host2:9866", "host3:9866"})
+ * </pre>
+ *
+ * And if the file is erasure-coded, each BlockLocation represents a logical
+ * block group. The offset is the offset of the block group in the file and
+ * the length is the total length of the block group. The hosts of a
+ * BlockLocation are the datanodes that hold all the data blocks and parity
+ * blocks of the block group.
+ * Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ * A BlockLocation example will be like:
+ * <pre>
+ * BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+ * "host2:9866","host3:9866","host4:9866","host5:9866"})
+ * </pre>
+ *
+ * Please refer to
+ * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or
+ * {@link FileContext#getFileBlockLocations(Path, long, long)}
+ * for more examples.
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java
index 21733b3..a3cc550 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java
@@ -1293,7 +1293,36 @@ public class FileContext {
*
* This call is most helpful with DFS, where it returns
* hostnames of machines that contain the given file.
- *
+ *
+ * In HDFS, if a file is three-replicated, the returned array contains
+ * elements like:
+ * <pre>
+ * BlockLocation(offset: 0, length: BLOCK_SIZE,
+ * hosts: {"host1:9866", "host2:9866", "host3:9866"})
+ * BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
+ * hosts: {"host2:9866", "host3:9866", "host4:9866"})
+ * </pre>
+ *
+ * And if a file is erasure-coded, the returned BlockLocations are logical
+ * block groups.
+ *
+ * Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ * 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ * there will be one BlockLocation returned, with 0 offset, length equal to
+ * the file size, and 4 hosts (2 data and 2 parity blocks) holding the blocks.
+ * 2. If the file size is less than one group size but greater than one
+ * stripe size, then there will be one BlockLocation returned, with 0 offset,
+ * length equal to the file size, and 5 hosts (3 data and 2 parity blocks)
+ * holding the blocks.
+ * 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ * for example, then the result will be like:
+ * <pre>
+ * BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+ * "host2:9866","host3:9866","host4:9866","host5:9866"})
+ * BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
+ * "host4:9866", "host5:9866"})
+ * </pre>
+ *
* @param f - get blocklocations of this file
* @param start position (byte offset)
* @param len (in bytes)
@@ -1527,7 +1556,7 @@ public class FileContext {
* Return the file's status and block locations If the path is a file.
*
* If a returned status is a file, it contains the file's block locations.
- *
+ *
* @param f is the path
*
* @return an iterator that traverses statuses of the files/directories
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
index fc7b9b2..d43e41d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
@@ -799,7 +799,36 @@ public abstract class FileSystem extends Configured implements Closeable {
* The default implementation returns an array containing one element:
* <pre>
* BlockLocation( { "localhost:9866" }, { "localhost" }, 0, file.getLen())
- * </pre>>
+ * </pre>
+ *
+ * In HDFS, if a file is three-replicated, the returned array contains
+ * elements like:
+ * <pre>
+ * BlockLocation(offset: 0, length: BLOCK_SIZE,
+ * hosts: {"host1:9866", "host2:9866", "host3:9866"})
+ * BlockLocation(offset: BLOCK_SIZE, length: BLOCK_SIZE,
+ * hosts: {"host2:9866", "host3:9866", "host4:9866"})
+ * </pre>
+ *
+ * And if a file is erasure-coded, the returned BlockLocations are logical
+ * block groups.
+ *
+ * Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ * 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ * there will be one BlockLocation returned, with 0 offset, length equal to
+ * the file size, and 4 hosts (2 data and 2 parity blocks) holding the blocks.
+ * 2. If the file size is less than one group size but greater than one
+ * stripe size, then there will be one BlockLocation returned, with 0 offset,
+ * length equal to the file size, and 5 hosts (3 data and 2 parity blocks)
+ * holding the blocks.
+ * 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ * for example, then the result will be like:
+ * <pre>
+ * BlockLocation(offset: 0, length: 3 * BLOCK_SIZE, hosts: {"host1:9866",
+ * "host2:9866","host3:9866","host4:9866","host5:9866"})
+ * BlockLocation(offset: 3 * BLOCK_SIZE, length: 123, hosts: {"host1:9866",
+ * "host4:9866", "host5:9866"})
+ * </pre>
*
* @param file FilesStatus to get data from
* @param start offset into the given file
@@ -2115,6 +2144,7 @@ public abstract class FileSystem extends Configured implements Closeable {
* List the statuses and block locations of the files in the given path.
* Does not guarantee to return the iterator that traverses statuses
* of the files in a sorted order.
+ *
* <pre>
* If the path is a directory,
* if recursive is false, returns files in the directory;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
index dbb751d..29e1998 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocatedFileStatus.java
@@ -123,6 +123,13 @@ public class LocatedFileStatus extends FileStatus {
/**
* Get the file's block locations
+ *
+ * In HDFS, the returned BlockLocations will have different formats for
+ * replicated and erasure-coded files.
+ * Please refer to
+ * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
+ * for more details.
+ *
* @return the file's block locations
*/
public BlockLocation[] getBlockLocations() {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
index cd870ca..0138195 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java
@@ -115,6 +115,14 @@ public class Hdfs extends AbstractFileSystem {
return dfs.delete(getUriPath(f), recursive);
}
+ /**
+ * The returned BlockLocations will have different formats for replicated
+ * and erasure-coded files.
+ *
+ * Please refer to
+ * {@link FileContext#getFileBlockLocations(Path, long, long)}
+ * for more details.
+ */
@Override
public BlockLocation[] getFileBlockLocations(Path p, long start, long len)
throws IOException, UnresolvedLinkException {
@@ -165,6 +173,13 @@ public class Hdfs extends AbstractFileSystem {
return dfs.getServerDefaults();
}
+ /**
+ * The BlockLocations of a returned LocatedFileStatus will have different
+ * formats for replicated and erasure-coded files.
+ * Please refer to
+ * {@link FileContext#getFileBlockLocations(Path, long, long)} for
+ * more details.
+ */
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(
final Path p)
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index 9239df3..772049d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -72,6 +72,7 @@ import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileEncryptionInfo;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.FsStatus;
@@ -866,6 +867,10 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
* data-placement when performing operations. For example, the
* MapReduce system tries to schedule tasks on the same machines
* as the data-block the task processes.
+ *
+ * Please refer to
+ * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
+ * for more details.
*/
public BlockLocation[] getBlockLocations(String src, long start,
long length) throws IOException {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
index 44caed6..f6331cf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
@@ -240,6 +240,13 @@ public class DistributedFileSystem extends FileSystem {
return getFileBlockLocations(file.getPath(), start, len);
}
+ /**
+ * The returned BlockLocations will have different formats for replicated
+ * and erasure-coded files.
+ * Please refer to
+ * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
+ * for more details.
+ */
@Override
public BlockLocation[] getFileBlockLocations(Path p,
final long start, final long len) throws IOException {
@@ -1040,6 +1047,13 @@ public class DistributedFileSystem extends FileSystem {
}.resolve(this, absF);
}
+ /**
+ * The BlockLocations of a returned LocatedFileStatus will have different
+ * formats for replicated and erasure-coded files.
+ * Please refer to
+ * {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} for
+ * more details.
+ */
@Override
protected RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path p,
final PathFilter filter)
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java
index b82a860..193aae2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java
@@ -78,6 +78,17 @@ public class HdfsLocatedFileStatus extends HdfsFileStatus {
return locations;
}
+ /**
+ * This function is used to transform the underlying HDFS LocatedBlocks to
+ * BlockLocations.
+ *
+ * The returned BlockLocations will have different formats for replicated
+ * and erasure-coded files.
+ * Please refer to
+ * {@link org.apache.hadoop.fs.FileSystem#getFileBlockLocations
+ * (FileStatus, long, long)}
+ * for examples.
+ */
public final LocatedFileStatus makeQualifiedLocated(URI defaultUri,
Path path) {
makeQualified(defaultUri, path);
@@ -96,5 +107,4 @@ public class HdfsLocatedFileStatus extends HdfsFileStatus {
// satisfy findbugs
return super.hashCode();
}
-
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java
index 8e54e5f..987992e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java
@@ -50,7 +50,6 @@ import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicReference;
-import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.CommonConfigurationKeys;
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f4b62674/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java
new file mode 100644
index 0000000..a6a3a80
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystemWithECFile.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Testing correctness of FileSystem.getFileBlockLocations and
+ * FileSystem.listFiles for erasure coded files.
+ */
+public class TestDistributedFileSystemWithECFile {
+ private final ErasureCodingPolicy ecPolicy =
+ StripedFileTestUtil.getDefaultECPolicy();
+ private final int cellSize = ecPolicy.getCellSize();
+ private final short dataBlocks = (short) ecPolicy.getNumDataUnits();
+ private final short parityBlocks = (short) ecPolicy.getNumParityUnits();
+ private final int numDNs = dataBlocks + parityBlocks;
+ private final int stripesPerBlock = 4;
+ private final int blockSize = stripesPerBlock * cellSize;
+ private final int blockGroupSize = blockSize * dataBlocks;
+
+ private MiniDFSCluster cluster;
+ private FileContext fileContext;
+ private DistributedFileSystem fs;
+ private Configuration conf = new HdfsConfiguration();
+
+ @Before
+ public void setup() throws IOException {
+ conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+ conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY,
+ false);
+ conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
+ StripedFileTestUtil.getDefaultECPolicy().getName());
+ cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build();
+ fileContext = FileContext.getFileContext(cluster.getURI(0), conf);
+ fs = cluster.getFileSystem();
+ fs.mkdirs(new Path("/ec"));
+ cluster.getFileSystem().getClient().setErasureCodingPolicy("/ec",
+ StripedFileTestUtil.getDefaultECPolicy().getName());
+ }
+
+ @After
+ public void tearDown() throws IOException {
+ if (cluster != null) {
+ cluster.shutdown();
+ cluster = null;
+ }
+ }
+
+ private void createFile(String path, int size) throws Exception {
+ byte[] expected = StripedFileTestUtil.generateBytes(size);
+ Path src = new Path(path);
+ DFSTestUtil.writeFile(fs, src, new String(expected));
+ StripedFileTestUtil.waitBlockGroupsReported(fs, src.toString());
+ StripedFileTestUtil.verifyLength(fs, src, size);
+ }
+
+ @Test(timeout=60000)
+ public void testListECFilesSmallerThanOneCell() throws Exception {
+ createFile("/ec/smallcell", 1);
+ final List<LocatedFileStatus> retVal = new ArrayList<>();
+ final RemoteIterator<LocatedFileStatus> iter =
+ cluster.getFileSystem().listFiles(new Path("/ec"), true);
+ while (iter.hasNext()) {
+ retVal.add(iter.next());
+ }
+ assertTrue(retVal.size() == 1);
+ LocatedFileStatus fileStatus = retVal.get(0);
+ assertSmallerThanOneCell(fileStatus.getBlockLocations());
+
+ BlockLocation[] locations = cluster.getFileSystem().getFileBlockLocations(
+ fileStatus, 0, fileStatus.getLen());
+ assertSmallerThanOneCell(locations);
+
+ //Test FileContext
+ fileStatus = fileContext.listLocatedStatus(new Path("/ec")).next();
+ assertSmallerThanOneCell(fileStatus.getBlockLocations());
+ locations = fileContext.getFileBlockLocations(new Path("/ec/smallcell"),
+ 0, fileStatus.getLen());
+ assertSmallerThanOneCell(locations);
+ }
+
+ private void assertSmallerThanOneCell(BlockLocation[] locations)
+ throws IOException {
+ assertTrue(locations.length == 1);
+ BlockLocation blockLocation = locations[0];
+ assertTrue(blockLocation.getOffset() == 0);
+ assertTrue(blockLocation.getLength() == 1);
+ assertTrue(blockLocation.getHosts().length == 1 + parityBlocks);
+ }
+
+ @Test(timeout=60000)
+ public void testListECFilesSmallerThanOneStripe() throws Exception {
+ int dataBlocksNum = 3;
+ createFile("/ec/smallstripe", cellSize * dataBlocksNum);
+ RemoteIterator<LocatedFileStatus> iter =
+ cluster.getFileSystem().listFiles(new Path("/ec"), true);
+ LocatedFileStatus fileStatus = iter.next();
+ assertSmallerThanOneStripe(fileStatus.getBlockLocations(), dataBlocksNum);
+
+ BlockLocation[] locations = cluster.getFileSystem().getFileBlockLocations(
+ fileStatus, 0, fileStatus.getLen());
+ assertSmallerThanOneStripe(locations, dataBlocksNum);
+
+ //Test FileContext
+ fileStatus = fileContext.listLocatedStatus(new Path("/ec")).next();
+ assertSmallerThanOneStripe(fileStatus.getBlockLocations(), dataBlocksNum);
+ locations = fileContext.getFileBlockLocations(new Path("/ec/smallstripe"),
+ 0, fileStatus.getLen());
+ assertSmallerThanOneStripe(locations, dataBlocksNum);
+ }
+
+ private void assertSmallerThanOneStripe(BlockLocation[] locations,
+ int dataBlocksNum) throws IOException {
+ int expectedHostNum = dataBlocksNum + parityBlocks;
+ assertTrue(locations.length == 1);
+ BlockLocation blockLocation = locations[0];
+ assertTrue(blockLocation.getHosts().length == expectedHostNum);
+ assertTrue(blockLocation.getOffset() == 0);
+ assertTrue(blockLocation.getLength() == dataBlocksNum * cellSize);
+ }
+
+ @Test(timeout=60000)
+ public void testListECFilesMoreThanOneBlockGroup() throws Exception {
+ createFile("/ec/group", blockGroupSize + 123);
+ RemoteIterator<LocatedFileStatus> iter =
+ cluster.getFileSystem().listFiles(new Path("/ec"), true);
+ LocatedFileStatus fileStatus = iter.next();
+ assertMoreThanOneBlockGroup(fileStatus.getBlockLocations(), 123);
+
+ BlockLocation[] locations = cluster.getFileSystem().getFileBlockLocations(
+ fileStatus, 0, fileStatus.getLen());
+ assertMoreThanOneBlockGroup(locations, 123);
+
+ //Test FileContext
+ iter = fileContext.listLocatedStatus(new Path("/ec"));
+ fileStatus = iter.next();
+ assertMoreThanOneBlockGroup(fileStatus.getBlockLocations(), 123);
+ locations = fileContext.getFileBlockLocations(new Path("/ec/group"),
+ 0, fileStatus.getLen());
+ assertMoreThanOneBlockGroup(locations, 123);
+ }
+
+ private void assertMoreThanOneBlockGroup(BlockLocation[] locations,
+ int lastBlockSize) throws IOException {
+ assertTrue(locations.length == 2);
+ BlockLocation fistBlockGroup = locations[0];
+ assertTrue(fistBlockGroup.getHosts().length == numDNs);
+ assertTrue(fistBlockGroup.getOffset() == 0);
+ assertTrue(fistBlockGroup.getLength() == blockGroupSize);
+ BlockLocation lastBlock = locations[1];
+ assertTrue(lastBlock.getHosts().length == 1 + parityBlocks);
+ assertTrue(lastBlock.getOffset() == blockGroupSize);
+ assertTrue(lastBlock.getLength() == lastBlockSize);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org