You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by wa...@apache.org on 2013/10/09 23:30:09 UTC
svn commit: r1530802 - in
/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs: ./
src/main/java/org/apache/hadoop/hdfs/protocol/
src/main/java/org/apache/hadoop/hdfs/protocolPB/
src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/...
Author: wang
Date: Wed Oct 9 21:30:08 2013
New Revision: 1530802
URL: http://svn.apache.org/r1530802
Log:
HDFS-5304. Expose if a block replica is cached in getFileBlockLocations. (Contributed by Andrew Wang)
Modified:
hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt
hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java
hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java
hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto
hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java
Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt?rev=1530802&r1=1530801&r2=1530802&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt (original)
+++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-4949.txt Wed Oct 9 21:30:08 2013
@@ -54,6 +54,9 @@ HDFS-4949 (Unreleased)
HDFS-5190. Move cache pool related CLI commands to CacheAdmin.
(Contributed by Andrew Wang)
+ HDFS-5304. Expose if a block replica is cached in getFileBlockLocations.
+ (Contributed by Andrew Wang)
+
OPTIMIZATIONS
BUG FIXES
Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java?rev=1530802&r1=1530801&r2=1530802&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java (original)
+++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java Wed Oct 9 21:30:08 2013
@@ -17,15 +17,21 @@
*/
package org.apache.hadoop.hdfs.protocol;
+import java.util.List;
+
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.security.token.Token;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
/**
* Associates a block with the Datanodes that contain its replicas
* and other block metadata (E.g. the file offset associated with this
- * block, whether it is corrupt, security token, etc).
+ * block, whether it is corrupt, a location is cached in memory,
+ * security token, etc).
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
@@ -39,9 +45,16 @@ public class LocatedBlock {
// their locations are not part of this object
private boolean corrupt;
private Token<BlockTokenIdentifier> blockToken = new Token<BlockTokenIdentifier>();
+ /**
+ * List of cached datanode locations
+ */
+ private DatanodeInfo[] cachedLocs;
+
+ // Used when there are no locations
+ private static final DatanodeInfo[] EMPTY_LOCS = new DatanodeInfo[0];
public LocatedBlock(ExtendedBlock b, DatanodeInfo[] locs) {
- this(b, locs, -1, false); // startOffset is unknown
+ this(b, locs, -1); // startOffset is unknown
}
public LocatedBlock(ExtendedBlock b, DatanodeInfo[] locs, long startOffset) {
@@ -50,14 +63,26 @@ public class LocatedBlock {
public LocatedBlock(ExtendedBlock b, DatanodeInfo[] locs, long startOffset,
boolean corrupt) {
+ this(b, locs, startOffset, corrupt, EMPTY_LOCS);
+ }
+
+ public LocatedBlock(ExtendedBlock b, DatanodeInfo[] locs, long startOffset,
+ boolean corrupt, DatanodeInfo[] cachedLocs) {
this.b = b;
this.offset = startOffset;
this.corrupt = corrupt;
if (locs==null) {
- this.locs = new DatanodeInfo[0];
+ this.locs = EMPTY_LOCS;
} else {
this.locs = locs;
}
+ Preconditions.checkArgument(cachedLocs != null,
+ "cachedLocs should not be null, use a different constructor");
+ if (cachedLocs.length == 0) {
+ this.cachedLocs = EMPTY_LOCS;
+ } else {
+ this.cachedLocs = cachedLocs;
+ }
}
public Token<BlockTokenIdentifier> getBlockToken() {
@@ -96,6 +121,36 @@ public class LocatedBlock {
return this.corrupt;
}
+ /**
+ * Add a the location of a cached replica of the block.
+ *
+ * @param loc of datanode with the cached replica
+ */
+ public void addCachedLoc(DatanodeInfo loc) {
+ List<DatanodeInfo> cachedList = Lists.newArrayList(cachedLocs);
+ if (cachedList.contains(loc)) {
+ return;
+ }
+ // Try to re-use a DatanodeInfo already in loc
+ for (int i=0; i<locs.length; i++) {
+ if (locs[i].equals(loc)) {
+ cachedList.add(locs[i]);
+ cachedLocs = cachedList.toArray(cachedLocs);
+ return;
+ }
+ }
+ // Not present in loc, add it and go
+ cachedList.add(loc);
+ cachedLocs = cachedList.toArray(cachedLocs);
+ }
+
+ /**
+ * @return Datanodes with a cached block replica
+ */
+ public DatanodeInfo[] getCachedLocations() {
+ return cachedLocs;
+ }
+
@Override
public String toString() {
return getClass().getSimpleName() + "{" + b
Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java?rev=1530802&r1=1530801&r2=1530802&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java (original)
+++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java Wed Oct 9 21:30:08 2013
@@ -150,6 +150,7 @@ import org.apache.hadoop.security.proto.
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.DataChecksum;
+import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.protobuf.ByteString;
import com.google.protobuf.CodedInputStream;
@@ -566,9 +567,21 @@ public class PBHelper {
if (b == null) return null;
Builder builder = LocatedBlockProto.newBuilder();
DatanodeInfo[] locs = b.getLocations();
+ List<DatanodeInfo> cachedLocs =
+ Lists.newLinkedList(Arrays.asList(b.getCachedLocations()));
for (int i = 0; i < locs.length; i++) {
- builder.addLocs(i, PBHelper.convert(locs[i]));
+ DatanodeInfo loc = locs[i];
+ builder.addLocs(i, PBHelper.convert(loc));
+ boolean locIsCached = cachedLocs.contains(loc);
+ builder.addIsCached(locIsCached);
+ if (locIsCached) {
+ cachedLocs.remove(loc);
+ }
}
+ Preconditions.checkArgument(cachedLocs.size() == 0,
+ "Found additional cached replica locations that are not in the set of"
+ + " storage-backed locations!");
+
return builder.setB(PBHelper.convert(b.getBlock()))
.setBlockToken(PBHelper.convert(b.getBlockToken()))
.setCorrupt(b.isCorrupt()).setOffset(b.getStartOffset()).build();
@@ -581,9 +594,20 @@ public class PBHelper {
for (int i = 0; i < locs.size(); i++) {
targets[i] = PBHelper.convert(locs.get(i));
}
+ // Set values from the isCached list, re-using references from loc
+ List<DatanodeInfo> cachedLocs = new ArrayList<DatanodeInfo>(locs.size());
+ List<Boolean> isCachedList = proto.getIsCachedList();
+ for (int i=0; i<isCachedList.size(); i++) {
+ if (isCachedList.get(i)) {
+ cachedLocs.add(targets[i]);
+ }
+ }
+
LocatedBlock lb = new LocatedBlock(PBHelper.convert(proto.getB()), targets,
- proto.getOffset(), proto.getCorrupt());
+ proto.getOffset(), proto.getCorrupt(),
+ cachedLocs.toArray(new DatanodeInfo[0]));
lb.setBlockToken(PBHelper.convert(proto.getBlockToken()));
+
return lb;
}
Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java?rev=1530802&r1=1530801&r2=1530802&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java (original)
+++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationManager.java Wed Oct 9 21:30:08 2013
@@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.protocol.B
import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState;
import org.apache.hadoop.hdfs.server.namenode.FSClusterStats;
@@ -188,6 +189,14 @@ public class CacheReplicationManager ext
return bc == null ? 0 : bc.getCacheReplication();
}
+ public void setCachedLocations(LocatedBlock block) {
+ BlockInfo blockInfo = cachedBlocksMap.getStoredBlock(
+ block.getBlock().getLocalBlock());
+ for (int i=0; i<blockInfo.numNodes(); i++) {
+ block.addCachedLoc(blockInfo.getDatanode(i));
+ }
+ }
+
/**
* Returns the number of cached replicas of a block
*/
Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1530802&r1=1530801&r2=1530802&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Wed Oct 9 21:30:08 2013
@@ -1442,6 +1442,10 @@ public class FSNamesystem implements Nam
blockManager.getDatanodeManager().sortLocatedBlocks(
clientMachine, lastBlockList);
}
+ // Set caching information for the block list
+ for (LocatedBlock lb: blocks.getLocatedBlocks()) {
+ cacheReplicationManager.setCachedLocations(lb);
+ }
}
return blocks;
}
Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto?rev=1530802&r1=1530801&r2=1530802&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto (original)
+++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto Wed Oct 9 21:30:08 2013
@@ -128,6 +128,7 @@ message LocatedBlockProto {
// their locations are not part of this object
required hadoop.common.TokenProto blockToken = 5;
+ repeated bool isCached = 6 [packed=true]; // if a location in locs is cached
}
message DataEncryptionKeyProto {
Modified: hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java?rev=1530802&r1=1530801&r2=1530802&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java (original)
+++ hadoop/common/branches/HDFS-4949/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestCacheReplicationManager.java Wed Oct 9 21:30:08 2013
@@ -32,6 +32,7 @@ import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystemTestHelper;
+import org.apache.hadoop.fs.HdfsBlockLocation;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
@@ -39,6 +40,7 @@ import org.apache.hadoop.hdfs.Distribute
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.PathBasedCacheDescriptor;
import org.apache.hadoop.hdfs.protocol.PathBasedCacheDirective;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
@@ -160,6 +162,18 @@ public class TestCacheReplicationManager
descriptor.getPool());
expected += numBlocksPerFile;
waitForExpectedNumCachedBlocks(expected);
+ HdfsBlockLocation[] locations =
+ (HdfsBlockLocation[]) dfs.getFileBlockLocations(
+ new Path(paths.get(i)), 0, numBlocksPerFile * BLOCK_SIZE);
+ assertEquals("Unexpected number of locations", numBlocksPerFile,
+ locations.length);
+ for (HdfsBlockLocation loc: locations) {
+ assertEquals("Block should be present on all datanodes",
+ 3, loc.getHosts().length);
+ DatanodeInfo[] cachedLocs = loc.getLocatedBlock().getCachedLocations();
+ assertEquals("Block should be cached on all datanodes",
+ loc.getHosts().length, cachedLocs.length);
+ }
}
// Uncache and check each path in sequence
RemoteIterator<PathBasedCacheDescriptor> entries =