You are viewing a plain text version of this content. The canonical link for it is here.
Posted to hdfs-commits@hadoop.apache.org by sz...@apache.org on 2014/08/19 01:41:56 UTC
svn commit: r1618764 - in
/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs: ./
src/main/java/ src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/
src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/
src/main/java/org/ap...
Author: szetszwo
Date: Mon Aug 18 23:41:53 2014
New Revision: 1618764
URL: http://svn.apache.org/r1618764
Log:
Merge r1609845 through r1618763 from trunk.
Added:
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/WhitelistBasedTrustedChannelResolver.java
- copied unchanged from r1618763, hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/WhitelistBasedTrustedChannelResolver.java
Modified:
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/ (props changed)
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/ (props changed)
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
Propchange: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs:r1618417-1618763
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Mon Aug 18 23:41:53 2014
@@ -425,6 +425,9 @@ Release 2.6.0 - UNRELEASED
HDFS-6850. Move NFS out of order write unit tests into TestWrites class.
(Zhe Zhang via atm)
+ HDFS-6188. An ip whitelist based implementation of TrustedChannelResolver.
+ (Benoy Antony via Arpit Agarwal)
+
OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang)
@@ -534,6 +537,12 @@ Release 2.6.0 - UNRELEASED
HDFS-6783. Fix HDFS CacheReplicationMonitor rescan logic. (Yi Liu and Colin Patrick McCabe via umamahesh)
+ HDFS-6825. Edit log corruption due to delayed block removal.
+ (Yongjun Zhang via wang)
+
+ HDFS-6569. OOB message can't be sent to the client when DataNode shuts down for upgrade
+ (brandonli)
+
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES
Propchange: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/
------------------------------------------------------------------------------
Merged /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java:r1618417-1618763
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java Mon Aug 18 23:41:53 2014
@@ -373,12 +373,14 @@ public class BlockInfoUnderConstruction
sb.append("{blockUCState=").append(blockUCState)
.append(", primaryNodeIndex=").append(primaryNodeIndex)
.append(", replicas=[");
- Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
- if (iter.hasNext()) {
- iter.next().appendStringTo(sb);
- while (iter.hasNext()) {
- sb.append(", ");
+ if (replicas != null) {
+ Iterator<ReplicaUnderConstruction> iter = replicas.iterator();
+ if (iter.hasNext()) {
iter.next().appendStringTo(sb);
+ while (iter.hasNext()) {
+ sb.append(", ");
+ iter.next().appendStringTo(sb);
+ }
}
}
sb.append("]}");
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java Mon Aug 18 23:41:53 2014
@@ -286,19 +286,19 @@ public class CacheReplicationMonitor ext
private void rescan() throws InterruptedException {
scannedDirectives = 0;
scannedBlocks = 0;
- namesystem.writeLock();
try {
- lock.lock();
- if (shutdown) {
- throw new InterruptedException("CacheReplicationMonitor was " +
- "shut down.");
+ namesystem.writeLock();
+ try {
+ lock.lock();
+ if (shutdown) {
+ throw new InterruptedException("CacheReplicationMonitor was " +
+ "shut down.");
+ }
+ curScanCount = completedScanCount + 1;
+ } finally {
+ lock.unlock();
}
- curScanCount = completedScanCount + 1;
- }
- finally {
- lock.unlock();
- }
- try {
+
resetStatistics();
rescanCacheDirectives();
rescanCachedBlockMap();
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java Mon Aug 18 23:41:53 2014
@@ -738,7 +738,12 @@ class BlockReceiver implements Closeable
LOG.warn("Error managing cache for writer of block " + block, t);
}
}
-
+
+ public void sendOOB() throws IOException, InterruptedException {
+ ((PacketResponder) responder.getRunnable()).sendOOBResponse(PipelineAck
+ .getRestartOOBStatus());
+ }
+
void receiveBlock(
DataOutputStream mirrOut, // output to next datanode
DataInputStream mirrIn, // input from next datanode
@@ -830,9 +835,7 @@ class BlockReceiver implements Closeable
// The worst case is not recovering this RBW replica.
// Client will fall back to regular pipeline recovery.
}
- try {
- ((PacketResponder) responder.getRunnable()).
- sendOOBResponse(PipelineAck.getRestartOOBStatus());
+ try {
// Even if the connection is closed after the ack packet is
// flushed, the client can react to the connection closure
// first. Insert a delay to lower the chance of client
@@ -840,8 +843,6 @@ class BlockReceiver implements Closeable
Thread.sleep(1000);
} catch (InterruptedException ie) {
// It is already going down. Ignore this.
- } catch (IOException ioe) {
- LOG.info("Error sending OOB Ack.", ioe);
}
}
responder.interrupt();
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java Mon Aug 18 23:41:53 2014
@@ -270,6 +270,7 @@ public class DataNode extends Configured
public final static String EMPTY_DEL_HINT = "";
final AtomicInteger xmitsInProgress = new AtomicInteger();
Daemon dataXceiverServer = null;
+ DataXceiverServer xserver = null;
Daemon localDataXceiverServer = null;
ShortCircuitRegistry shortCircuitRegistry = null;
ThreadGroup threadGroup = null;
@@ -649,8 +650,8 @@ public class DataNode extends Configured
streamingAddr = tcpPeerServer.getStreamingAddr();
LOG.info("Opened streaming server at " + streamingAddr);
this.threadGroup = new ThreadGroup("dataXceiverServer");
- this.dataXceiverServer = new Daemon(threadGroup,
- new DataXceiverServer(tcpPeerServer, conf, this));
+ xserver = new DataXceiverServer(tcpPeerServer, conf, this);
+ this.dataXceiverServer = new Daemon(threadGroup, xserver);
this.threadGroup.setDaemon(true); // auto destroy when empty
if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY,
@@ -1138,6 +1139,11 @@ public class DataNode extends Configured
}
@VisibleForTesting
+ public DataXceiverServer getXferServer() {
+ return xserver;
+ }
+
+ @VisibleForTesting
public int getXferPort() {
return streamingAddr.getPort();
}
@@ -1395,6 +1401,7 @@ public class DataNode extends Configured
// in order to avoid any further acceptance of requests, but the peers
// for block writes are not closed until the clients are notified.
if (dataXceiverServer != null) {
+ xserver.sendOOBToPeers();
((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill();
this.dataXceiverServer.interrupt();
}
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java Mon Aug 18 23:41:53 2014
@@ -103,7 +103,8 @@ class DataXceiver extends Receiver imple
private long opStartTime; //the start time of receiving an Op
private final InputStream socketIn;
private OutputStream socketOut;
-
+ private BlockReceiver blockReceiver = null;
+
/**
* Client Name used in previous operation. Not available on first request
* on the socket.
@@ -159,6 +160,12 @@ class DataXceiver extends Receiver imple
return socketOut;
}
+ public void sendOOB() throws IOException, InterruptedException {
+ LOG.info("Sending OOB to peer: " + peer);
+ if(blockReceiver!=null)
+ blockReceiver.sendOOB();
+ }
+
/**
* Read/write data from/to the DataXceiverServer.
*/
@@ -168,7 +175,7 @@ class DataXceiver extends Receiver imple
Op op = null;
try {
- dataXceiverServer.addPeer(peer, Thread.currentThread());
+ dataXceiverServer.addPeer(peer, Thread.currentThread(), this);
peer.setWriteTimeout(datanode.getDnConf().socketWriteTimeout);
InputStream input = socketIn;
IOStreamPair saslStreams = datanode.saslServer.receive(peer, socketOut,
@@ -584,7 +591,6 @@ class DataXceiver extends Receiver imple
DataOutputStream mirrorOut = null; // stream to next target
DataInputStream mirrorIn = null; // reply from next target
Socket mirrorSock = null; // socket to next target
- BlockReceiver blockReceiver = null; // responsible for data handling
String mirrorNode = null; // the name:port of next target
String firstBadLink = ""; // first datanode that failed in connection setup
Status mirrorInStatus = SUCCESS;
@@ -747,6 +753,7 @@ class DataXceiver extends Receiver imple
IOUtils.closeStream(replyOut);
IOUtils.closeSocket(mirrorSock);
IOUtils.closeStream(blockReceiver);
+ blockReceiver = null;
}
//update metrics
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java Mon Aug 18 23:41:53 2014
@@ -27,11 +27,11 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.net.PeerServer;
-import org.apache.hadoop.hdfs.server.balancer.Balancer;
import org.apache.hadoop.hdfs.util.DataTransferThrottler;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Daemon;
+import com.google.common.annotations.VisibleForTesting;
/**
* Server used for receiving/sending a block of data.
@@ -45,6 +45,7 @@ class DataXceiverServer implements Runna
private final PeerServer peerServer;
private final DataNode datanode;
private final HashMap<Peer, Thread> peers = new HashMap<Peer, Thread>();
+ private final HashMap<Peer, DataXceiver> peersXceiver = new HashMap<Peer, DataXceiver>();
private boolean closed = false;
/**
@@ -217,18 +218,38 @@ class DataXceiverServer implements Runna
}
}
- synchronized void addPeer(Peer peer, Thread t) throws IOException {
+ synchronized void addPeer(Peer peer, Thread t, DataXceiver xceiver)
+ throws IOException {
if (closed) {
throw new IOException("Server closed.");
}
peers.put(peer, t);
+ peersXceiver.put(peer, xceiver);
}
synchronized void closePeer(Peer peer) {
peers.remove(peer);
+ peersXceiver.remove(peer);
IOUtils.cleanup(null, peer);
}
+ // Sending OOB to all peers
+ public synchronized void sendOOBToPeers() {
+ if (!datanode.shutdownForUpgrade) {
+ return;
+ }
+
+ for (Peer p : peers.keySet()) {
+ try {
+ peersXceiver.get(p).sendOOB();
+ } catch (IOException e) {
+ LOG.warn("Got error when sending OOB message.", e);
+ } catch (InterruptedException e) {
+ LOG.warn("Interrupted when sending OOB message.");
+ }
+ }
+ }
+
// Notify all peers of the shutdown and restart.
// datanode.shouldRun should still be true and datanode.restarting should
// be set true before calling this method.
@@ -247,6 +268,7 @@ class DataXceiverServer implements Runna
IOUtils.cleanup(LOG, p);
}
peers.clear();
+ peersXceiver.clear();
}
// Return the number of peers.
@@ -254,7 +276,14 @@ class DataXceiverServer implements Runna
return peers.size();
}
+ // Return the number of peers and DataXceivers.
+ @VisibleForTesting
+ synchronized int getNumPeersXceiver() {
+ return peersXceiver.size();
+ }
+
synchronized void releasePeer(Peer peer) {
peers.remove(peer);
+ peersXceiver.remove(peer);
}
}
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Mon Aug 18 23:41:53 2014
@@ -4350,7 +4350,30 @@ public class FSNamesystem implements Nam
throw new IOException("Block (=" + lastblock + ") not found");
}
}
- INodeFile iFile = ((INode)storedBlock.getBlockCollection()).asFile();
+ //
+ // The implementation of delete operation (see @deleteInternal method)
+ // first removes the file paths from namespace, and delays the removal
+ // of blocks to later time for better performance. When
+ // commitBlockSynchronization (this method) is called in between, the
+ // blockCollection of storedBlock could have been assigned to null by
+ // the delete operation, throw IOException here instead of NPE; if the
+ // file path is already removed from namespace by the delete operation,
+ // throw FileNotFoundException here, so not to proceed to the end of
+ // this method to add a CloseOp to the edit log for an already deleted
+ // file (See HDFS-6825).
+ //
+ BlockCollection blockCollection = storedBlock.getBlockCollection();
+ if (blockCollection == null) {
+ throw new IOException("The blockCollection of " + storedBlock
+ + " is null, likely because the file owning this block was"
+ + " deleted and the block removal is delayed");
+ }
+ INodeFile iFile = ((INode)blockCollection).asFile();
+ if (isFileDeleted(iFile)) {
+ throw new FileNotFoundException("File not found: "
+ + iFile.getFullPathName() + ", likely due to delayed block"
+ + " removal");
+ }
if (!iFile.isUnderConstruction() || storedBlock.isComplete()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Unexpected block (=" + lastblock
@@ -6349,9 +6372,28 @@ public class FSNamesystem implements Nam
private boolean isFileDeleted(INodeFile file) {
// Not in the inodeMap or in the snapshot but marked deleted.
- if (dir.getInode(file.getId()) == null ||
- file.getParent() == null || (file.isWithSnapshot() &&
- file.getFileWithSnapshotFeature().isCurrentFileDeleted())) {
+ if (dir.getInode(file.getId()) == null) {
+ return true;
+ }
+
+ // look at the path hierarchy to see if one parent is deleted by recursive
+ // deletion
+ INode tmpChild = file;
+ INodeDirectory tmpParent = file.getParent();
+ while (true) {
+ if (tmpParent == null ||
+ tmpParent.searchChildren(tmpChild.getLocalNameBytes()) < 0) {
+ return true;
+ }
+ if (tmpParent.isRoot()) {
+ break;
+ }
+ tmpChild = tmpParent;
+ tmpParent = tmpParent.getParent();
+ }
+
+ if (file.isWithSnapshot() &&
+ file.getFileWithSnapshotFeature().isCurrentFileDeleted()) {
return true;
}
return false;
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java Mon Aug 18 23:41:53 2014
@@ -176,7 +176,7 @@ public class INodeDirectory extends INod
return quota;
}
- private int searchChildren(byte[] name) {
+ int searchChildren(byte[] name) {
return children == null? -1: Collections.binarySearch(children, name);
}
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java Mon Aug 18 23:41:53 2014
@@ -44,6 +44,9 @@ import org.apache.hadoop.hdfs.protocol.d
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
@@ -1300,4 +1303,33 @@ public class DFSTestUtil {
sockDir.close();
}
}
+
+ /**
+ * @return the node which is expected to run the recovery of the
+ * given block, which is known to be under construction inside the
+ * given NameNOde.
+ */
+ public static DatanodeDescriptor getExpectedPrimaryNode(NameNode nn,
+ ExtendedBlock blk) {
+ BlockManager bm0 = nn.getNamesystem().getBlockManager();
+ BlockInfo storedBlock = bm0.getStoredBlock(blk.getLocalBlock());
+ assertTrue("Block " + blk + " should be under construction, " +
+ "got: " + storedBlock,
+ storedBlock instanceof BlockInfoUnderConstruction);
+ BlockInfoUnderConstruction ucBlock =
+ (BlockInfoUnderConstruction)storedBlock;
+ // We expect that the replica with the most recent heart beat will be
+ // the one to be in charge of the synchronization / recovery protocol.
+ final DatanodeStorageInfo[] storages = ucBlock.getExpectedStorageLocations();
+ DatanodeStorageInfo expectedPrimary = storages[0];
+ long mostRecentLastUpdate = expectedPrimary.getDatanodeDescriptor().getLastUpdate();
+ for (int i = 1; i < storages.length; i++) {
+ final long lastUpdate = storages[i].getDatanodeDescriptor().getLastUpdate();
+ if (lastUpdate > mostRecentLastUpdate) {
+ expectedPrimary = storages[i];
+ mostRecentLastUpdate = lastUpdate;
+ }
+ }
+ return expectedPrimary.getDatanodeDescriptor();
+ }
}
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeRollingUpgrade.java Mon Aug 18 23:41:53 2014
@@ -27,11 +27,14 @@ import static org.junit.Assert.assertTru
import java.io.File;
import java.io.IOException;
+import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.DFSOutputStream;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -67,6 +70,7 @@ public class TestDataNodeRollingUpgrade
private void startCluster() throws IOException {
conf = new HdfsConfiguration();
+ conf.setInt("dfs.blocksize", 1024*1024);
cluster = new Builder(conf).numDataNodes(REPL_FACTOR).build();
cluster.waitActive();
fs = cluster.getFileSystem();
@@ -243,4 +247,48 @@ public class TestDataNodeRollingUpgrade
shutdownCluster();
}
}
+
+ @Test (timeout=600000)
+ // Test DatanodeXceiver has correct peer-dataxceiver pairs for sending OOB message
+ public void testDatanodePeersXceiver() throws Exception {
+ try {
+ startCluster();
+
+ // Create files in DFS.
+ String testFile1 = "/TestDataNodeXceiver1.dat";
+ String testFile2 = "/TestDataNodeXceiver2.dat";
+ String testFile3 = "/TestDataNodeXceiver3.dat";
+
+ DFSClient client1 = new DFSClient(NameNode.getAddress(conf), conf);
+ DFSClient client2 = new DFSClient(NameNode.getAddress(conf), conf);
+ DFSClient client3 = new DFSClient(NameNode.getAddress(conf), conf);
+
+ DFSOutputStream s1 = (DFSOutputStream) client1.create(testFile1, true);
+ DFSOutputStream s2 = (DFSOutputStream) client2.create(testFile2, true);
+ DFSOutputStream s3 = (DFSOutputStream) client3.create(testFile3, true);
+
+ byte[] toWrite = new byte[1024*1024*8];
+ Random rb = new Random(1111);
+ rb.nextBytes(toWrite);
+ s1.write(toWrite, 0, 1024*1024*8);
+ s1.flush();
+ s2.write(toWrite, 0, 1024*1024*8);
+ s2.flush();
+ s3.write(toWrite, 0, 1024*1024*8);
+ s3.flush();
+
+ assertTrue(dn.getXferServer().getNumPeersXceiver() == dn.getXferServer()
+ .getNumPeersXceiver());
+ s1.close();
+ s2.close();
+ s3.close();
+ assertTrue(dn.getXferServer().getNumPeersXceiver() == dn.getXferServer()
+ .getNumPeersXceiver());
+ client1.close();
+ client2.close();
+ client3.close();
+ } finally {
+ shutdownCluster();
+ }
+ }
}
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCommitBlockSynchronization.java Mon Aug 18 23:41:53 2014
@@ -50,6 +50,17 @@ public class TestCommitBlockSynchronizat
FSNamesystem namesystem = new FSNamesystem(conf, image);
namesystem.setImageLoaded(true);
+
+ // set file's parent as root and put the file to inodeMap, so
+ // FSNamesystem's isFileDeleted() method will return false on this file
+ if (file.getParent() == null) {
+ INodeDirectory parent = mock(INodeDirectory.class);
+ parent.setLocalName(new byte[0]);
+ parent.addChild(file);
+ file.setParent(parent);
+ }
+ namesystem.dir.getINodeMap().put(file);
+
FSNamesystem namesystemSpy = spy(namesystem);
BlockInfoUnderConstruction blockInfo = new BlockInfoUnderConstruction(
block, 1, HdfsServerConstants.BlockUCState.UNDER_CONSTRUCTION, targets);
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java Mon Aug 18 23:41:53 2014
@@ -18,7 +18,9 @@
package org.apache.hadoop.hdfs.server.namenode;
import java.io.FileNotFoundException;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
@@ -28,18 +30,29 @@ import org.apache.hadoop.fs.FSDataOutput
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.BlockStoragePolicy;
+import org.apache.hadoop.hdfs.AppendTestUtil;
import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper;
+import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
import org.junit.Assert;
import org.junit.Test;
+import org.mockito.Mockito;
import org.mockito.internal.util.reflection.Whitebox;
@@ -49,6 +62,7 @@ import org.mockito.internal.util.reflect
* whole duration.
*/
public class TestDeleteRace {
+ private static final int BLOCK_SIZE = 4096;
private static final Log LOG = LogFactory.getLog(TestDeleteRace.class);
private static final Configuration conf = new HdfsConfiguration();
private MiniDFSCluster cluster;
@@ -201,7 +215,126 @@ public class TestDeleteRace {
cluster.shutdown();
}
}
+ }
+
+ /**
+ * Test race between delete operation and commitBlockSynchronization method.
+ * See HDFS-6825.
+ * @param hasSnapshot
+ * @throws Exception
+ */
+ private void testDeleteAndCommitBlockSynchronizationRace(boolean hasSnapshot)
+ throws Exception {
+ LOG.info("Start testing, hasSnapshot: " + hasSnapshot);
+ final String testPaths[] = {
+ "/test-file",
+ "/testdir/testdir1/test-file"
+ };
+ final Path rootPath = new Path("/");
+ final Configuration conf = new Configuration();
+ // Disable permissions so that another user can recover the lease.
+ conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
+ conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
+ FSDataOutputStream stm = null;
+ Map<DataNode, DatanodeProtocolClientSideTranslatorPB> dnMap =
+ new HashMap<DataNode, DatanodeProtocolClientSideTranslatorPB>();
+
+ try {
+ cluster = new MiniDFSCluster.Builder(conf)
+ .numDataNodes(3)
+ .build();
+ cluster.waitActive();
+
+ DistributedFileSystem fs = cluster.getFileSystem();
+ int stId = 0;
+ for (String testPath : testPaths) {
+ LOG.info("test on " + testPath + " snapshot: " + hasSnapshot);
+ Path fPath = new Path(testPath);
+ //find grandest non-root parent
+ Path grandestNonRootParent = fPath;
+ while (!grandestNonRootParent.getParent().equals(rootPath)) {
+ grandestNonRootParent = grandestNonRootParent.getParent();
+ }
+ stm = fs.create(fPath);
+ LOG.info("test on " + testPath + " created " + fPath);
+
+ // write a half block
+ AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
+ stm.hflush();
+
+ if (hasSnapshot) {
+ SnapshotTestHelper.createSnapshot(fs, rootPath,
+ "st" + String.valueOf(stId));
+ ++stId;
+ }
+
+ // Look into the block manager on the active node for the block
+ // under construction.
+ NameNode nn = cluster.getNameNode();
+ ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, fPath);
+ DatanodeDescriptor expectedPrimary =
+ DFSTestUtil.getExpectedPrimaryNode(nn, blk);
+ LOG.info("Expecting block recovery to be triggered on DN " +
+ expectedPrimary);
+
+ // Find the corresponding DN daemon, and spy on its connection to the
+ // active.
+ DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
+ DatanodeProtocolClientSideTranslatorPB nnSpy = dnMap.get(primaryDN);
+ if (nnSpy == null) {
+ nnSpy = DataNodeTestUtils.spyOnBposToNN(primaryDN, nn);
+ dnMap.put(primaryDN, nnSpy);
+ }
+
+ // Delay the commitBlockSynchronization call
+ DelayAnswer delayer = new DelayAnswer(LOG);
+ Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
+ Mockito.eq(blk),
+ Mockito.anyInt(), // new genstamp
+ Mockito.anyLong(), // new length
+ Mockito.eq(true), // close file
+ Mockito.eq(false), // delete block
+ (DatanodeID[]) Mockito.anyObject(), // new targets
+ (String[]) Mockito.anyObject()); // new target storages
+
+ fs.recoverLease(fPath);
+
+ LOG.info("Waiting for commitBlockSynchronization call from primary");
+ delayer.waitForCall();
+
+ LOG.info("Deleting recursively " + grandestNonRootParent);
+ fs.delete(grandestNonRootParent, true);
+
+ delayer.proceed();
+ LOG.info("Now wait for result");
+ delayer.waitForResult();
+ Throwable t = delayer.getThrown();
+ if (t != null) {
+ LOG.info("Result exception (snapshot: " + hasSnapshot + "): " + t);
+ }
+ } // end of loop each fPath
+ LOG.info("Now check we can restart");
+ cluster.restartNameNodes();
+ LOG.info("Restart finished");
+ } finally {
+ if (stm != null) {
+ IOUtils.closeStream(stm);
+ }
+ if (cluster != null) {
+ cluster.shutdown();
+ }
+ }
+ }
+ @Test(timeout=600000)
+ public void testDeleteAndCommitBlockSynchonizationRaceNoSnapshot()
+ throws Exception {
+ testDeleteAndCommitBlockSynchronizationRace(false);
+ }
+ @Test(timeout=600000)
+ public void testDeleteAndCommitBlockSynchronizationRaceHasSnapshot()
+ throws Exception {
+ testDeleteAndCommitBlockSynchronizationRace(true);
}
}
Modified: hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java?rev=1618764&r1=1618763&r2=1618764&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java (original)
+++ hadoop/common/branches/HDFS-6584/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java Mon Aug 18 23:41:53 2014
@@ -356,7 +356,8 @@ public class TestPipelinesFailover {
NameNode nn0 = cluster.getNameNode(0);
ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
- DatanodeDescriptor expectedPrimary = getExpectedPrimaryNode(nn0, blk);
+ DatanodeDescriptor expectedPrimary =
+ DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
LOG.info("Expecting block recovery to be triggered on DN " +
expectedPrimary);
@@ -506,37 +507,6 @@ public class TestPipelinesFailover {
}
}
-
-
- /**
- * @return the node which is expected to run the recovery of the
- * given block, which is known to be under construction inside the
- * given NameNOde.
- */
- private DatanodeDescriptor getExpectedPrimaryNode(NameNode nn,
- ExtendedBlock blk) {
- BlockManager bm0 = nn.getNamesystem().getBlockManager();
- BlockInfo storedBlock = bm0.getStoredBlock(blk.getLocalBlock());
- assertTrue("Block " + blk + " should be under construction, " +
- "got: " + storedBlock,
- storedBlock instanceof BlockInfoUnderConstruction);
- BlockInfoUnderConstruction ucBlock =
- (BlockInfoUnderConstruction)storedBlock;
- // We expect that the replica with the most recent heart beat will be
- // the one to be in charge of the synchronization / recovery protocol.
- final DatanodeStorageInfo[] storages = ucBlock.getExpectedStorageLocations();
- DatanodeStorageInfo expectedPrimary = storages[0];
- long mostRecentLastUpdate = expectedPrimary.getDatanodeDescriptor().getLastUpdate();
- for (int i = 1; i < storages.length; i++) {
- final long lastUpdate = storages[i].getDatanodeDescriptor().getLastUpdate();
- if (lastUpdate > mostRecentLastUpdate) {
- expectedPrimary = storages[i];
- mostRecentLastUpdate = lastUpdate;
- }
- }
- return expectedPrimary.getDatanodeDescriptor();
- }
-
private DistributedFileSystem createFsAsOtherUser(
final MiniDFSCluster cluster, final Configuration conf)
throws IOException, InterruptedException {