You are viewing a plain text version of this content. The canonical (HTML) version is available in the mailing-list archive.
Posted to common-commits@hadoop.apache.org by ha...@apache.org on 2008/12/03 20:07:53 UTC
svn commit: r723016 - in /hadoop/core/branches/branch-0.19: ./
src/hdfs/org/apache/hadoop/hdfs/server/datanode/
src/test/org/apache/hadoop/hdfs/server/datanode/
Author: hairong
Date: Wed Dec 3 11:07:52 2008
New Revision: 723016
URL: http://svn.apache.org/viewvc?rev=723016&view=rev
Log:
Merge -r 723012:723013 from trunk to move the change log of HADOOP-4679 into the branch 0.19
Added:
hadoop/core/branches/branch-0.19/src/test/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
- copied unchanged from r723013, hadoop/core/trunk/src/test/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
Modified:
hadoop/core/branches/branch-0.19/ (props changed)
hadoop/core/branches/branch-0.19/CHANGES.txt (contents, props changed)
hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java
hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
Propchange: hadoop/core/branches/branch-0.19/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Dec 3 11:07:52 2008
@@ -1 +1 @@
-/hadoop/core/trunk:697306,698176,699056,699098,699415,699424,699444,699490,699517,700163,700628,700923,701273,701398,703923,704203,704261,704701,704703,704707,704712,704732,704748,704989,705391,705420,705430,705762,706350,706707,706719,706796,706802,707258,707262,708623,708641,708710,709040,709303,712881,713888,720602
+/hadoop/core/trunk:697306,698176,699056,699098,699415,699424,699444,699490,699517,700163,700628,700923,701273,701398,703923,704203,704261,704701,704703,704707,704712,704732,704748,704989,705391,705420,705430,705762,706350,706707,706719,706796,706802,707258,707262,708623,708641,708710,709040,709303,712881,713888,720602,723013
Modified: hadoop/core/branches/branch-0.19/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/CHANGES.txt?rev=723016&r1=723015&r2=723016&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.19/CHANGES.txt Wed Dec 3 11:07:52 2008
@@ -1048,6 +1048,9 @@
HADOOP-4726. Fix documentation typos "the the". (Edward J. Yoon via
szetszwo)
+ HADOOP-4679. Datanode prints tons of log messages: waiting for threadgroup
+ to exit, active threads is XX. (hairong)
+
Release 0.18.2 - 2008-11-03
BUG FIXES
Propchange: hadoop/core/branches/branch-0.19/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Wed Dec 3 11:07:52 2008
@@ -1 +1 @@
-/hadoop/core/trunk/CHANGES.txt:697306,698176,699056,699098,699415,699424,699444,699490,699517,700163,700628,700923,701273,701398,703923,704203,704261,704701,704703,704707,704712,704732,704748,704989,705391,705420,705430,705762,706350,706707,706719,706796,706802,707258,707262,708623,708641,708710,708723,709040,709303,711717,712881,713888,720602
+/hadoop/core/trunk/CHANGES.txt:697306,698176,699056,699098,699415,699424,699444,699490,699517,700163,700628,700923,701273,701398,703923,704203,704261,704701,704703,704707,704712,704732,704748,704989,705391,705420,705430,705762,706350,706707,706719,706796,706802,707258,707262,708623,708641,708710,708723,709040,709303,711717,712881,713888,720602,723013
Modified: hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java?rev=723016&r1=723015&r2=723016&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java (original)
+++ hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java Wed Dec 3 11:07:52 2008
@@ -23,7 +23,6 @@
import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
-import java.net.InetAddress;
import java.nio.ByteBuffer;
import java.util.LinkedList;
import java.util.zip.CRC32;
@@ -110,6 +109,11 @@
}
} catch(IOException ioe) {
IOUtils.closeStream(this);
+ IOException cause = FSDataset.getCauseIfDiskError(ioe);
+ if (cause != null) { // possible disk error
+ ioe = cause;
+ datanode.checkDiskError(ioe);
+ }
throw ioe;
}
}
Modified: hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java?rev=723016&r1=723015&r2=723016&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java (original)
+++ hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java Wed Dec 3 11:07:52 2008
@@ -297,6 +297,7 @@
ServerSocketChannel.open().socket() : new ServerSocket();
Server.bind(ss, socAddr, 0);
ss.setReceiveBufferSize(DEFAULT_DATA_SOCKET_SIZE);
+ ss.setSoTimeout(conf.getInt("dfs.dataXceiver.timeoutInMS", 30000)); //30s
// adjust machine name with the actual port
tmpPort = ss.getLocalPort();
selfAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(),
@@ -539,6 +540,8 @@
/**
* Shut down this instance of the datanode.
* Returns only after shutdown is complete.
+ * This method can only be called by the offerService thread.
+ * Otherwise, deadlock might occur.
*/
public void shutdown() {
if (infoServer != null) {
@@ -605,7 +608,8 @@
* when IOException occurs.
* If so, handle the error */
protected void checkDiskError( IOException e ) throws IOException {
- if (e.getMessage().startsWith("No space left on device")) {
+ if (e.getMessage() != null &&
+ e.getMessage().startsWith("No space left on device")) {
throw new DiskOutOfSpaceException("No space left on device");
} else {
checkDiskError();
@@ -623,12 +627,12 @@
private void handleDiskError(String errMsgr) {
LOG.warn("DataNode is shutting down.\n" + errMsgr);
+ shouldRun = false;
try {
namenode.errorReport(
dnRegistration, DatanodeProtocol.DISK_ERROR, errMsgr);
} catch(IOException ignored) {
}
- shutdown();
}
/** Number of concurrent xceivers per node. */
@@ -818,7 +822,9 @@
case DatanodeProtocol.DNA_REGISTER:
// namenode requested a registration - at start or if NN lost contact
LOG.info("DatanodeCommand action: DNA_REGISTER");
- register();
+ if (shouldRun) {
+ register();
+ }
break;
case DatanodeProtocol.DNA_FINALIZE:
storage.finalizeUpgrade();
@@ -1131,6 +1137,10 @@
dn.dataNodeThread.start();
}
}
+
+ static boolean isDatanodeUp(DataNode dn) {
+ return dn.dataNodeThread != null && dn.dataNodeThread.isAlive();
+ }
/** Instantiate a single datanode object. This must be run by invoking
* {@link DataNode#runDatanodeDaemon(DataNode)} subsequently.
Modified: hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java?rev=723016&r1=723015&r2=723016&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java (original)
+++ hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java Wed Dec 3 11:07:52 2008
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.net.ServerSocket;
import java.net.Socket;
+import java.net.SocketTimeoutException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
@@ -130,6 +131,8 @@
s.setTcpNoDelay(true);
new Daemon(datanode.threadGroup,
new DataXceiver(s, datanode, this)).start();
+ } catch (SocketTimeoutException ignored) {
+ // wake up to see if should continue to run
} catch (IOException ie) {
LOG.warn(datanode.dnRegistration + ":DataXceiveServer: "
+ StringUtils.stringifyException(ie));
Modified: hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java?rev=723016&r1=723015&r2=723016&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java (original)
+++ hadoop/core/branches/branch-0.19/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java Wed Dec 3 11:07:52 2008
@@ -395,7 +395,13 @@
}
// Create the zero-length temp file
//
- if (!f.createNewFile()) {
+ boolean fileCreated = false;
+ try {
+ fileCreated = f.createNewFile();
+ } catch (IOException ioe) {
+ throw (IOException)new IOException(DISK_ERROR +f).initCause(ioe);
+ }
+ if (!fileCreated) {
throw new IOException("Unexpected problem in creating temporary file for "+
b + ". File " + f + " should be creatable, but is already present.");
}
@@ -942,6 +948,20 @@
}
}
+ private final static String DISK_ERROR = "Possible disk error on file creation: ";
+ /** Get the cause of an I/O exception if caused by a possible disk error
+ * @param ioe an I/O exception
+ * @return cause if the I/O exception is caused by a possible disk error;
+ * null otherwise.
+ */
+ static IOException getCauseIfDiskError(IOException ioe) {
+ if (ioe.getMessage()!=null && ioe.getMessage().startsWith(DISK_ERROR)) {
+ return (IOException)ioe.getCause();
+ } else {
+ return null;
+ }
+ }
+
/**
* Start writing to a block file
* If isRecovery is true and the block pre-exists, then we kill all