You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by om...@apache.org on 2011/03/04 05:25:04 UTC
svn commit: r1077532 - in
/hadoop/common/branches/branch-0.20-security-patches/src:
hdfs/org/apache/hadoop/hdfs/protocol/
hdfs/org/apache/hadoop/hdfs/server/datanode/
hdfs/org/apache/hadoop/hdfs/server/namenode/
hdfs/org/apache/hadoop/hdfs/server/protocol/
test/org/apache/hadoop/hdfs/server/datanode/
Author: omalley
Date: Fri Mar 4 04:25:04 2011
New Revision: 1077532
URL: http://svn.apache.org/viewvc?rev=1077532&view=rev
Log:
commit 33d30c3ca1902d671fec311f085ef50e6a105f16
Author: Konstantin Shvachko <sh...@cdev6023.inktomisearch.com>
Date: Wed Jul 7 02:13:15 2010 +0000
HDFS-1158 from https://issues.apache.org/jira/secure/attachment/12448848/rev-HDFS-457.patch
+++ b/YAHOO-CHANGES.txt
+ HDFS-1158. Revert HDFS-457. (shv)
+
Modified:
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java
hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java
hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java
Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java?rev=1077532&r1=1077531&r2=1077532&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/protocol/FSConstants.java Fri Mar 4 04:25:04 2011
@@ -56,7 +56,6 @@ public interface FSConstants {
public static final int DEFAULT_DATA_SOCKET_SIZE = 128 * 1024;
public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
- public static final int MIN_NUM_OF_VALID_VOLUMES = 1;// for a DN to run
// SafeMode actions
public enum SafeModeAction{ SAFEMODE_LEAVE, SAFEMODE_ENTER, SAFEMODE_GET; }
Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java?rev=1077532&r1=1077531&r2=1077532&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java Fri Mar 4 04:25:04 2011
@@ -117,9 +117,6 @@ class BlockReceiver implements java.io.C
// check if there is a disk error
IOException cause = FSDataset.getCauseIfDiskError(ioe);
- DataNode.LOG.warn("IOException in BlockReceiver constructor. Cause is ",
- cause);
-
if (cause != null) { // possible disk error
ioe = cause;
datanode.checkDiskError(ioe); // may throw an exception here
@@ -833,14 +830,7 @@ class BlockReceiver implements java.io.C
DataTransferProtocol.OP_STATUS_SUCCESS}).write(replyOut);
replyOut.flush();
} catch (Exception e) {
- LOG.warn("IOException in BlockReceiver.lastNodeRun: ", e);
if (running) {
- try {
- datanode.checkDiskError(e); // may throw an exception here
- } catch (IOException ioe) {
- LOG.warn("DataNode.chekDiskError failed in lastDataNodeRun with: ",
- ioe);
- }
LOG.info("PacketResponder " + block + " " + numTargets +
" Exception " + StringUtils.stringifyException(e));
running = false;
Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java?rev=1077532&r1=1077531&r2=1077532&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/DataNode.java Fri Mar 4 04:25:04 2011
@@ -724,14 +724,11 @@ public class DataNode extends Configured
}
- /** Check if there is no space in disk
- * @param e that caused this checkDiskError call
- **/
- protected void checkDiskError(Exception e ) throws IOException {
-
- LOG.warn("checkDiskError: exception: ", e);
-
- if (e.getMessage() != null &&
+ /* Check if there is no space in disk or the disk is read-only
+ * when IOException occurs.
+ * If so, handle the error */
+ protected void checkDiskError( IOException e ) throws IOException {
+ if (e.getMessage() != null &&
e.getMessage().startsWith("No space left on device")) {
throw new DiskOutOfSpaceException("No space left on device");
} else {
@@ -739,11 +736,8 @@ public class DataNode extends Configured
}
}
- /**
- * Check if there is a disk failure and if so, handle the error
- *
- **/
- protected void checkDiskError( ) {
+ /* Check if there is no disk space and if so, handle the error*/
+ protected void checkDiskError( ) throws IOException {
try {
data.checkDataDir();
} catch(DiskErrorException de) {
@@ -752,31 +746,13 @@ public class DataNode extends Configured
}
private void handleDiskError(String errMsgr) {
- boolean hasEnoughResource = data.hasEnoughResource();
- LOG.warn("DataNode.handleDiskError: Keep Running: " + hasEnoughResource);
-
- //if hasEnoughtResource = true - more volumes are available, so we don't want
- // to shutdown DN completely and don't want NN to remove it.
- int dp_error = DatanodeProtocol.DISK_ERROR;
- if(hasEnoughResource == false) {
- // DN will be shutdown and NN should remove it
- dp_error = DatanodeProtocol.FATAL_DISK_ERROR;
- }
- //inform NameNode
+ LOG.warn("DataNode is shutting down.\n" + errMsgr);
+ shouldRun = false;
try {
namenode.errorReport(
- dnRegistration, dp_error, errMsgr);
+ dnRegistration, DatanodeProtocol.DISK_ERROR, errMsgr);
} catch(IOException ignored) {
}
-
-
- if(hasEnoughResource) {
- scheduleBlockReport(0);
- return; // do not shutdown
- }
-
- LOG.warn("DataNode is shutting down.\n" + errMsgr);
- shouldRun = false;
}
/** Number of concurrent xceivers per node. */
@@ -1285,9 +1261,6 @@ public class DataNode extends Configured
} catch (IOException ie) {
LOG.warn(dnRegistration + ":Failed to transfer " + b + " to " + targets[0].getName()
+ " got " + StringUtils.stringifyException(ie));
- // check if there are any disk problem
- datanode.checkDiskError();
-
} finally {
xmitsInProgress.getAndDecrement();
IOUtils.closeStream(blockSender);
Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java?rev=1077532&r1=1077531&r2=1077532&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDataset.java Fri Mar 4 04:25:04 2011
@@ -17,40 +17,24 @@
*/
package org.apache.hadoop.hdfs.server.datanode;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.FilenameFilter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.RandomAccessFile;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.TreeSet;
+import java.io.*;
+import java.util.*;
import javax.management.NotCompliantMBeanException;
import javax.management.ObjectName;
import javax.management.StandardMBean;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.DF;
-import org.apache.hadoop.fs.DU;
-import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.*;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.FSConstants;
-import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
-import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol;
import org.apache.hadoop.metrics.util.MBeanUtil;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.DiskChecker;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
-import org.mortbay.log.Log;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean;
+import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol;
/**************************************************
* FSDataset manages a set of data blocks. Each block
@@ -501,25 +485,9 @@ public class FSDataset implements FSCons
FSVolumeSet(FSVolume[] volumes) {
this.volumes = volumes;
}
-
- private int numberOfVolumes() {
- return volumes.length;
- }
synchronized FSVolume getNextVolume(long blockSize) throws IOException {
-
- if(volumes.length < 1) {
- throw new DiskOutOfSpaceException("No more available volumes");
- }
-
- // since volumes could've been removed because of the failure
- // make sure we are not out of bounds
- if(curVolume >= volumes.length) {
- curVolume = 0;
- }
-
int startVolume = curVolume;
-
while (true) {
FSVolume volume = volumes[curVolume];
curVolume = (curVolume + 1) % volumes.length;
@@ -566,46 +534,10 @@ public class FSDataset implements FSCons
}
}
- /**
- * goes over all the volumes and checkDir eachone of them
- * if one throws DiskErrorException - removes from the list of active
- * volumes.
- * @return list of all the removed volumes
- */
- synchronized List<FSVolume> checkDirs() {
-
- ArrayList<FSVolume> removed_vols = null;
-
+ synchronized void checkDirs() throws DiskErrorException {
for (int idx = 0; idx < volumes.length; idx++) {
- FSVolume fsv = volumes[idx];
- try {
- fsv.checkDirs();
- } catch (DiskErrorException e) {
- DataNode.LOG.warn("Removing failed volume " + fsv + ": ",e);
- if(removed_vols == null) {
- removed_vols = new ArrayList<FSVolume>(1);
- }
- removed_vols.add(volumes[idx]);
- volumes[idx] = null; //remove the volume
- }
- }
-
- // repair array - copy non null elements
- int removed_size = (removed_vols==null)? 0 : removed_vols.size();
- if(removed_size > 0) {
- FSVolume fsvs[] = new FSVolume [volumes.length-removed_size];
- for(int idx=0,idy=0; idx<volumes.length; idx++) {
- if(volumes[idx] != null) {
- fsvs[idy] = volumes[idx];
- idy++;
- }
- }
- volumes = fsvs; // replace array of volumes
+ volumes[idx].checkDirs();
}
- Log.info("Completed FSVolumeSet.checkDirs. Removed=" + removed_size +
- "volumes. List of current volumes: " + toString());
-
- return removed_vols;
}
public String toString() {
@@ -768,14 +700,7 @@ public class FSDataset implements FSCons
public long getDfsUsed() throws IOException {
return volumes.getDfsUsed();
}
- /**
- * Return true - if there are still valid volumes
- * on the DataNode
- */
- public boolean hasEnoughResource(){
- return volumes.numberOfVolumes() >= MIN_NUM_OF_VALID_VOLUMES;
- }
-
+
/**
* Return total capacity, used and unused
*/
@@ -1301,32 +1226,17 @@ public class FSDataset implements FSCons
* Check whether the given block is a valid one.
*/
public boolean isValidBlock(Block b) {
- File f = null;;
- try {
- f = validateBlockFile(b);
- } catch(IOException e) {
- Log.warn("Block " + b + " is not valid:",e);
- }
-
- return f != null;
+ return validateBlockFile(b) != null;
}
/**
* Find the file corresponding to the block and return it if it exists.
*/
- File validateBlockFile(Block b) throws IOException {
+ File validateBlockFile(Block b) {
//Should we check for metadata file too?
File f = getFile(b);
-
- if(f != null ) {
- if(f.exists())
- return f;
-
- // if file is not null, but doesn't exist - possibly disk failed
- DataNode datanode = DataNode.getDataNode();
- datanode.checkDiskError();
- }
-
+ if(f != null && f.exists())
+ return f;
if (InterDatanodeProtocol.LOG.isDebugEnabled()) {
InterDatanodeProtocol.LOG.debug("b=" + b + ", f=" + f);
}
@@ -1465,51 +1375,10 @@ public class FSDataset implements FSCons
/**
* check if a data directory is healthy
- * if some volumes failed - make sure to remove all the blocks that belong
- * to these volumes
* @throws DiskErrorException
*/
public void checkDataDir() throws DiskErrorException {
- long total_blocks=0, removed_blocks=0;
- List<FSVolume> failed_vols = volumes.checkDirs();
-
- //if there no failed volumes return
- if(failed_vols == null)
- return;
-
- // else
- // remove related blocks
- long mlsec = System.currentTimeMillis();
- synchronized (this) {
- Iterator<Block> ib = volumeMap.keySet().iterator();
- while(ib.hasNext()) {
- Block b = ib.next();
- total_blocks ++;
- // check if the volume block belongs to still valid
- FSVolume vol = volumeMap.get(b).getVolume();
- for(FSVolume fv: failed_vols) {
- if(vol == fv) {
- DataNode.LOG.warn("removing block " + b.getBlockId() + " from vol "
- + vol.dataDir.dir.getAbsolutePath());
- ib.remove();
- removed_blocks++;
- break;
- }
- }
- }
- } // end of sync
- mlsec = System.currentTimeMillis() - mlsec;
- DataNode.LOG.warn(">>>>>>>>>>>>Removed " + removed_blocks + " out of " + total_blocks +
- "(took " + mlsec + " millisecs)");
-
- // report the error
- StringBuilder sb = new StringBuilder();
- for(FSVolume fv : failed_vols) {
- sb.append(fv.dataDir.dir.getAbsolutePath() + ";");
- }
-
- throw new DiskErrorException("DataNode failed volumes:" + sb);
-
+ volumes.checkDirs();
}
Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java?rev=1077532&r1=1077531&r2=1077532&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java Fri Mar 4 04:25:04 2011
@@ -264,10 +264,4 @@ public interface FSDatasetInterface exte
* @throws IOException
*/
public void validateBlockMetadata(Block b) throws IOException;
-
- /**
- * checks how many valid storage volumes are there in the DataNode
- * @return true if more then minimum valid volumes left in the FSDataSet
- */
- public boolean hasEnoughResource();
}
Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1077532&r1=1077531&r2=1077532&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java Fri Mar 4 04:25:04 2011
@@ -865,8 +865,6 @@ public class NameNode implements ClientP
}
verifyRequest(nodeReg);
if (errorCode == DatanodeProtocol.DISK_ERROR) {
- LOG.warn("Volume failed on " + dnName);
- } else if (errorCode == DatanodeProtocol.FATAL_DISK_ERROR) {
namesystem.removeDatanode(nodeReg);
}
}
Modified: hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java?rev=1077532&r1=1077531&r2=1077532&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/hdfs/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java Fri Mar 4 04:25:04 2011
@@ -50,9 +50,8 @@ public interface DatanodeProtocol extend
// error code
final static int NOTIFY = 0;
- final static int DISK_ERROR = 1; // there are still valid volumes on DN
+ final static int DISK_ERROR = 1;
final static int INVALID_BLOCK = 2;
- final static int FATAL_DISK_ERROR = 3; // no valid volumes left on DN
/**
* Determines actions that data node should perform
Modified: hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java?rev=1077532&r1=1077531&r2=1077532&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java (original)
+++ hadoop/common/branches/branch-0.20-security-patches/src/test/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java Fri Mar 4 04:25:04 2011
@@ -655,8 +655,4 @@ public class SimulatedFSDataset impleme
public String getStorageInfo() {
return "Simulated FSDataset-" + storageId;
}
-
- public boolean hasEnoughResource() {
- return true;
- }
}