Posted to hdfs-commits@hadoop.apache.org by el...@apache.org on 2010/11/12 19:29:34 UTC

svn commit: r1034501 - in /hadoop/hdfs/trunk: ./ src/java/org/apache/hadoop/hdfs/server/datanode/

Author: eli
Date: Fri Nov 12 18:29:33 2010
New Revision: 1034501

URL: http://svn.apache.org/viewvc?rev=1034501&view=rev
Log:
HDFS-1160. Improve some FSDataset warnings and comments. Contributed by Eli Collins.

Modified:
    hadoop/hdfs/trunk/CHANGES.txt
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java

Modified: hadoop/hdfs/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/CHANGES.txt?rev=1034501&r1=1034500&r2=1034501&view=diff
==============================================================================
--- hadoop/hdfs/trunk/CHANGES.txt (original)
+++ hadoop/hdfs/trunk/CHANGES.txt Fri Nov 12 18:29:33 2010
@@ -170,6 +170,8 @@ Trunk (unreleased changes)
 
     HDFS-1408. Herriot NN and DN clients should vend statistics. (cos)
 
+    HDFS-1160. Improve some FSDataset warnings and comments. (eli)
+
   OPTIMIZATIONS
 
     HDFS-1140. Speedup INode.getPathComponents. (Dmytro Molkov via shv)

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java?rev=1034501&r1=1034500&r2=1034501&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java Fri Nov 12 18:29:33 2010
@@ -951,7 +951,7 @@ class BlockReceiver implements java.io.C
             try {
               datanode.checkDiskError(e); // may throw an exception here
             } catch (IOException ioe) {
-              LOG.warn("DataNode.chekDiskError failed in run() with: ", ioe);
+              LOG.warn("DataNode.checkDiskError failed in run() with: ", ioe);
             }
             LOG.info("PacketResponder " + block + " " + numTargets + 
                      " Exception " + StringUtils.stringifyException(e));

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java?rev=1034501&r1=1034500&r2=1034501&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java Fri Nov 12 18:29:33 2010
@@ -848,30 +848,28 @@ public class DataNode extends Configured
   }
   
   private void handleDiskError(String errMsgr) {
-    boolean hasEnoughResource = data.hasEnoughResource();
-    LOG.warn("DataNode.handleDiskError: Keep Running: " + hasEnoughResource);
+    final boolean hasEnoughResources = data.hasEnoughResource();
+    LOG.warn("DataNode.handleDiskError: Keep Running: " + hasEnoughResources);
+
+    // If we have enough active valid volumes then we do not want to 
+    // shutdown the DN completely.
+    int dpError = hasEnoughResources ? DatanodeProtocol.DISK_ERROR  
+                                     : DatanodeProtocol.FATAL_DISK_ERROR;  
     
-    //if hasEnoughtResource = true - more volumes are available, so we don't want 
-    // to shutdown DN completely and don't want NN to remove it.
-    int dp_error = DatanodeProtocol.DISK_ERROR;
-    if(hasEnoughResource == false) {
-      // DN will be shutdown and NN should remove it
-      dp_error = DatanodeProtocol.FATAL_DISK_ERROR;
-    }
-    //inform NameNode
     try {
-      namenode.errorReport(
-                           dnRegistration, dp_error, errMsgr);
-    } catch(IOException ignored) {              
+      namenode.errorReport(dnRegistration, dpError, errMsgr);
+    } catch (IOException e) {
+      LOG.warn("Error reporting disk failure to NameNode: " + 
+          StringUtils.stringifyException(e));
     }
     
     
-    if(hasEnoughResource) {
+    if (hasEnoughResources) {
       scheduleBlockReport(0);
       return; // do not shutdown
     }
     
-    LOG.warn("DataNode is shutting down.\n" + errMsgr);
+    LOG.warn("DataNode is shutting down: " + errMsgr);
     shouldRun = false; 
   }
     
@@ -916,7 +914,6 @@ public class DataNode extends Configured
                                                        xmitsInProgress.get(),
                                                        getXceiverCount());
           myMetrics.heartbeats.inc(now() - startTime);
-          //LOG.info("Just sent heartbeat, with name " + localName);
           if (!processCommand(cmds))
             continue;
         }
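
The rewritten handleDiskError() above reduces to one decision: with enough
healthy volumes left, report a recoverable DISK_ERROR and schedule an
immediate block report; otherwise report FATAL_DISK_ERROR and stop the
service loop. A minimal standalone sketch of the same escalation idiom
(plain Java, not HDFS code; errorReport() and the constants below are
illustrative stand-ins for the real DatanodeProtocol RPC):

    import java.io.IOException;

    public class DiskErrorEscalation {
      static final int DISK_ERROR = 1;       // recoverable: volumes remain
      static final int FATAL_DISK_ERROR = 2; // fatal: NN should drop node

      static volatile boolean shouldRun = true;

      // Stand-in for the namenode.errorReport() RPC; may itself fail.
      static void errorReport(int code, String msg) throws IOException {
        System.out.println("errorReport(" + code + "): " + msg);
      }

      static void handleDiskError(boolean hasEnoughResources, String msg) {
        int dpError = hasEnoughResources ? DISK_ERROR : FATAL_DISK_ERROR;
        try {
          errorReport(dpError, msg);
        } catch (IOException e) {
          // Best effort: a failed report must not change the decision below
          System.err.println("Error reporting disk failure: " + e);
        }
        if (!hasEnoughResources) {
          shouldRun = false; // fatal: stop the main service loop
        }
      }

      public static void main(String[] args) {
        handleDiskError(true, "one volume failed, others healthy");
        handleDiskError(false, "all volumes failed");
        System.out.println("shouldRun = " + shouldRun);
      }
    }

The catch block logs instead of swallowing the exception for the same
reason as the change above: reporting to the NameNode is best effort, and
a failed report should not silently alter the keep-running decision.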

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java?rev=1034501&r1=1034500&r2=1034501&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java Fri Nov 12 18:29:33 2010
@@ -607,14 +607,12 @@ public class FSDataset implements FSCons
     }
       
     /**
-     * goes over all the volumes and checkDir eachone of them
-     * if one throws DiskErrorException - removes from the list of active 
-     * volumes. 
-     * @return list of all the removed volumes
+     * Calls {@link FSVolume#checkDirs()} on each volume, removing any
+     * volumes from the active list that result in a DiskErrorException.
+     * @return list of all the removed volumes.
      */
     synchronized List<FSVolume> checkDirs() {
-      
-      ArrayList<FSVolume> removed_vols = null;  
+      ArrayList<FSVolume> removedVols = null;  
       
       for (int idx = 0; idx < volumes.length; idx++) {
         FSVolume fsv = volumes[idx];
@@ -622,31 +620,30 @@ public class FSDataset implements FSCons
           fsv.checkDirs();
         } catch (DiskErrorException e) {
           DataNode.LOG.warn("Removing failed volume " + fsv + ": ",e);
-          if(removed_vols == null) {
-            removed_vols = new ArrayList<FSVolume>(1);
+          if (removedVols == null) {
+            removedVols = new ArrayList<FSVolume>(1);
           }
-          removed_vols.add(volumes[idx]);
-          volumes[idx] = null; //remove the volume
+          removedVols.add(volumes[idx]);
+          volumes[idx] = null; // Remove the volume
         }
       }
       
-      // repair array - copy non null elements
-      int removed_size = (removed_vols==null)? 0 : removed_vols.size();
-      if(removed_size > 0) {
-        FSVolume fsvs[] = new FSVolume [volumes.length-removed_size];
-        for(int idx=0,idy=0; idx<volumes.length; idx++) {
-          if(volumes[idx] != null) {
-            fsvs[idy] = volumes[idx];
-            idy++;
+      // Remove null volumes from the volumes array
+      if (removedVols != null && removedVols.size() > 0) {
+        FSVolume newVols[] = new FSVolume[volumes.length - removedVols.size()];
+        int i = 0;
+        for (FSVolume vol : volumes) {
+          if (vol != null) {
+            newVols[i++] = vol;
           }
         }
-        volumes = fsvs; // replace array of volumes
+        volumes = newVols; // Replace array of volumes
         DataNode.LOG.info("Completed FSVolumeSet.checkDirs. Removed "
-            + removed_vols.size() + " volumes. List of current volumes: "
+            + removedVols.size() + " volumes. List of current volumes: "
             + this);
       }
 
-      return removed_vols;
+      return removedVols;
     }
       
     public String toString() {
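
The checkDirs() rewrite keeps the original two-phase idiom: null out
failed slots in place, then copy the survivors into a right-sized array.
A standalone sketch of that compaction step (illustrative names and types,
not HDFS code):

    public class ArrayCompaction {
      // Copy the non-null entries of vols into a right-sized array.
      static String[] compact(String[] vols, int removedCount) {
        String[] newVols = new String[vols.length - removedCount];
        int i = 0;
        for (String v : vols) {
          if (v != null) {
            newVols[i++] = v; // single pass, relative order preserved
          }
        }
        return newVols;
      }

      public static void main(String[] args) {
        String[] vols = { "/data1", null, "/data3" }; // middle volume failed
        for (String v : compact(vols, 1)) {
          System.out.println(v);
        }
      }
    }
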
@@ -1697,46 +1694,46 @@ public class FSDataset implements FSCons
    */
   @Override // FSDatasetInterface
   public void checkDataDir() throws DiskErrorException {
-    long total_blocks=0, removed_blocks=0;
-    List<FSVolume> failed_vols =  volumes.checkDirs();
+    long totalBlocks=0, removedBlocks=0;
+    List<FSVolume> failedVols =  volumes.checkDirs();
     
-    //if there no failed volumes return
-    if(failed_vols == null) 
+    // If there are no failed volumes, return
+    if (failedVols == null) { 
       return;
+    }
     
-    // else 
-    // remove related blocks
+    // Otherwise remove blocks for the failed volumes
     long mlsec = System.currentTimeMillis();
     synchronized (this) {
       Iterator<ReplicaInfo> ib = volumeMap.replicas().iterator();
-      while(ib.hasNext()) {
+      while (ib.hasNext()) {
         ReplicaInfo b = ib.next();
-        total_blocks ++;
+        totalBlocks++;
         // check if the volume this block belongs to is still valid
         FSVolume vol = b.getVolume();
-        for(FSVolume fv: failed_vols) {
-          if(vol == fv) {
-            DataNode.LOG.warn("removing block " + b.getBlockId() + " from vol " 
-                + vol.dataDir.dir.getAbsolutePath());
+        for (FSVolume fv: failedVols) {
+          if (vol == fv) {
+            DataNode.LOG.warn("Removing replica info for block " + 
+              b.getBlockId() + " on failed volume " + 
+              vol.dataDir.dir.getAbsolutePath());
             ib.remove();
-            removed_blocks++;
+            removedBlocks++;
             break;
           }
         }
       }
     } // end of sync
     mlsec = System.currentTimeMillis() - mlsec;
-    DataNode.LOG.warn("Removed " + removed_blocks + " out of " + total_blocks +
+    DataNode.LOG.warn("Removed " + removedBlocks + " out of " + totalBlocks +
         "(took " + mlsec + " millisecs)");
 
     // report the error
     StringBuilder sb = new StringBuilder();
-    for(FSVolume fv : failed_vols) {
+    for (FSVolume fv : failedVols) {
       sb.append(fv.dataDir.dir.getAbsolutePath() + ";");
     }
 
     throw  new DiskErrorException("DataNode failed volumes:" + sb);
-  
   }
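
The purge loop above relies on Iterator.remove() to drop replica entries
while the replica map is being scanned; removing through the collection
itself mid-iteration would throw ConcurrentModificationException. A
standalone sketch of the same pattern (illustrative data, not HDFS code):

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;

    public class PurgeDuringScan {
      public static void main(String[] args) {
        List<String> replicas = new ArrayList<String>();
        replicas.add("blk_1@/data1");
        replicas.add("blk_2@/data2");
        replicas.add("blk_3@/data1");

        String failedVolume = "/data1";
        long totalBlocks = 0, removedBlocks = 0;
        Iterator<String> it = replicas.iterator();
        while (it.hasNext()) {
          String r = it.next();
          totalBlocks++;
          if (r.endsWith(failedVolume)) {
            it.remove(); // the only safe removal while iterating
            removedBlocks++;
          }
        }
        System.out.println("Removed " + removedBlocks +
            " out of " + totalBlocks);
      }
    }
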
     
 

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java?rev=1034501&r1=1034500&r2=1034501&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java Fri Nov 12 18:29:33 2010
@@ -316,8 +316,9 @@ public interface FSDatasetInterface exte
       int checksumSize) throws IOException;
 
   /**
-   * checks how many valid storage volumes are there in the DataNode
-   * @return true if more then minimum valid volumes left in the FSDataSet
+   * Checks how many valid storage volumes there are in the DataNode.
+   * @return true if more than the minimum number of valid volumes are left 
+   * in the FSDataSet.
    */
   public boolean hasEnoughResource();
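
The javadoc leaves the threshold itself unspecified. One plausible reading
of the contract, sketched with a hypothetical minValidVolumes parameter
(this is not the actual FSDataset implementation):

    public class ResourceCheck {
      // Hypothetical reading of hasEnoughResource(): keep running while
      // at least minValidVolumes volumes are still valid.
      static boolean hasEnoughResource(int validVolumes,
                                       int minValidVolumes) {
        return validVolumes >= minValidVolumes;
      }

      public static void main(String[] args) {
        System.out.println(hasEnoughResource(3, 1)); // true:  DISK_ERROR
        System.out.println(hasEnoughResource(0, 1)); // false: FATAL_DISK_ERROR
      }
    }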