Posted to common-commits@hadoop.apache.org by to...@apache.org on 2007/04/04 22:20:49 UTC

svn commit: r525596 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/dfs/

Author: tomwhite
Date: Wed Apr  4 13:20:48 2007
New Revision: 525596

URL: http://svn.apache.org/viewvc?view=rev&rev=525596
Log:
HADOOP-1133.  Add tool to analyze and debug namenode on a production cluster.  Contributed by Dhruba Borthakur.
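
For readers following along, the new call can be exercised programmatically as
well as through dfsadmin. A minimal sketch, assuming a client-side Configuration
whose fs.default.name points at the target namenode (only
DistributedFileSystem.metaSave comes from this patch; the surrounding setup is
illustrative):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.dfs.DistributedFileSystem;
    import org.apache.hadoop.fs.FileSystem;

    public class MetaSaveExample {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Ask the namenode to dump its internal state. The file is created
        // under hadoop.log.dir on the namenode host, and appended to if it
        // already exists.
        ((DistributedFileSystem) fs).metaSave("metasave.out");
      }
    }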

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Apr  4 13:20:48 2007
@@ -106,6 +106,9 @@
 33. HADOOP-1187.  Improve DFS Scalability: avoid scanning entire list of
     datanodes in getAdditionalBlocks.  (Dhruba Borthakur via tomwhite)
 
+34. HADOOP-1133.  Add tool to analyze and debug namenode on a production
+    cluster.  (Dhruba Borthakur via tomwhite)
+
 
 Release 0.12.3 (not yet released)
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java Wed Apr  4 13:20:48 2007
@@ -30,7 +30,7 @@
 interface ClientProtocol extends VersionedProtocol {
 
     /*
-     * 10: finalizeUpgrade() added
+     * 11: metaSave() added
      */
-    public static final long versionID = 10L;
+    public static final long versionID = 11L;
   
@@ -355,4 +355,11 @@
      * @throws IOException
      */
     public void finalizeUpgrade() throws IOException;
+
+   /**
+    * Dumps namenode data structures into the specified file. If the
+    * file already exists, then the output is appended.
+    * @throws IOException
+    */
+    public void metaSave(String filename) throws IOException;
 }
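
Because ClientProtocol is a versioned RPC interface, adding metaSave() is the
reason the protocol version above is bumped: client and server compare
versionID when the connection is set up. A sketch of a hand-rolled client
reaching the new method over RPC (the host and port are hypothetical; DFSClient
does the equivalent internally):

    import java.io.IOException;
    import java.net.InetSocketAddress;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.dfs.ClientProtocol;
    import org.apache.hadoop.ipc.RPC;

    public class RawMetaSave {
      public static void main(String[] args) throws IOException {
        // Hypothetical namenode address; in practice taken from fs.default.name.
        InetSocketAddress addr = new InetSocketAddress("namenode.example.com", 9000);
        // getProxy verifies that our versionID matches the server's.
        ClientProtocol namenode = (ClientProtocol) RPC.getProxy(
            ClientProtocol.class, ClientProtocol.versionID, addr, new Configuration());
        namenode.metaSave("metasave.out");
      }
    }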

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java Wed Apr  4 13:20:48 2007
@@ -217,6 +217,23 @@
     }
 
     /**
+     * Dumps DFS data structures into the specified file.
+     * Usage: java DFSAdmin -metasave filename
+     * @param argv List of command line parameters.
+     * @param idx The index of the command that is being processed.
+     * @exception IOException if an error occurred while accessing
+     *            the file or path.
+     */
+    public int metaSave(String[] argv, int idx) throws IOException {
+      String pathname = argv[idx];
+      DistributedFileSystem dfs = (DistributedFileSystem) fs;
+      dfs.metaSave(pathname);
+      System.out.println("Created file " + pathname + " on server " +
+                          dfs.getUri());
+      return 0;
+    }
+
+    /**
      * Displays format of commands.
      * @param cmd The command that is being executed.
      */
@@ -233,12 +250,16 @@
           } else if ("-finalizeUpgrade".equals(cmd)) {
             System.err.println("Usage: java DFSAdmin"
                 + " [-finalizeUpgrade]");
+           } else if ("-metasave".equals(cmd)) {
+             System.err.println("Usage: java DFSAdmin"
+                 + " [-metasave filename]");
           } else {
             System.err.println("Usage: java DFSAdmin");
             System.err.println("           [-report]");
             System.err.println("           [-safemode enter | leave | get | wait]");
             System.err.println("           [-refreshNodes]");
             System.err.println("           [-finalizeUpgrade]");
+            System.err.println("           [-metasave filename]");
             System.err.println("           [-help [cmd]]");
           }
     }
@@ -282,6 +303,11 @@
                   printUsage(cmd);
                   return exitCode;
                 }
+        } else if ("-metasave".equals(cmd)) {
+                if (argv.length != 2) {
+                  printUsage(cmd);
+                  return exitCode;
+                }
         }
 
 
@@ -307,6 +333,8 @@
                 exitCode = refreshNodes();
             } else if ("-finalizeUpgrade".equals(cmd)) {
                 exitCode = finalizeUpgrade();
+            } else if ("-metasave".equals(cmd)) {
+                 exitCode = metaSave(argv, i);
             } else if ("-help".equals(cmd)) {
                 if (i < argv.length) {
                     printHelp(argv[i]);
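
Operationally, the new subcommand follows the usage strings added above; on a
deployed cluster it would typically be reached through the hadoop wrapper
script (illustrative; the file name is arbitrary):

    bin/hadoop dfsadmin -metasave metasave.out

Note that the argument is a file name, not a path: as the FSNamesystem change
below shows, the namenode resolves it relative to hadoop.log.dir on its own
host, so the dump lands on the namenode machine rather than the client.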

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java Wed Apr  4 13:20:48 2007
@@ -375,6 +375,17 @@
     public void refreshNodes() throws IOException {
       namenode.refreshNodes();
     }
+
+    /**
+     * Dumps DFS data structures into the specified file.
+     * See {@link ClientProtocol#metaSave(String)}
+     * for more details.
+     * 
+     * @see ClientProtocol#metaSave(String)
+     */
+    public void metaSave(String pathname) throws IOException {
+      namenode.metaSave(pathname);
+    }
     
     /**
      * @see ClientProtocol#finalizeUpgrade()

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java Wed Apr  4 13:20:48 2007
@@ -166,6 +166,30 @@
     return buffer.toString();
   }
 
+  /** A formatted string for printing the status of the DataNode. */
+  String dumpDatanode() {
+    StringBuffer buffer = new StringBuffer();
+    long c = getCapacity();
+    long r = getRemaining();
+    long u = c - r;
+    buffer.append(name);
+    if(!NetworkTopology.DEFAULT_RACK.equals(location)) {
+        buffer.append(" "+location);
+    }
+    if (isDecommissioned()) {
+      buffer.append(" DD");
+    } else if (isDecommissionInProgress()) {
+      buffer.append(" DP");
+    } else {
+      buffer.append(" IN");
+    }
+    buffer.append(" " + c + "(" + FsShell.byteDesc(c)+")");
+    buffer.append(" " + u + "(" + FsShell.byteDesc(u)+")");
+    buffer.append(" " + FsShell.limitDecimal(((1.0*u)/c)*100,2)+"%");
+    buffer.append(" " + new Date(lastUpdate));
+    return buffer.toString();
+  }
+
   /**
    * Start decommissioning a node.
    * old state.
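
For reference, each line emitted by dumpDatanode() has the following shape
(field layout read off the code above; the values are placeholders):

    <host:port> [<rack>] IN|DP|DD <capacity>(<pretty>) <used>(<pretty>) <used%> <last update>

IN marks a node in service, DP one whose decommission is in progress, and DD a
fully decommissioned node; the rack is printed only when it differs from the
default rack.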

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java Wed Apr  4 13:20:48 2007
@@ -311,6 +311,14 @@
       dfs.finalizeUpgrade();
     }
 
+    /*
+     * Requests the namenode to dump data structures into the specified
+     * file.
+     */
+    public void metaSave(String pathname) throws IOException {
+      dfs.metaSave(pathname);
+    }
+
     /**
      * We need to find the blocks that didn't match.  Likely only one 
      * is corrupt but we will report both to the namenode.  In the future,
@@ -415,6 +423,13 @@
     public void finalizeUpgrade() throws IOException {
       ((RawDistributedFileSystem)fs).finalizeUpgrade();
     }
+
+    /*
+     * Dumps DFS data structures into the specified file.
+     */
+     public void metaSave(String pathname) throws IOException {
+       ((RawDistributedFileSystem)fs).metaSave(pathname);
+     }
 
     /**
      * We need to find the blocks that didn't match.  Likely only one 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Wed Apr  4 13:20:48 2007
@@ -345,6 +345,59 @@
           }
         }
     }
+
+    /**
+     * Dump all metadata into the specified file.
+     */
+    void metaSave(String filename) throws IOException {
+      File file = new File(System.getProperty("hadoop.log.dir"), 
+                           filename);
+      PrintWriter out = new PrintWriter(new BufferedWriter(
+                                        new FileWriter(file, true)));
+ 
+
+      //
+      // Dump contents of neededReplication
+      //
+      synchronized (neededReplications) {
+        out.println("Metasave: Blocks waiting for replication: " + 
+                    neededReplications.size());
+        if (neededReplications.size() > 0) {
+          for (Iterator<Block> it = neededReplications.iterator(); 
+               it.hasNext();) {
+            Block block = it.next();
+            Collection<DatanodeDescriptor> containingNodes = blocksMap.get(block);
+            out.print(block);
+            if (containingNodes != null) {
+              for (Iterator<DatanodeDescriptor> jt = containingNodes.iterator();
+                   jt.hasNext(); ) {
+                DatanodeDescriptor node = jt.next();
+                out.print(" " + node + " : " );
+              }
+            }
+            out.println("");
+          }
+        }
+      }
+
+      //
+      // Dump blocks from pendingReplication
+      //
+      pendingReplications.metaSave(out);
+
+      //
+      // Dump blocks that are waiting to be deleted
+      //
+      dumpRecentInvalidateSets(out);
+
+      //
+      // Dump all datanodes
+      //
+      datanodeDump(out);
+
+      out.flush();
+      out.close();
+    }
     
     /* get replication factor of a block */
     private int getReplication( Block block ) {
@@ -1053,6 +1106,34 @@
     }
 
     /**
+     * dumps the contents of recentInvalidateSets
+     */
+    private synchronized void dumpRecentInvalidateSets(PrintWriter out) {
+      Collection<Collection<Block>> values = recentInvalidateSets.values();
+      Iterator it = recentInvalidateSets.entrySet().iterator();
+      if (values.size() == 0) {
+        out.println("Metasave: Blocks waiting deletion: 0");
+        return;
+      }
+      out.println("Metasave: Blocks waiting deletion from " +
+                   values.size() + " datanodes.");
+      while (it.hasNext()) {
+        Map.Entry entry = (Map.Entry) it.next();
+        String storageId = (String) entry.getKey();
+        DatanodeDescriptor node = datanodeMap.get(storageId);
+        Collection<Block> blklist = (Collection<Block>) entry.getValue();
+        if (blklist.size() > 0) {
+          out.print(node.getName());
+          for (Iterator jt = blklist.iterator(); jt.hasNext();) {
+            Block block = (Block) jt.next();
+            out.print(" " + block); 
+          }
+          out.println("");
+        }
+      }
+    }
+
+    /**
      * Invalidates the given block on the given datanode.
      */
     public synchronized void invalidateBlock(Block blk, DatanodeInfo dn)
@@ -2380,6 +2461,18 @@
       }
     }
 
+    /**
+     * Prints information about all datanodes.
+     */
+    private synchronized void datanodeDump(PrintWriter out) {
+      synchronized (datanodeMap) {
+        out.println("Metasave: Number of datanodes: " + datanodeMap.size());
+        for(Iterator<DatanodeDescriptor> it = datanodeMap.values().iterator(); it.hasNext(); ) {
+          DatanodeDescriptor node = it.next();
+          out.println(node.dumpDatanode());
+        }
+      }
+    }
 
     /**
      * Start decommissioning the specified datanode. 
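
Taken together, metaSave() writes four labeled sections into the dump file, in
this order (headings quoted from the code above):

    Metasave: Blocks waiting for replication: <N>
    Metasave: Blocks being replicated: <N>
    Metasave: Blocks waiting deletion from <N> datanodes.
    Metasave: Number of datanodes: <N>

Since the file is opened in append mode, repeated invocations accumulate
snapshots in the same file, which is handy for watching replication progress
on a live cluster over time.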

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java Wed Apr  4 13:20:48 2007
@@ -585,6 +585,13 @@
       getFSImage().finalizeUpgrade();
     }
 
+    /**
+     * Dumps namenode state into the specified file.
+     */
+    public void metaSave(String filename) throws IOException {
+      namesystem.metaSave(filename);
+    }
+
     ////////////////////////////////////////////////////////////////
     // DatanodeProtocol
     ////////////////////////////////////////////////////////////////

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java Wed Apr  4 13:20:48 2007
@@ -19,7 +19,9 @@
 
 import org.apache.commons.logging.*;
 import org.apache.hadoop.util.*;
+import java.io.*;
 import java.util.*;
+import java.sql.Time;
 
 /***************************************************
  * PendingReplicationBlocks does the bookkeeping of all
@@ -226,6 +228,26 @@
     try {
       timerThread.join(3000);
     } catch (InterruptedException ie) {
+    }
+  }
+
+  /**
+   * Iterate through all items and print them.
+   */
+  void metaSave(PrintWriter out) {
+    synchronized (pendingReplications) {
+      out.println("Metasave: Blocks being replicated: " +
+                  pendingReplications.size());
+      Iterator iter = pendingReplications.entrySet().iterator();
+      while (iter.hasNext()) {
+        Map.Entry entry = (Map.Entry) iter.next();
+        PendingBlockInfo pendingBlock = (PendingBlockInfo) entry.getValue();
+        Block block = (Block) entry.getKey();
+        out.println(block + 
+                    " StartTime: " + new Time(pendingBlock.timeStamp) +
+                    " NumReplicaInProgress: " + 
+                    pendingBlock.numReplicasInProgress);
+      }
     }
   }
 }
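
One small detail in the metaSave() above: java.sql.Time.toString() renders only
the time-of-day portion (HH:MM:SS) of the timestamp, so a pending-replication
entry reads roughly like (illustrative block id and values):

    blk_1234567890 StartTime: 13:20:48 NumReplicaInProgress: 1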