You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by to...@apache.org on 2007/04/04 22:20:49 UTC
svn commit: r525596 - in /lucene/hadoop/trunk: ./
src/java/org/apache/hadoop/dfs/
Author: tomwhite
Date: Wed Apr 4 13:20:48 2007
New Revision: 525596
URL: http://svn.apache.org/viewvc?view=rev&rev=525596
Log:
HADOOP-1133. Add tool to analyze and debug namenode on a production cluster. Contributed by Dhruba Borthakur.
Modified:
lucene/hadoop/trunk/CHANGES.txt
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java
Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Apr 4 13:20:48 2007
@@ -106,6 +106,9 @@
33. HADOOP-1187. Improve DFS Scalability: avoid scanning entire list of
datanodes in getAdditionalBlocks. (Dhruba Borthakur via tomwhite)
+34. HADOOP-1133. Add tool to analyze and debug namenode on a production
+ cluster. (Dhruba Borthakur via tomwhite)
+
Release 0.12.3 (not yet released)
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java Wed Apr 4 13:20:48 2007
@@ -30,7 +30,7 @@
interface ClientProtocol extends VersionedProtocol {
/*
- * 10: finalizeUpgrade() added
+ * 11: metasave() added
*/
public static final long versionID = 10L;
@@ -355,4 +355,11 @@
* @throws IOException
*/
public void finalizeUpgrade() throws IOException;
+
+ /**
+ * Dumps namenode data structures into specified file. If file
+ * already exists, then append.
+ * @throws IOException
+ */
+ public void metaSave(String filename) throws IOException;
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSAdmin.java Wed Apr 4 13:20:48 2007
@@ -217,6 +217,23 @@
}
/**
+ * Dumps DFS data structures into specified file.
+ * Usage: java DFSAdmin -metasave filename
+ * @param argv List of command line parameters.
+ * @param idx The index of the command that is being processed.
+ * @exception IOException if an error occurred while accessing
+ * the file or path.
+ */
+ public int metaSave(String[] argv, int idx) throws IOException {
+ String pathname = argv[idx];
+ DistributedFileSystem dfs = (DistributedFileSystem) fs;
+ dfs.metaSave(pathname);
+ System.out.println("Created file " + pathname + " on server " +
+ dfs.getUri());
+ return 0;
+ }
+
+ /**
* Displays format of commands.
* @param cmd The command that is being executed.
*/
@@ -233,12 +250,16 @@
} else if ("-finalizeUpgrade".equals(cmd)) {
System.err.println("Usage: java DFSAdmin"
+ " [-finalizeUpgrade]");
+ } else if ("-metasave".equals(cmd)) {
+ System.err.println("Usage: java DFSAdmin"
+ + " [-metasave filename]");
} else {
System.err.println("Usage: java DFSAdmin");
System.err.println(" [-report]");
System.err.println(" [-safemode enter | leave | get | wait]");
System.err.println(" [-refreshNodes]");
System.err.println(" [-finalizeUpgrade]");
+ System.err.println(" [-metasave filename]");
System.err.println(" [-help [cmd]]");
}
}
@@ -282,6 +303,11 @@
printUsage(cmd);
return exitCode;
}
+ } else if ("-metasave".equals(cmd)) {
+ if (argv.length != 2) {
+ printUsage(cmd);
+ return exitCode;
+ }
}
@@ -307,6 +333,8 @@
exitCode = refreshNodes();
} else if ("-finalizeUpgrade".equals(cmd)) {
exitCode = finalizeUpgrade();
+ } else if ("-metasave".equals(cmd)) {
+ exitCode = metaSave(argv, i);
} else if ("-help".equals(cmd)) {
if (i < argv.length) {
printHelp(argv[i]);
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java Wed Apr 4 13:20:48 2007
@@ -375,6 +375,17 @@
public void refreshNodes() throws IOException {
namenode.refreshNodes();
}
+
+ /**
+ * Dumps DFS data structures into specified file.
+ * See {@link ClientProtocol#metaSave(String)}
+ * for more details.
+ *
+ * @see ClientProtocol#metaSave(String)
+ */
+ public void metaSave(String pathname) throws IOException {
+ namenode.metaSave(pathname);
+ }
/**
* @see ClientProtocol#finalizeUpgrade()
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java Wed Apr 4 13:20:48 2007
@@ -166,6 +166,30 @@
return buffer.toString();
}
+ /** A formatted string for printing the status of the DataNode. */
+ String dumpDatanode() {
+ StringBuffer buffer = new StringBuffer();
+ long c = getCapacity();
+ long r = getRemaining();
+ long u = c - r;
+ buffer.append(name);
+ if(!NetworkTopology.DEFAULT_RACK.equals(location)) {
+ buffer.append(" "+location);
+ }
+ if (isDecommissioned()) {
+ buffer.append(" DD");
+ } else if (isDecommissionInProgress()) {
+ buffer.append(" DP");
+ } else {
+ buffer.append(" IN");
+ }
+ buffer.append(" " + c + "(" + FsShell.byteDesc(c)+")");
+ buffer.append(" " + u + "(" + FsShell.byteDesc(u)+")");
+ buffer.append(" " + FsShell.limitDecimal(((1.0*u)/c)*100,2)+"%");
+ buffer.append(" " + new Date(lastUpdate));
+ return buffer.toString();
+ }
+
/**
* Start decommissioning a node.
* old state.
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java Wed Apr 4 13:20:48 2007
@@ -311,6 +311,14 @@
dfs.finalizeUpgrade();
}
+ /*
+ * Requests the namenode to dump data structures into specified
+ * file.
+ */
+ public void metaSave(String pathname) throws IOException {
+ dfs.metaSave(pathname);
+ }
+
/**
* We need to find the blocks that didn't match. Likely only one
* is corrupt but we will report both to the namenode. In the future,
@@ -415,6 +423,13 @@
public void finalizeUpgrade() throws IOException {
((RawDistributedFileSystem)fs).finalizeUpgrade();
}
+
+ /*
+ * Dumps dfs data structures into specified file.
+ */
+ public void metaSave(String pathname) throws IOException {
+ ((RawDistributedFileSystem)fs).metaSave(pathname);
+ }
/**
* We need to find the blocks that didn't match. Likely only one
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Wed Apr 4 13:20:48 2007
@@ -345,6 +345,59 @@
}
}
}
+
+ /**
+ * Dump all metadata into specified file
+ */
+ void metaSave(String filename) throws IOException {
+ File file = new File(System.getProperty("hadoop.log.dir"),
+ filename);
+ PrintWriter out = new PrintWriter(new BufferedWriter(
+ new FileWriter(file, true)));
+
+
+ //
+ // Dump contents of neededReplication
+ //
+ synchronized (neededReplications) {
+ out.println("Metasave: Blocks waiting for replication: " +
+ neededReplications.size());
+ if (neededReplications.size() > 0) {
+ for (Iterator<Block> it = neededReplications.iterator();
+ it.hasNext();) {
+ Block block = it.next();
+ Collection<DatanodeDescriptor> containingNodes = blocksMap.get(block);
+ out.print(block);
+ if (containingNodes != null) {
+ for (Iterator<DatanodeDescriptor> jt = containingNodes.iterator();
+ jt.hasNext(); ) {
+ DatanodeDescriptor node = jt.next();
+ out.print(" " + node + " : " );
+ }
+ }
+ out.println("");
+ }
+ }
+ }
+
+ //
+ // Dump blocks from pendingReplication
+ //
+ pendingReplications.metaSave(out);
+
+ //
+ // Dump blocks that are waiting to be deleted
+ //
+ dumpRecentInvalidateSets(out);
+
+ //
+ // Dump all datanodes
+ //
+ datanodeDump(out);
+
+ out.flush();
+ out.close();
+ }
/* get replication factor of a block */
private int getReplication( Block block ) {
@@ -1053,6 +1106,34 @@
}
/**
+ * dumps the contents of recentInvalidateSets
+ */
+ private synchronized void dumpRecentInvalidateSets(PrintWriter out) {
+ Collection<Collection<Block>> values = recentInvalidateSets.values();
+ Iterator it = recentInvalidateSets.entrySet().iterator();
+ if (values.size() == 0) {
+ out.println("Metasave: Blocks waiting deletion: 0");
+ return;
+ }
+ out.println("Metasave: Blocks waiting deletion from " +
+ values.size() + " datanodes.");
+ while (it.hasNext()) {
+ Map.Entry entry = (Map.Entry) it.next();
+ String storageId = (String) entry.getKey();
+ DatanodeDescriptor node = datanodeMap.get(storageId);
+ Collection<Block> blklist = (Collection<Block>) entry.getValue();
+ if (blklist.size() > 0) {
+ out.print(node.getName());
+ for (Iterator jt = blklist.iterator(); jt.hasNext();) {
+ Block block = (Block) jt.next();
+ out.print(" " + block);
+ }
+ out.println("");
+ }
+ }
+ }
+
+ /**
* Invalidates the given block on the given datanode.
*/
public synchronized void invalidateBlock(Block blk, DatanodeInfo dn)
@@ -2380,6 +2461,18 @@
}
}
+ /**
+ * Prints information about all datanodes.
+ */
+ private synchronized void datanodeDump(PrintWriter out) {
+ synchronized (datanodeMap) {
+ out.println("Metasave: Number of datanodes: " + datanodeMap.size());
+ for(Iterator<DatanodeDescriptor> it = datanodeMap.values().iterator(); it.hasNext(); ) {
+ DatanodeDescriptor node = it.next();
+ out.println(node.dumpDatanode());
+ }
+ }
+ }
/**
* Start decommissioning the specified datanode.
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java Wed Apr 4 13:20:48 2007
@@ -585,6 +585,13 @@
getFSImage().finalizeUpgrade();
}
+ /**
+ * Dumps namenode state into specified file
+ */
+ public void metaSave(String filename) throws IOException {
+ namesystem.metaSave(filename);
+ }
+
////////////////////////////////////////////////////////////////
// DatanodeProtocol
////////////////////////////////////////////////////////////////
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java?view=diff&rev=525596&r1=525595&r2=525596
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/PendingReplicationBlocks.java Wed Apr 4 13:20:48 2007
@@ -19,7 +19,9 @@
import org.apache.commons.logging.*;
import org.apache.hadoop.util.*;
+import java.io.*;
import java.util.*;
+import java.sql.Time;
/***************************************************
* PendingReplicationBlocks does the bookkeeping of all
@@ -226,6 +228,26 @@
try {
timerThread.join(3000);
} catch (InterruptedException ie) {
+ }
+ }
+
+ /**
+ * Iterate through all items and print them.
+ */
+ void metaSave(PrintWriter out) {
+ synchronized (pendingReplications) {
+ out.println("Metasave: Blocks being replicated: " +
+ pendingReplications.size());
+ Iterator iter = pendingReplications.entrySet().iterator();
+ while (iter.hasNext()) {
+ Map.Entry entry = (Map.Entry) iter.next();
+ PendingBlockInfo pendingBlock = (PendingBlockInfo) entry.getValue();
+ Block block = (Block) entry.getKey();
+ out.println(block +
+ " StartTime: " + new Time(pendingBlock.timeStamp) +
+ " NumReplicaInProgress: " +
+ pendingBlock.numReplicasInProgress);
+ }
}
}
}