Posted to common-commits@hadoop.apache.org by su...@apache.org on 2013/03/31 06:27:31 UTC
svn commit: r1462875 [1/2] - in /hadoop/common/branches/branch-1: ./ bin/
src/core/org/apache/hadoop/io/compress/
src/docs/src/documentation/content/xdocs/ src/hdfs/org/apache/hadoop/hdfs/
src/hdfs/org/apache/hadoop/hdfs/protocol/ src/hdfs/org/apache/h...
Author: suresh
Date: Sun Mar 31 04:27:29 2013
New Revision: 1462875
URL: http://svn.apache.org/r1462875
Log:
HDFS-4651. Offline Image Viewer backport to branch-1. Contributed by Chris Nauroth.
Added:
hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/IndentedImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/NameDistributionVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/SpotCheckImageVisitor.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestDelimitedImageVisitor.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOIVCanReadOldVersions.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/fsimageV18 (with props)
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/fsimageV19 (with props)
Modified:
hadoop/common/branches/branch-1/bin/hadoop
hadoop/common/branches/branch-1/build.xml
hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java
hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java
Modified: hadoop/common/branches/branch-1/bin/hadoop
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/bin/hadoop?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/bin/hadoop (original)
+++ hadoop/common/branches/branch-1/bin/hadoop Sun Mar 31 04:27:29 2013
@@ -81,6 +81,7 @@ print_usage()
echo " fsck run a DFS filesystem checking utility"
echo " fs run a generic filesystem user client"
echo " balancer run a cluster balancing utility"
+ echo " oiv apply the offline fsimage viewer to an fsimage"
echo " fetchdt fetch a delegation token from the NameNode"
echo " jobtracker run the MapReduce job Tracker node"
echo " pipes run a Pipes job"
@@ -287,6 +288,8 @@ elif [ "$COMMAND" = "fsck" ] ; then
elif [ "$COMMAND" = "balancer" ] ; then
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
+elif [ "$COMMAND" = "oiv" ] ; then
+ CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
elif [ "$COMMAND" = "fetchdt" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
elif [ "$COMMAND" = "jobtracker" ] ; then
Modified: hadoop/common/branches/branch-1/build.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/build.xml?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/build.xml (original)
+++ hadoop/common/branches/branch-1/build.xml Sun Mar 31 04:27:29 2013
@@ -936,6 +936,8 @@
<copy file="${test.src.dir}/org/apache/hadoop/cli/clitest_data/data30bytes" todir="${test.cache.data}"/>
<copy file="${test.src.dir}/org/apache/hadoop/cli/clitest_data/data60bytes" todir="${test.cache.data}"/>
<copy file="${test.src.dir}/org/apache/hadoop/cli/clitest_data/data120bytes" todir="${test.cache.data}"/>
+ <copy file="${test.src.dir}/org/apache/hadoop/hdfs/tools/offlineImageViewer/fsimageV18" todir="${test.cache.data}"/>
+ <copy file="${test.src.dir}/org/apache/hadoop/hdfs/tools/offlineImageViewer/fsimageV19" todir="${test.cache.data}"/>
</target>
<!-- ================================================================== -->
Modified: hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java (original)
+++ hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java Sun Mar 31 04:27:29 2013
@@ -39,10 +39,30 @@ public class CompressionCodecFactory {
* automatically supports finding the longest matching suffix.
*/
private SortedMap<String, CompressionCodec> codecs = null;
+
+  /**
+   * A map from codec aliases (the lower-cased short class name, with and
+   * without the 'Codec' suffix) to the codecs.
+   */
+ private Map<String, CompressionCodec> codecsByName = null;
+
+ /**
+ * A map from class names to the codecs
+ */
+ private HashMap<String, CompressionCodec> codecsByClassName = null;
private void addCodec(CompressionCodec codec) {
String suffix = codec.getDefaultExtension();
codecs.put(new StringBuffer(suffix).reverse().toString(), codec);
+ codecsByClassName.put(codec.getClass().getCanonicalName(), codec);
+
+ String codecName = codec.getClass().getSimpleName();
+ codecsByName.put(codecName.toLowerCase(), codec);
+ if (codecName.endsWith("Codec")) {
+ codecName = codecName.substring(0, codecName.length() - "Codec".length());
+ codecsByName.put(codecName.toLowerCase(), codec);
+ }
}
/**
@@ -131,6 +151,8 @@ public class CompressionCodecFactory {
*/
public CompressionCodecFactory(Configuration conf) {
codecs = new TreeMap<String, CompressionCodec>();
+ codecsByClassName = new HashMap<String, CompressionCodec>();
+ codecsByName = new HashMap<String, CompressionCodec>();
List<Class<? extends CompressionCodec>> codecClasses = getCodecClasses(conf);
if (codecClasses == null) {
addCodec(new GzipCodec());
@@ -168,6 +190,68 @@ public class CompressionCodecFactory {
}
/**
+ * Find the relevant compression codec for the codec's canonical class name.
+ * @param classname the canonical class name of the codec
+ * @return the codec object
+ */
+ public CompressionCodec getCodecByClassName(String classname) {
+ if (codecsByClassName == null) {
+ return null;
+ }
+ return codecsByClassName.get(classname);
+ }
+
+ /**
+ * Find the relevant compression codec for the codec's canonical class name
+ * or by codec alias.
+ * <p/>
+ * Codec aliases are case insensitive.
+ * <p/>
+   * The codec alias is the short class name (without the package name).
+   * If the short class name ends with 'Codec', then there are two aliases for
+   * the codec: the complete short class name and the short class name without
+   * the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+   * aliases are 'gzip' and 'gzipcodec'.
+   *
+   * @param codecName the canonical class name or alias of the codec
+ * @return the codec object
+ */
+ public CompressionCodec getCodecByName(String codecName) {
+ if (codecsByClassName == null) {
+ return null;
+ }
+ CompressionCodec codec = getCodecByClassName(codecName);
+ if (codec == null) {
+      // trying to get the codec by name in case the name was specified instead of a class
+ codec = codecsByName.get(codecName.toLowerCase());
+ }
+ return codec;
+ }
+
+ /**
+ * Find the relevant compression codec for the codec's canonical class name
+   * or by codec alias and returns its implementation class.
+   * <p/>
+   * Codec aliases are case insensitive.
+   * <p/>
+   * The codec alias is the short class name (without the package name).
+   * If the short class name ends with 'Codec', then there are two aliases for
+   * the codec: the complete short class name and the short class name without
+   * the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+   * aliases are 'gzip' and 'gzipcodec'.
+   *
+   * @param codecName the canonical class name or alias of the codec
+ * @return the codec class
+ */
+ public Class<? extends CompressionCodec> getCodecClassByName(String codecName) {
+ CompressionCodec codec = getCodecByName(codecName);
+ if (codec == null) {
+ return null;
+ }
+ return codec.getClass();
+ }
+
+ /**
* Removes a suffix from a filename, if it has it.
* @param filename the filename to strip
* @param suffix the suffix to remove
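[Editor's note: the alias rules in the javadoc above are easiest to see in a quick sketch. This is not part of the commit; it only uses the constructor and getCodecByName method shown in the diff, plus the stock GzipCodec that the factory registers by default.]

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class CodecAliasDemo {
      public static void main(String[] args) {
        CompressionCodecFactory factory =
            new CompressionCodecFactory(new Configuration());
        // Aliases are the lower-cased short class name, with and without
        // the 'Codec' suffix, so both lookups resolve to GzipCodec.
        CompressionCodec a = factory.getCodecByName("gzip");
        CompressionCodec b = factory.getCodecByName("GzipCodec");
        System.out.println(a.getClass() == b.getClass()); // true
      }
    }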
Added: hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml (added)
+++ hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml Sun Mar 31 04:27:29 2013
@@ -0,0 +1,427 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+
+ <header>
+ <title>Offline Image Viewer Guide</title>
+ </header>
+
+ <body>
+
+ <section>
+ <title>Overview</title>
+
+    <p>The Offline Image Viewer is a tool to dump the contents of HDFS
+    fsimage files to human-readable formats in order to allow offline analysis
+    and examination of a Hadoop cluster's namespace. The tool is able to
+    process very large image files relatively quickly, converting them to
+    one of several output formats. The tool handles the layout formats that
+    were included with Hadoop versions 16 and up. If the tool is not able to
+    process an image file, it will exit cleanly. The Offline Image Viewer does not require
+    a Hadoop cluster to be running; it is entirely offline in its operation.</p>
+
+ <p>The Offline Image Viewer provides several output processors:</p>
+ <ol>
+ <li><strong>Ls</strong> is the default output processor. It closely mimics the format of
+ the <code>lsr </code> command. It includes the same fields, in the same order, as
+ <code>lsr </code>: directory or file flag, permissions, replication, owner, group,
+ file size, modification date, and full path. Unlike the <code>lsr </code> command,
+      the root path is included. One important difference between the output
+      of the <code>lsr </code> command and this processor is that this output is not sorted
+      by directory name and contents. Rather, the files are listed in the
+      order in which they are stored in the fsimage file. Therefore, it is
+      not possible to directly compare the output of the <code>lsr </code> command and
+      this tool. The Ls processor uses information contained within the Inode blocks to
+ calculate file sizes and ignores the <code>-skipBlocks</code> option.</li>
+ <li><strong>Indented</strong> provides a more complete view of the fsimage's contents,
+ including all of the information included in the image, such as image
+ version, generation stamp and inode- and block-specific listings. This
+      processor uses indentation to organize the output in a hierarchical manner.
+      This format is suitable for easy human comprehension.</li>
+      <li><strong>Delimited</strong> produces one line per file, consisting of the path,
+      replication, modification time, access time, block size, number of blocks, file size,
+      namespace quota, diskspace quota, permissions, username and group name. If run against
+      an fsimage that does not contain any of these fields, the field's column will be included,
+      but with no data recorded. The default record delimiter is a tab, but this may be changed
+ via the <code>-delimiter</code> command line argument. This processor is designed to
+ create output that is easily analyzed by other tools, such as <a href="http://hadoop.apache.org/pig/">Apache Pig</a>.
+ See the <a href="#analysis">Analyzing Results</a> section
+ for further information on using this processor to analyze the contents of fsimage files.</li>
+      <li><strong>XML</strong> creates an XML document of the fsimage and includes all of the
+      information within the fsimage, similar to the Indented processor. The output
+ of this processor is amenable to automated processing and analysis with XML tools.
+ Due to the verbosity of the XML syntax, this processor will also generate
+ the largest amount of output.</li>
+ <li><strong>FileDistribution</strong> is the tool for analyzing file
+ sizes in the namespace image. In order to run the tool one should
+ define a range of integers <code>[0, maxSize]</code> by specifying
+ <code>maxSize</code> and a <code>step</code>.
+ The range of integers is divided into segments of size
+ <code>step</code>:
+ <code>[0, s</code><sub>1</sub><code>, ..., s</code><sub>n-1</sub><code>, maxSize]</code>,
+ and the processor calculates how many files in the system fall into
+ each segment <code>[s</code><sub>i-1</sub><code>, s</code><sub>i</sub><code>)</code>.
+ Note that files larger than <code>maxSize</code> always fall into
+ the very last segment.
+      The output file is formatted as a tab-separated two-column table:
+      Size and NumFiles, where Size represents the start of the segment
+      and NumFiles is the number of files from the image whose size falls
+      into this segment.</li>
+ </ol>
+
+ </section> <!-- overview -->
+
+ <section>
+ <title>Usage</title>
+
+ <section>
+ <title>Basic</title>
+ <p>The simplest usage of the Offline Image Viewer is to provide just an input and output
+ file, via the <code>-i</code> and <code>-o</code> command-line switches:</p>
+
+ <p><code>bash$ bin/hadoop oiv -i fsimage -o fsimage.txt</code><br/></p>
+
+ <p>This will create a file named fsimage.txt in the current directory using
+ the Ls output processor. For very large image files, this process may take
+ several minutes.</p>
+
+    <p>One can specify which output processor to use via the command-line switch <code>-p</code>.
+ For instance:</p>
+ <p><code>bash$ bin/hadoop oiv -i fsimage -o fsimage.xml -p XML</code><br/></p>
+
+ <p>or</p>
+
+ <p><code>bash$ bin/hadoop oiv -i fsimage -o fsimage.txt -p Indented</code><br/></p>
+
+ <p>This will run the tool using either the XML or Indented output processor,
+ respectively.</p>
+
+ <p>One command-line option worth considering is <code>-skipBlocks</code>, which
+ prevents the tool from explicitly enumerating all of the blocks that make up
+ a file in the namespace. This is useful for file systems that have very large
+ files. Enabling this option can significantly decrease the size of the resulting
+ output, as individual blocks are not included. Note, however, that the Ls processor
+ needs to enumerate the blocks and so overrides this option.</p>
+
+ </section> <!-- Basic -->
+ <section id="Example">
+ <title>Example</title>
+
+<p>Consider the following contrived namespace:</p>
+<source>
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:17 /anotherDir
+
+-rw-r--r-- 3 theuser supergroup 286631664 2009-03-16 21:15 /anotherDir/biggerfile
+
+-rw-r--r-- 3 theuser supergroup 8754 2009-03-16 21:17 /anotherDir/smallFile
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser/mapredsystem
+
+drwx-wx-wx - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser/mapredsystem/ip.redacted.com
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:12 /one
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:12 /one/two
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:16 /user
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:19 /user/theuser
+</source>
+
+<p>Applying the Offline Image Viewer to this file with default options would result in the following output:</p>
+<source>
+machine:hadoop-0.21.0-dev theuser$ bin/hadoop oiv -i fsimagedemo -o fsimage.txt
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:16 /
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:17 /anotherDir
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:12 /one
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:16 /user
+
+-rw-r--r-- 3 theuser supergroup 286631664 2009-03-16 14:15 /anotherDir/biggerfile
+
+-rw-r--r-- 3 theuser supergroup 8754 2009-03-16 14:17 /anotherDir/smallFile
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser/mapredsystem
+
+drwx-wx-wx - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser/mapredsystem/ip.redacted.com
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:12 /one/two
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:19 /user/theuser
+</source>
+
+<p>Similarly, applying the Indented processor would generate output that begins with:</p>
+<source>
+machine:hadoop-0.21.0-dev theuser$ bin/hadoop oiv -i fsimagedemo -p Indented -o fsimage.txt
+
+FSImage
+
+ ImageVersion = -19
+
+ NamespaceID = 2109123098
+
+ GenerationStamp = 1003
+
+ INodes [NumInodes = 12]
+
+ Inode
+
+ INodePath =
+
+ Replication = 0
+
+ ModificationTime = 2009-03-16 14:16
+
+ AccessTime = 1969-12-31 16:00
+
+ BlockSize = 0
+
+ Blocks [NumBlocks = -1]
+
+ NSQuota = 2147483647
+
+ DSQuota = -1
+
+ Permissions
+
+ Username = theuser
+
+ GroupName = supergroup
+
+ PermString = rwxr-xr-x
+
+ remaining output omitted
+</source>
+
+ </section> <!-- example-->
+
+ </section>
+
+ <section id="options">
+ <title>Options</title>
+
+ <section>
+ <title>Option Index</title>
+ <table>
+ <tr><th> Flag </th><th> Description </th></tr>
+ <tr><td><code>[-i|--inputFile] <input file></code></td>
+ <td>Specify the input fsimage file to process. Required.</td></tr>
+ <tr><td><code>[-o|--outputFile] <output file></code></td>
+ <td>Specify the output filename, if the specified output processor
+ generates one. If the specified file already exists, it is silently overwritten. Required.
+ </td></tr>
+ <tr><td><code>[-p|--processor] <processor></code></td>
+        <td>Specify the image processor to apply against the image file. Currently
+        valid options are Ls (default), XML, Delimited, Indented, and FileDistribution.
+ </td></tr>
+ <tr><td><code>-skipBlocks</code></td>
+        <td>Do not enumerate individual blocks within files. This may save processing time
+        and output file space on namespaces with very large files. The <code>Ls</code> processor reads
+ the blocks to correctly determine file sizes and ignores this option.</td></tr>
+ <tr><td><code>-printToScreen</code></td>
+ <td>Pipe output of processor to console as well as specified file. On extremely
+ large namespaces, this may increase processing time by an order of magnitude.</td></tr>
+ <tr><td><code>-delimiter <arg></code></td>
+ <td>When used in conjunction with the Delimited processor, replaces the default
+ tab delimiter with the string specified by <code>arg</code>.</td></tr>
+ <tr><td><code>[-h|--help]</code></td>
+ <td>Display the tool usage and help information and exit.</td></tr>
+ </table>
+ </section> <!-- options -->
+ </section>
+
+ <section id="analysis">
+ <title>Analyzing Results</title>
+    <p>The Offline Image Viewer makes it easy to gather large amounts of data about the HDFS namespace.
+    This information can then be used to explore file system usage patterns or find
+    specific files that match arbitrary criteria, along with other types of namespace analysis. The Delimited
+    image processor in particular creates
+    output that is amenable to further processing by tools such as <a href="http://hadoop.apache.org/pig/">Apache Pig</a>. Pig is a particularly
+    good choice for analyzing these data, as it can handle the output generated from a small fsimage
+    and also scales up to consume data from extremely large file systems.</p>
+    <p>The Delimited image processor generates lines of text separated, by default, by tabs and includes
+    all of the fields that are common between constructed files and files that were still under construction
+    when the fsimage was generated. Example scripts are provided demonstrating how to use this output to
+    accomplish three tasks: determine the number of files each user has created on the file system,
+    find files that were created but have never been accessed, and find probable duplicates of large files by comparing
+    the size of each file.</p>
+ <p>Each of the following scripts assumes you have generated an output file using the Delimited processor named
+ <code>foo</code> and will be storing the results of the Pig analysis in a file named <code>results</code>.</p>
+ <section>
+ <title>Total Number of Files for Each User</title>
+ <p>This script processes each path within the namespace, groups them by the file owner and determines the total
+ number of files each user owns.</p>
+ <p><strong>numFilesOfEachUser.pig:</strong></p>
+ <source>
+-- This script determines the total number of files each user has in
+-- the namespace. Its output is of the form:
+-- username, totalNumFiles
+
+-- Load all of the fields from the file
+A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
+ replication:int,
+ modTime:chararray,
+ accessTime:chararray,
+ blockSize:long,
+ numBlocks:int,
+ fileSize:long,
+ NamespaceQuota:int,
+ DiskspaceQuota:int,
+ perms:chararray,
+ username:chararray,
+ groupname:chararray);
+
+
+-- Grab just the path and username
+B = FOREACH A GENERATE path, username;
+
+-- Generate the sum of the number of paths for each user
+C = FOREACH (GROUP B BY username) GENERATE group, COUNT(B.path);
+
+-- Save results
+STORE C INTO '$outputFile';
+ </source>
+      <p>This script can be run with Pig using the following command:</p>
+ <p><code>bin/pig -x local -param inputFile=../foo -param outputFile=../results ../numFilesOfEachUser.pig</code><br/></p>
+ <p>The output file's content will be similar to that below:</p>
+ <p>
+ <code>bart 1</code><br/>
+ <code>lisa 16</code><br/>
+ <code>homer 28</code><br/>
+ <code>marge 2456</code><br/>
+ </p>
+ </section>
+
+ <section><title>Files That Have Never Been Accessed</title>
+ <p>This script finds files that were created but whose access times were never changed, meaning they were never opened or viewed.</p>
+ <p><strong>neverAccessed.pig:</strong></p>
+ <source>
+-- This script generates a list of files that were created but never
+-- accessed, based on their AccessTime
+
+-- Load all of the fields from the file
+A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
+ replication:int,
+ modTime:chararray,
+ accessTime:chararray,
+ blockSize:long,
+ numBlocks:int,
+ fileSize:long,
+ NamespaceQuota:int,
+ DiskspaceQuota:int,
+ perms:chararray,
+ username:chararray,
+ groupname:chararray);
+
+-- Grab just the path and last time the file was accessed
+B = FOREACH A GENERATE path, accessTime;
+
+-- Drop all the paths that don't have the default assigned last-access time
+C = FILTER B BY accessTime == '1969-12-31 16:00';
+
+-- Drop the accessTimes, since they're all the same
+D = FOREACH C GENERATE path;
+
+-- Save results
+STORE D INTO '$outputFile';
+ </source>
+      <p>This script can be run with Pig using the following command; the output file will contain a list of files that were created but never viewed afterwards.</p>
+ <p><code>bin/pig -x local -param inputFile=../foo -param outputFile=../results ../neverAccessed.pig</code><br/></p>
+ </section>
+ <section><title>Probable Duplicated Files Based on File Size</title>
+      <p>This script groups files together based on their size, drops any that are less than 100 MB and returns a list of the file size, number of files found and a tuple of the file paths. This can be used to find likely duplicates within the filesystem namespace.</p>
+
+ <p><strong>probableDuplicates.pig:</strong></p>
+ <source>
+-- This script finds probable duplicate files greater than 100 MB by
+-- grouping together files based on their byte size. Files of this size
+-- with exactly the same number of bytes can be considered probable
+-- duplicates, but should be checked further, either by comparing the
+-- contents directly or by another proxy, such as a hash of the contents.
+-- The script's output is of the form:
+-- fileSize numProbableDuplicates {(probableDup1), (probableDup2)}
+
+-- Load all of the fields from the file
+A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
+ replication:int,
+ modTime:chararray,
+ accessTime:chararray,
+ blockSize:long,
+ numBlocks:int,
+ fileSize:long,
+ NamespaceQuota:int,
+ DiskspaceQuota:int,
+ perms:chararray,
+ username:chararray,
+ groupname:chararray);
+
+-- Grab the pathname and filesize
+B = FOREACH A generate path, fileSize;
+
+-- Drop files smaller than 100 MB
+C = FILTER B by fileSize > 100L * 1024L * 1024L;
+
+-- Gather all the files of the same byte size
+D = GROUP C by fileSize;
+
+-- Generate path, num of duplicates, list of duplicates
+E = FOREACH D generate group AS fileSize, COUNT(C) as numDupes, C.path AS files;
+
+-- Drop all the sizes for which there is only one file
+F = FILTER E by numDupes > 1L;
+
+-- Sort by the size of the files
+G = ORDER F by fileSize;
+
+-- Save results
+STORE G INTO '$outputFile';
+ </source>
+   <p>This script can be run with Pig using the following command:</p>
+ <p><code>bin/pig -x local -param inputFile=../foo -param outputFile=../results ../probableDuplicates.pig</code><br/></p>
+ <p> The output file's content will be similar to that below:</p>
+
+<source>
+1077288632 2 {(/user/tennant/work1/part-00501),(/user/tennant/work1/part-00993)}
+1077288664 4 {(/user/tennant/work0/part-00567),(/user/tennant/work0/part-03980),(/user/tennant/work1/part-00725),(/user/eccelston/output/part-03395)}
+1077288668 3 {(/user/tennant/work0/part-03705),(/user/tennant/work0/part-04242),(/user/tennant/work1/part-03839)}
+1077288698 2 {(/user/tennant/work0/part-00435),(/user/eccelston/output/part-01382)}
+1077288702 2 {(/user/tennant/work0/part-03864),(/user/eccelston/output/part-03234)}
+</source>
+   <p>Each line includes the file size in bytes that was found to be duplicated, the number of duplicates found, and a list of the duplicated paths.
+   Files smaller than 100 MB are ignored; at these sizes, an exact size match makes it reasonably likely that the files are duplicates.</p>
+ </section>
+ </section>
+
+
+ </body>
+
+</document>
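[Editor's note: to complement the Pig scripts in the new guide, a small hedged sketch of consuming the Delimited output directly from Java. The field order follows the Delimited processor description above; the input name 'foo' is the guide's own illustrative file name.]

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;

    public class DelimitedLineCount {
      public static void main(String[] args) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader("foo"));
        long files = 0;
        String line;
        while ((line = in.readLine()) != null) {
          // Fields, tab-separated: path, replication, modTime, accessTime,
          // blockSize, numBlocks, fileSize, nsQuota, dsQuota, perms,
          // username, groupname. A field absent from the fsimage layout
          // shows up as an empty column, so keep trailing empties.
          String[] fields = line.split("\t", -1);
          if (fields.length >= 12) {
            files++;
          }
        }
        in.close();
        System.out.println("records: " + files);
      }
    }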
Modified: hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml Sun Mar 31 04:27:29 2013
@@ -63,6 +63,7 @@ See http://forrest.apache.org/docs/linki
<hdfs_perm label="Permissions" href="hdfs_permissions_guide.html" />
<hdfs_quotas label="Quotas" href="hdfs_quota_admin_guide.html" />
<hdfs_SLG label="Synthetic Load Generator" href="SLG_user_guide.html" />
+ <hdfs_imageviewer label="Offline Image Viewer" href="hdfs_imageviewer.html" />
<hftp label="HFTP" href="hftp.html"/>
<webhdfs label="WebHDFS REST API" href="webhdfs.html" />
<hdfs_libhdfs label="C API libhdfs" href="libhdfs.html" />
Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java Sun Mar 31 04:27:29 2013
@@ -130,6 +130,8 @@ public class DFSConfigKeys extends Commo
public static final long DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT = 24*60*60*1000;
public static final String DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY = "dfs.namenode.delegation.token.max-lifetime";
public static final long DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = 7*24*60*60*1000;
+ public static final String DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY = "dfs.namenode.delegation.token.always-use"; // for tests
+ public static final boolean DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT = false;
//Following keys have no defaults
public static final String DFS_DATANODE_DATA_DIR_KEY = "dfs.datanode.data.dir";
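[Editor's note: the new key is marked "for tests" in the diff. A minimal sketch of how a test might flip it; the surrounding class is illustrative, not part of the commit.]

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class TokenTestConf {
      public static Configuration create() {
        // Force the delegation token secret manager on even without
        // Kerberos, using the test-only key added above.
        Configuration conf = new Configuration();
        conf.setBoolean(
            DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
        return conf;
      }
    }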
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.protocol;
+
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * This class tracks changes in the layout version of HDFS.
+ *
+ * The layout version is changed for the following reasons:
+ * <ol>
+ * <li>The layout of how namenode or datanode stores information
+ * on disk changes.</li>
+ * <li>A new operation code is added to the editlog.</li>
+ * <li>Modification of the format or content of a record
+ * in the editlog or fsimage.</li>
+ * </ol>
+ * <br>
+ * <b>How to update layout version:<br></b>
+ * When a change requires a new layout version, please add an entry into
+ * {@link Feature} with a short enum name, the new layout version and a description
+ * of the change. Please see {@link Feature} for further details.
+ * <br>
+ */
+@InterfaceAudience.Private
+public class LayoutVersion {
+
+ /**
+ * Version in which HDFS-2991 was fixed. This bug caused OP_ADD to
+ * sometimes be skipped for append() calls. If we see such a case when
+ * loading the edits, but the version is known to have that bug, we
+   * work around the issue. Otherwise we should consider it a corruption
+ * and bail.
+ */
+ public static final int BUGFIX_HDFS_2991_VERSION = -40;
+
+ /**
+ * Enums for features that change the layout version.
+ * <br><br>
+ * To add a new layout version:
+ * <ul>
+ * <li>Define a new enum constant with a short enum name, the new layout version
+ * and description of the added feature.</li>
+   * <li>When adding a layout version with an ancestor that is not the same as
+   * its immediate predecessor, use the constructor where a specific ancestor
+   * can be passed.
+ * </li>
+ * </ul>
+ */
+ public static enum Feature {
+ NAMESPACE_QUOTA(-16, "Support for namespace quotas"),
+ FILE_ACCESS_TIME(-17, "Support for access time on files"),
+ DISKSPACE_QUOTA(-18, "Support for disk space quotas"),
+ STICKY_BIT(-19, "Support for sticky bits"),
+ APPEND_RBW_DIR(-20, "Datanode has \"rbw\" subdirectory for append"),
+ ATOMIC_RENAME(-21, "Support for atomic rename"),
+ CONCAT(-22, "Support for concat operation"),
+ SYMLINKS(-23, "Support for symbolic links"),
+ DELEGATION_TOKEN(-24, "Support for delegation tokens for security"),
+ FSIMAGE_COMPRESSION(-25, "Support for fsimage compression"),
+ FSIMAGE_CHECKSUM(-26, "Support checksum for fsimage"),
+ REMOVE_REL13_DISK_LAYOUT_SUPPORT(-27, "Remove support for 0.13 disk layout"),
+ EDITS_CHESKUM(-28, "Support checksum for editlog"),
+ UNUSED(-29, "Skipped version"),
+ FSIMAGE_NAME_OPTIMIZATION(-30, "Store only last part of path in fsimage"),
+ RESERVED_REL20_203(-31, -19, "Reserved for release 0.20.203", true,
+ DELEGATION_TOKEN),
+ RESERVED_REL20_204(-32, -31, "Reserved for release 0.20.204", true),
+ RESERVED_REL22(-33, -27, "Reserved for release 0.22", true),
+ RESERVED_REL23(-34, -30, "Reserved for release 0.23", true),
+ // layout versions -35 - -40 are features not present on this branch
+ RESERVED_REL1_2_0(-41, -32, "Reserved for release 1.2.0", true, CONCAT);
+
+ final int lv;
+ final int ancestorLV;
+ final String description;
+ final boolean reserved;
+ final Feature[] specialFeatures;
+
+ /**
+     * Feature that is added at layout version {@code lv}, with the
+     * immediately preceding layout version {@code lv + 1} as its ancestor.
+ * @param lv new layout version with the addition of this feature
+ * @param description description of the feature
+ */
+ Feature(final int lv, final String description) {
+ this(lv, lv + 1, description, false);
+ }
+
+ /**
+     * Feature that is added at layout version {@code ancestorLV}.
+     * @param lv new layout version with the addition of this feature
+     * @param ancestorLV layout version from which the new lv is derived
+     * @param description description of the feature
+     * @param reserved true when this is a layout version reserved for previous
+     *        versions
+     * @param features set of features that are to be enabled for this version
+ */
+ Feature(final int lv, final int ancestorLV, final String description,
+ boolean reserved, Feature... features) {
+ this.lv = lv;
+ this.ancestorLV = ancestorLV;
+ this.description = description;
+ this.reserved = reserved;
+ specialFeatures = features;
+ }
+
+ /**
+ * Accessor method for feature layout version
+ * @return int lv value
+ */
+ public int getLayoutVersion() {
+ return lv;
+ }
+
+ /**
+ * Accessor method for feature ancestor layout version
+ * @return int ancestor LV value
+ */
+ public int getAncestorLayoutVersion() {
+ return ancestorLV;
+ }
+
+ /**
+ * Accessor method for feature description
+ * @return String feature description
+ */
+ public String getDescription() {
+ return description;
+ }
+
+ public boolean isReservedForOldRelease() {
+ return reserved;
+ }
+ }
+
+ // Build layout version and corresponding feature matrix
+  static final Map<Integer, EnumSet<Feature>> map =
+ new HashMap<Integer, EnumSet<Feature>>();
+
+ // Static initialization
+ static {
+ initMap();
+ }
+
+ /**
+ * Initialize the map of a layout version and EnumSet of {@link Feature}s
+ * supported.
+ */
+ private static void initMap() {
+ // Go through all the enum constants and build a map of
+ // LayoutVersion <-> EnumSet of all supported features in that LayoutVersion
+ for (Feature f : Feature.values()) {
+ EnumSet<Feature> ancestorSet = map.get(f.ancestorLV);
+ if (ancestorSet == null) {
+ ancestorSet = EnumSet.noneOf(Feature.class); // Empty enum set
+ map.put(f.ancestorLV, ancestorSet);
+ }
+ EnumSet<Feature> featureSet = EnumSet.copyOf(ancestorSet);
+ if (f.specialFeatures != null) {
+ for (Feature specialFeature : f.specialFeatures) {
+ featureSet.add(specialFeature);
+ }
+ }
+ featureSet.add(f);
+ map.put(f.lv, featureSet);
+ }
+ }
+
+ /**
+ * Gets formatted string that describes {@link LayoutVersion} information.
+ */
+ public static String getString() {
+ final StringBuilder buf = new StringBuilder();
+ buf.append("Feature List:\n");
+ for (Feature f : Feature.values()) {
+ buf.append(f).append(" introduced in layout version ")
+ .append(f.lv).append(" (").
+ append(f.description).append(")\n");
+ }
+
+ buf.append("\n\nLayoutVersion and supported features:\n");
+ for (Feature f : Feature.values()) {
+ buf.append(f.lv).append(": ").append(map.get(f.lv))
+ .append("\n");
+ }
+ return buf.toString();
+ }
+
+ /**
+ * Returns true if a given feature is supported in the given layout version
+ * @param f Feature
+ * @param lv LayoutVersion
+ * @return true if {@code f} is supported in layout version {@code lv}
+ */
+ public static boolean supports(final Feature f, final int lv) {
+ final EnumSet<Feature> set = map.get(lv);
+ return set != null && set.contains(f);
+ }
+
+ /**
+ * Get the current layout version
+ */
+ public static int getCurrentLayoutVersion() {
+ Feature[] values = Feature.values();
+ for (int i = values.length -1; i >= 0; i--) {
+ if (!values[i].isReservedForOldRelease()) {
+ return values[i].lv;
+ }
+ }
+ throw new AssertionError("All layout versions are reserved.");
+ }
+}
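[Editor's note: a small sketch of querying the feature matrix that the static initializer above builds. The expected values follow from the Feature constants in the diff: RESERVED_REL20_203 (-31) lists DELEGATION_TOKEN as a special feature, and FSIMAGE_NAME_OPTIMIZATION (-30) is the last constant not reserved for an old release. Not part of the commit.]

    import org.apache.hadoop.hdfs.protocol.LayoutVersion;
    import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;

    public class LayoutVersionDemo {
      public static void main(String[] args) {
        // -24 introduced delegation tokens, so -24 supports the feature.
        System.out.println(
            LayoutVersion.supports(Feature.DELEGATION_TOKEN, -24)); // true
        // -31 descends from -19 but names DELEGATION_TOKEN as a special
        // feature, so this lookup is also true.
        System.out.println(
            LayoutVersion.supports(Feature.DELEGATION_TOKEN, -31)); // true
        // The last constant not reserved for an old release is
        // FSIMAGE_NAME_OPTIMIZATION, so this prints -30.
        System.out.println(LayoutVersion.getCurrentLayoutVersion());
      }
    }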
Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java Sun Mar 31 04:27:29 2013
@@ -1820,7 +1820,7 @@ public class FSImage extends Storage {
}
static private final UTF8 U_STR = new UTF8();
- static String readString(DataInputStream in) throws IOException {
+ public static String readString(DataInputStream in) throws IOException {
U_STR.readFields(in);
return U_STR.toString();
}
@@ -1830,7 +1830,7 @@ public class FSImage extends Storage {
return s.isEmpty()? null: s;
}
- static byte[] readBytes(DataInputStream in) throws IOException {
+ public static byte[] readBytes(DataInputStream in) throws IOException {
U_STR.readFields(in);
int len = U_STR.getLength();
byte[] bytes = new byte[len];
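[Editor's note: the two visibility changes above let code outside the class, such as the new offline image viewer's loader, reuse FSImage's field decoding. A hedged sketch of the call shape; a real caller would first position the stream at a string-typed field, which this fragment does not attempt.]

    import java.io.DataInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;

    import org.apache.hadoop.hdfs.server.namenode.FSImage;

    public class ReadImageField {
      public static void main(String[] args) throws IOException {
        DataInputStream in =
            new DataInputStream(new FileInputStream(args[0]));
        try {
          // Decodes the next UTF8-encoded field from wherever the
          // stream is currently positioned.
          System.out.println(FSImage.readString(in));
        } finally {
          in.close();
        }
      }
    }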
Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java Sun Mar 31 04:27:29 2013
@@ -17,6 +17,9 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
+
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
@@ -294,8 +297,15 @@ public class NameNode implements ClientP
myMetrics = NameNodeInstrumentation.create(conf);
this.namesystem = new FSNamesystem(this, conf);
+
+ // For testing purposes, allow the DT secret manager to be started regardless
+ // of whether security is enabled.
+ boolean alwaysUseDelegationTokensForTests =
+ conf.getBoolean(DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
+ DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
- if (UserGroupInformation.isSecurityEnabled()) {
+ if (UserGroupInformation.isSecurityEnabled() ||
+ alwaysUseDelegationTokensForTests) {
namesystem.activateSecretManager();
}
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+
+/**
+ * A DelimitedImageVisitor generates a text representation of the fsimage,
+ * with each element separated by a delimiter string. All of the elements
+ * common to both inodes and inodes-under-construction are included. When
+ * processing an fsimage with a layout version that did not include an
+ * element, such as AccessTime, the output file will include a column
+ * for the value, but no value will be included.
+ *
+ * Individual block information for each file is not currently included.
+ *
+ * The default delimiter is tab, as this is an unlikely value to be included
+ * in an inode path or other text metadata. The delimiter value can be set via
+ * the constructor.
+ */
+class DelimitedImageVisitor extends TextWriterImageVisitor {
+ private static final String defaultDelimiter = "\t";
+
+ final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
+  private long fileSize = 0L;
+ // Elements of fsimage we're interested in tracking
+ private final Collection<ImageElement> elementsToTrack;
+ // Values for each of the elements in elementsToTrack
+ private final AbstractMap<ImageElement, String> elements =
+ new HashMap<ImageElement, String>();
+ private final String delimiter;
+
+ {
+ elementsToTrack = new ArrayList<ImageElement>();
+
+ // This collection determines what elements are tracked and the order
+ // in which they are output
+ Collections.addAll(elementsToTrack, ImageElement.INODE_PATH,
+ ImageElement.REPLICATION,
+ ImageElement.MODIFICATION_TIME,
+ ImageElement.ACCESS_TIME,
+ ImageElement.BLOCK_SIZE,
+ ImageElement.NUM_BLOCKS,
+ ImageElement.NUM_BYTES,
+ ImageElement.NS_QUOTA,
+ ImageElement.DS_QUOTA,
+ ImageElement.PERMISSION_STRING,
+ ImageElement.USER_NAME,
+ ImageElement.GROUP_NAME);
+ }
+
+ public DelimitedImageVisitor(String filename) throws IOException {
+ this(filename, false);
+ }
+
+ public DelimitedImageVisitor(String outputFile, boolean printToScreen)
+ throws IOException {
+ this(outputFile, printToScreen, defaultDelimiter);
+ }
+
+ public DelimitedImageVisitor(String outputFile, boolean printToScreen,
+ String delimiter) throws IOException {
+ super(outputFile, printToScreen);
+ this.delimiter = delimiter;
+ reset();
+ }
+
+ /**
+ * Reset the values of the elements we're tracking in order to handle
+ * the next file
+ */
+ private void reset() {
+ elements.clear();
+ for(ImageElement e : elementsToTrack)
+ elements.put(e, null);
+
+    fileSize = 0L;
+ }
+
+ @Override
+ void leaveEnclosingElement() throws IOException {
+ ImageElement elem = elemQ.pop();
+
+ // If we're done with an inode, write out our results and start over
+ if(elem == ImageElement.INODE ||
+ elem == ImageElement.INODE_UNDER_CONSTRUCTION) {
+ writeLine();
+ write("\n");
+ reset();
+ }
+ }
+
+ /**
+ * Iterate through all the elements we're tracking and, if a value was
+ * recorded for it, write it out.
+ */
+ private void writeLine() throws IOException {
+ Iterator<ImageElement> it = elementsToTrack.iterator();
+
+ while(it.hasNext()) {
+ ImageElement e = it.next();
+
+ String v = null;
+ if(e == ImageElement.NUM_BYTES)
+ v = String.valueOf(fileSize);
+ else
+ v = elements.get(e);
+
+ if(v != null)
+ write(v);
+
+ if(it.hasNext())
+ write(delimiter);
+ }
+ }
+
+ @Override
+ void visit(ImageElement element, String value) throws IOException {
+ // Explicitly label the root path
+ if(element == ImageElement.INODE_PATH && value.equals(""))
+ value = "/";
+
+ // Special case of file size, which is sum of the num bytes in each block
+ if(element == ImageElement.NUM_BYTES)
+ fileSize += Long.valueOf(value);
+
+ if(elements.containsKey(element) && element != ImageElement.NUM_BYTES)
+ elements.put(element, value);
+
+ }
+
+ @Override
+ void visitEnclosingElement(ImageElement element) throws IOException {
+ elemQ.push(element);
+ }
+
+ @Override
+ void visitEnclosingElement(ImageElement element, ImageElement key,
+ String value) throws IOException {
+ // Special case as numBlocks is an attribute of the blocks element
+ if(key == ImageElement.NUM_BLOCKS
+ && elements.containsKey(ImageElement.NUM_BLOCKS))
+ elements.put(key, value);
+
+ elemQ.push(element);
+ }
+
+ @Override
+ void start() throws IOException { /* Nothing to do */ }
+}
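[Editor's note: a hedged construction sketch for the visitor above. The class is package-private, so this would have to live in org.apache.hadoop.hdfs.tools.offlineImageViewer; in normal use the OfflineImageViewer driver constructs it from the -o, -printToScreen and -delimiter flags. Not part of the commit.]

    package org.apache.hadoop.hdfs.tools.offlineImageViewer;

    import java.io.IOException;

    public class DelimitedVisitorDemo {
      public static void main(String[] args) throws IOException {
        // One comma-separated record per inode, written to fsimage.csv
        // without echoing to the console (printToScreen = false).
        DelimitedImageVisitor visitor =
            new DelimitedImageVisitor("fsimage.csv", false, ",");
        // An ImageLoader implementation would now drive the visitor:
        // loader.loadImage(in, visitor, false);
      }
    }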
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Utility class for tracking descent into the structure of the
+ * Visitor class (ImageVisitor, EditsVisitor etc.)
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class DepthCounter {
+ private int depth = 0;
+
+ public void incLevel() { depth++; }
+ public void decLevel() { if(depth >= 1) depth--; }
+ public int getLevel() { return depth; }
+}
+
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,182 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * File size distribution visitor.
+ *
+ * <h3>Description.</h3>
+ * This is the tool for analyzing file sizes in the namespace image.
+ * In order to run the tool one should define a range of integers
+ * <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
+ * The range of integers is divided into segments of size <tt>step</tt>:
+ * <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
+ * and the visitor calculates how many files in the system fall into
+ * each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>.
+ * Note that files larger than <tt>maxSize</tt> always fall into
+ * the very last segment.
+ *
+ * <h3>Input.</h3>
+ * <ul>
+ * <li><tt>filename</tt> specifies the location of the image file;</li>
+ * <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
+ * sizes considered by the visitor;</li>
+ * <li><tt>step</tt> the range is divided into segments of size step.</li>
+ * </ul>
+ *
+ * <h3>Output.</h3>
+ * The output file is formatted as a tab separated two column table:
+ * Size and NumFiles. Where Size represents the start of the segment,
+ * and numFiles is the number of files form the image which size falls in
+ * this segment.
+ */
+class FileDistributionVisitor extends TextWriterImageVisitor {
+ final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();
+
+ private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37
+ private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21
+
+ private int[] distribution;
+ private long maxSize;
+ private int step;
+
+ private int totalFiles;
+ private int totalDirectories;
+ private int totalBlocks;
+ private long totalSpace;
+ private long maxFileSize;
+
+ private FileContext current;
+
+ private boolean inInode = false;
+
+ /**
+ * File or directory information.
+ */
+ private static class FileContext {
+ String path;
+ long fileSize;
+ int numBlocks;
+ int replication;
+ }
+
+ public FileDistributionVisitor(String filename,
+ long maxSize,
+ int step) throws IOException {
+ super(filename, false);
+ this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
+ this.step = (step == 0 ? INTERVAL_DEFAULT : step);
+ long numIntervals = this.maxSize / this.step;
+ if(numIntervals >= Integer.MAX_VALUE)
+ throw new IOException("Too many distribution intervals " + numIntervals);
+ this.distribution = new int[1 + (int)(numIntervals)];
+ this.totalFiles = 0;
+ this.totalDirectories = 0;
+ this.totalBlocks = 0;
+ this.totalSpace = 0;
+ this.maxFileSize = 0;
+ }
+
+ @Override
+ void start() throws IOException {}
+
+ @Override
+ void finish() throws IOException {
+ // write the distribution into the output file
+ write("Size\tNumFiles\n");
+ for(int i = 0; i < distribution.length; i++)
+ write(((long)i * step) + "\t" + distribution[i] + "\n");
+ System.out.println("totalFiles = " + totalFiles);
+ System.out.println("totalDirectories = " + totalDirectories);
+ System.out.println("totalBlocks = " + totalBlocks);
+ System.out.println("totalSpace = " + totalSpace);
+ System.out.println("maxFileSize = " + maxFileSize);
+ super.finish();
+ }
+
+ @Override
+ void leaveEnclosingElement() throws IOException {
+ ImageElement elem = elemS.pop();
+
+ if(elem != ImageElement.INODE &&
+ elem != ImageElement.INODE_UNDER_CONSTRUCTION)
+ return;
+ inInode = false;
+ if(current.numBlocks < 0) {
+ totalDirectories ++;
+ return;
+ }
+ totalFiles++;
+ totalBlocks += current.numBlocks;
+ totalSpace += current.fileSize * current.replication;
+ if(maxFileSize < current.fileSize)
+ maxFileSize = current.fileSize;
+ int high;
+ if(current.fileSize > maxSize)
+ high = distribution.length-1;
+ else
+ high = (int)Math.ceil((double)current.fileSize / step);
+ distribution[high]++;
+ if(totalFiles % 1000000 == 1)
+ System.out.println("Files processed: " + totalFiles
+ + " Current: " + current.path);
+ }
+
+ @Override
+ void visit(ImageElement element, String value) throws IOException {
+ if(inInode) {
+ switch(element) {
+ case INODE_PATH:
+ current.path = (value.equals("") ? "/" : value);
+ break;
+ case REPLICATION:
+ current.replication = Integer.valueOf(value);
+ break;
+ case NUM_BYTES:
+ current.fileSize += Long.valueOf(value);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ @Override
+ void visitEnclosingElement(ImageElement element) throws IOException {
+ elemS.push(element);
+ if(element == ImageElement.INODE ||
+ element == ImageElement.INODE_UNDER_CONSTRUCTION) {
+ current = new FileContext();
+ inInode = true;
+ }
+ }
+
+ @Override
+ void visitEnclosingElement(ImageElement element,
+ ImageElement key, String value) throws IOException {
+ elemS.push(element);
+ if(element == ImageElement.INODE ||
+ element == ImageElement.INODE_UNDER_CONSTRUCTION)
+ inInode = true;
+ else if(element == ImageElement.BLOCKS)
+ current.numBlocks = Integer.parseInt(value);
+ }
+}
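The histogram rule implemented in leaveEnclosingElement() above is: a file of
size s falls into bucket ceil(s / step), anything larger than maxSize is
clamped into the final bucket, and finish() labels row i with the byte size
i * step. A minimal standalone sketch of that rule follows; the class and
method names are hypothetical and are not part of this patch.

    // Hypothetical sketch of FileDistributionVisitor's bucketing rule.
    public class BucketRuleSketch {
      static int bucketFor(long fileSize, long maxSize, int step, int numBuckets) {
        if (fileSize > maxSize)
          return numBuckets - 1;                  // clamp oversized files
        return (int) Math.ceil((double) fileSize / step);
      }

      public static void main(String[] args) {
        long maxSize = 0x2000000000L;             // default cap: 2^37 bytes
        int step = 0x200000;                      // default interval: 2 MB
        int numBuckets = 1 + (int) (maxSize / step);
        // A 3 MB file lands in bucket ceil(3 MB / 2 MB) = 2, which finish()
        // reports as the 4194304 (4 MB) row of the Size/NumFiles table.
        System.out.println(bucketFor(3L * 1024 * 1024, maxSize, step, numBuckets));
      }
    }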
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * An ImageLoader can accept a DataInputStream to a Hadoop FSImage file
+ * and walk over its structure using the supplied ImageVisitor.
+ *
+ * Each implementation of ImageLoader is designed to rapidly process an
+ * image file. As long as minor changes are made from one layout version
+ * to another, it is acceptable to tweak one implementation to read the next.
+ * However, if the layout changes enough that supporting it would make a
+ * processor slow or its code difficult to read, another processor should
+ * be created.
+ * This allows each processor to quickly read an image without getting
+ * bogged down in dealing with significant differences between layout versions.
+ */
+interface ImageLoader {
+
+ /**
+ * @param in DataInputStream pointing to a Hadoop FSImage file
+ * @param v Visitor to apply to the FSImage file
+ * @param skipBlocks Whether to skip over each file's individual blocks
+ */
+ public void loadImage(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks) throws IOException;
+
+ /**
+ * Can this processor handle the specified version of the FSImage file?
+ *
+ * @param version FSImage file layout version
+ * @return True if this instance can process the file
+ */
+ public boolean canLoadVersion(int version);
+
+ /**
+ * Factory for obtaining a version of the image loader that can read
+ * a particular image format.
+ */
+ @InterfaceAudience.Private
+ public class LoaderFactory {
+ // Java doesn't support static methods on interfaces, which necessitates
+ // this factory class
+
+ /**
+ * Find an image loader capable of interpreting the specified
+ * layout version number. If none exists, return null.
+ *
+ * @param version fsimage layout version number to be processed
+ * @return ImageLoader that can interpret specified version, or null
+ */
+ static public ImageLoader getLoader(int version) {
+ // Easy to add more image processors as they are written
+ ImageLoader[] loaders = { new ImageLoaderCurrent() };
+
+ for (ImageLoader l : loaders) {
+ if (l.canLoadVersion(version))
+ return l;
+ }
+
+ return null;
+ }
+ }
+}
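A typical driver reads the layout version (the first int in the image), asks
LoaderFactory for a matching loader, and hands it a visitor. The following is
a rough sketch of that flow, not code from this patch: the file names are
assumptions, the mark/reset peek is one way to recover the version before
loadImage() re-reads it, and the class must live in
org.apache.hadoop.hdfs.tools.offlineImageViewer because ImageLoader and the
visitors are package-private. IndentedImageVisitor is one of the visitors
added elsewhere in this commit, constructed here with an assumed output-file
name.

    package org.apache.hadoop.hdfs.tools.offlineImageViewer;

    import java.io.BufferedInputStream;
    import java.io.DataInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;

    // Hypothetical driver sketch, for illustration only.
    public class LoaderFactorySketch {
      public static void main(String[] args) throws IOException {
        DataInputStream in = new DataInputStream(
            new BufferedInputStream(new FileInputStream("fsimage"))); // assumed path
        try {
          in.mark(8);
          int version = in.readInt(); // layout version leads the image file
          in.reset();                 // loadImage() re-reads it from the start
          ImageLoader loader = ImageLoader.LoaderFactory.getLoader(version);
          if (loader == null) {
            throw new IOException("No image loader for layout version " + version);
          }
          // Third argument skips per-block records; pass false to visit every block.
          loader.loadImage(in, new IndentedImageVisitor("fsimage.txt"), true);
        } finally {
          in.close();
        }
      }
    }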
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,465 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
+import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
+import org.apache.hadoop.hdfs.server.namenode.FSImage;
+import org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor.ImageElement;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.security.token.delegation.DelegationKey;
+
+/**
+ * ImageLoaderCurrent processes Hadoop FSImage files and walks over
+ * them using a provided ImageVisitor, calling the visitor at each element
+ * enumerated below.
+ *
+ * The only difference between v18 and v19 was the utilization of the
+ * stickybit. Therefore, the same viewer can read either format.
+ *
+ * Version -19 fsimage layout (with changes from -16 up):
+ * Image version (int)
+ * Namespace ID (int)
+ * NumFiles (long)
+ * Generation stamp (long)
+ * INodes (count = NumFiles)
+ *  INode
+ *    Path (String)
+ *    Replication (short)
+ *    Modification Time (long as date)
+ *    Access Time (long) // added in -16
+ *    Block size (long)
+ *    Num blocks (int)
+ *    Blocks (count = Num blocks)
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Namespace Quota (long)
+ *    Diskspace Quota (long) // added in -18
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String) // Modified in -19
+ *    Symlink (String) // added in -23
+ * NumINodesUnderConstruction (int)
+ * INodesUnderConstruction (count = NumINodesUnderConstruction)
+ *  INodeUnderConstruction
+ *    Path (bytes as string)
+ *    Replication (short)
+ *    Modification time (long as date)
+ *    Preferred block size (long)
+ *    Num blocks (int)
+ *    Blocks
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String)
+ *    Client Name (String)
+ *    Client Machine (String)
+ *    NumLocations (int)
+ *      DatanodeDescriptors (count = numLocations) // not loaded into memory
+ *        short                                    // but still in file
+ *        long
+ *        string
+ *        long
+ *        int
+ *        string
+ *        string
+ *        enum
+ * CurrentDelegationKeyId (int)
+ * NumDelegationKeys (int)
+ *  DelegationKeys (count = NumDelegationKeys)
+ *    DelegationKeyLength (vint)
+ *    DelegationKey (bytes)
+ * DelegationTokenSequenceNumber (int)
+ * NumDelegationTokens (int)
+ *  DelegationTokens (count = NumDelegationTokens)
+ *    DelegationTokenIdentifier
+ *      owner (String)
+ *      renewer (String)
+ *      realUser (String)
+ *      issueDate (vlong)
+ *      maxDate (vlong)
+ *      sequenceNumber (vint)
+ *      masterKeyId (vint)
+ *    expiryTime (long)
+ *
+ */
+class ImageLoaderCurrent implements ImageLoader {
+ protected final DateFormat dateFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm");
+ private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23,
+ -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -41};
+ private int imageVersion = 0;
+
+ /* (non-Javadoc)
+ * @see ImageLoader#canLoadVersion(int)
+ */
+ @Override
+ public boolean canLoadVersion(int version) {
+ for(int v : versions)
+ if(v == version) return true;
+
+ return false;
+ }
+
+ /* (non-Javadoc)
+ * @see ImageLoader#loadImage(java.io.DataInputStream, ImageVisitor, boolean)
+ */
+ @Override
+ public void loadImage(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks) throws IOException {
+ boolean done = false;
+ try {
+ v.start();
+ v.visitEnclosingElement(ImageElement.FS_IMAGE);
+
+ imageVersion = in.readInt();
+ if( !canLoadVersion(imageVersion))
+ throw new IOException("Cannot process fslayout version " + imageVersion);
+
+ v.visit(ImageElement.IMAGE_VERSION, imageVersion);
+ v.visit(ImageElement.NAMESPACE_ID, in.readInt());
+
+ long numInodes = in.readLong();
+
+ v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+
+ if (LayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
+ boolean isCompressed = in.readBoolean();
+ v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
+ if (isCompressed) {
+ String codecClassName = Text.readString(in);
+ v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
+ CompressionCodecFactory codecFac = new CompressionCodecFactory(
+ new Configuration());
+ CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
+ if (codec == null) {
+ throw new IOException("Image compression codec not supported: "
+ + codecClassName);
+ }
+ in = new DataInputStream(codec.createInputStream(in));
+ }
+ }
+ processINodes(in, v, numInodes, skipBlocks);
+
+ processINodesUC(in, v, skipBlocks);
+
+ if (LayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
+ processDelegationTokens(in, v);
+ }
+
+ v.leaveEnclosingElement(); // FSImage
+ done = true;
+ } finally {
+ if (done) {
+ v.finish();
+ } else {
+ v.finishAbnormally();
+ }
+ }
+ }
+
+ /**
+ * Process the Delegation Token related section in fsimage.
+ *
+ * @param in DataInputStream to process
+ * @param v Visitor to walk over records
+ */
+ private void processDelegationTokens(DataInputStream in, ImageVisitor v)
+ throws IOException {
+ v.visit(ImageElement.CURRENT_DELEGATION_KEY_ID, in.readInt());
+ int numDKeys = in.readInt();
+ v.visitEnclosingElement(ImageElement.DELEGATION_KEYS,
+ ImageElement.NUM_DELEGATION_KEYS, numDKeys);
+ for(int i =0; i < numDKeys; i++) {
+ DelegationKey key = new DelegationKey();
+ key.readFields(in);
+ v.visit(ImageElement.DELEGATION_KEY, key.toString());
+ }
+ v.leaveEnclosingElement();
+ v.visit(ImageElement.DELEGATION_TOKEN_SEQUENCE_NUMBER, in.readInt());
+ int numDTokens = in.readInt();
+ v.visitEnclosingElement(ImageElement.DELEGATION_TOKENS,
+ ImageElement.NUM_DELEGATION_TOKENS, numDTokens);
+ for(int i=0; i<numDTokens; i++){
+ DelegationTokenIdentifier id = new DelegationTokenIdentifier();
+ id.readFields(in);
+ long expiryTime = in.readLong();
+ v.visitEnclosingElement(ImageElement.DELEGATION_TOKEN_IDENTIFIER);
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_KIND,
+ id.getKind().toString());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+ id.getSequenceNumber());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+ id.getRenewer().toString());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+ id.getIssueDate());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+ id.getMaxDate());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+ expiryTime);
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
+ id.getMasterKeyId());
+ v.leaveEnclosingElement(); // DELEGATION_TOKEN_IDENTIFIER
+ }
+ v.leaveEnclosingElement(); // DELEGATION_TOKENS
+ }
+
+ /**
+ * Process the INodes under construction section of the fsimage.
+ *
+ * @param in DataInputStream to process
+ * @param v Visitor to walk over inodes
+ * @param skipBlocks Whether to skip over individual blocks
+ */
+ private void processINodesUC(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks) throws IOException {
+ int numINUC = in.readInt();
+
+ v.visitEnclosingElement(ImageElement.INODES_UNDER_CONSTRUCTION,
+ ImageElement.NUM_INODES_UNDER_CONSTRUCTION, numINUC);
+
+ for(int i = 0; i < numINUC; i++) {
+ v.visitEnclosingElement(ImageElement.INODE_UNDER_CONSTRUCTION);
+ byte [] name = FSImage.readBytes(in);
+ String n = new String(name, "UTF8");
+ v.visit(ImageElement.INODE_PATH, n);
+ v.visit(ImageElement.REPLICATION, in.readShort());
+ v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+
+ v.visit(ImageElement.PREFERRED_BLOCK_SIZE, in.readLong());
+ int numBlocks = in.readInt();
+ processBlocks(in, v, numBlocks, skipBlocks);
+
+ processPermission(in, v);
+ v.visit(ImageElement.CLIENT_NAME, FSImage.readString(in));
+ v.visit(ImageElement.CLIENT_MACHINE, FSImage.readString(in));
+
+ // Skip over the datanode descriptors, which are still stored in the
+ // file but are not used by the namenode or loaded into memory
+ int numLocs = in.readInt();
+ for(int j = 0; j < numLocs; j++) {
+ in.readShort();
+ in.readLong();
+ in.readLong();
+ in.readLong();
+ in.readInt();
+ FSImage.readString(in);
+ FSImage.readString(in);
+ WritableUtils.readEnum(in, AdminStates.class);
+ }
+
+ v.leaveEnclosingElement(); // INodeUnderConstruction
+ }
+
+ v.leaveEnclosingElement(); // INodesUnderConstruction
+ }
+
+ /**
+ * Process the blocks section of the fsimage.
+ *
+ * @param in Datastream to process
+ * @param v Visitor to walk over inodes
+ * @param skipBlocks Whether to skip over individual blocks
+ */
+ private void processBlocks(DataInputStream in, ImageVisitor v,
+ int numBlocks, boolean skipBlocks) throws IOException {
+ v.visitEnclosingElement(ImageElement.BLOCKS,
+ ImageElement.NUM_BLOCKS, numBlocks);
+
+ // directory or symlink, no blocks to process
+ if(numBlocks == -1 || numBlocks == -2) {
+ v.leaveEnclosingElement(); // Blocks
+ return;
+ }
+
+ if(skipBlocks) {
+ int bytesToSkip = ((Long.SIZE * 3 /* fields */) / 8 /*bits*/) * numBlocks;
+ if(in.skipBytes(bytesToSkip) != bytesToSkip)
+ throw new IOException("Error skipping over blocks");
+
+ } else {
+ for(int j = 0; j < numBlocks; j++) {
+ v.visitEnclosingElement(ImageElement.BLOCK);
+ v.visit(ImageElement.BLOCK_ID, in.readLong());
+ v.visit(ImageElement.NUM_BYTES, in.readLong());
+ v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+ v.leaveEnclosingElement(); // Block
+ }
+ }
+ v.leaveEnclosingElement(); // Blocks
+ }
+
+ /**
+ * Extract the INode permissions stored in the fsimage file.
+ *
+ * @param in Datastream to process
+ * @param v Visitor to walk over inodes
+ */
+ private void processPermission(DataInputStream in, ImageVisitor v)
+ throws IOException {
+ v.visitEnclosingElement(ImageElement.PERMISSIONS);
+ v.visit(ImageElement.USER_NAME, Text.readString(in));
+ v.visit(ImageElement.GROUP_NAME, Text.readString(in));
+ FsPermission fsp = new FsPermission(in.readShort());
+ v.visit(ImageElement.PERMISSION_STRING, fsp.toString());
+ v.leaveEnclosingElement(); // Permissions
+ }
+
+ /**
+ * Process the INode records stored in the fsimage.
+ *
+ * @param in Datastream to process
+ * @param v Visitor to walk over INodes
+ * @param numInodes Number of INodes stored in file
+ * @param skipBlocks Whether to skip over the blocks within each INode
+ * @throws IOException
+ */
+ private void processINodes(DataInputStream in, ImageVisitor v,
+ long numInodes, boolean skipBlocks) throws IOException {
+ v.visitEnclosingElement(ImageElement.INODES,
+ ImageElement.NUM_INODES, numInodes);
+
+ if (LayoutVersion.supports(Feature.FSIMAGE_NAME_OPTIMIZATION, imageVersion)) {
+ processLocalNameINodes(in, v, numInodes, skipBlocks);
+ } else { // full path name
+ processFullNameINodes(in, v, numInodes, skipBlocks);
+ }
+
+ v.leaveEnclosingElement(); // INodes
+ }
+
+ /**
+ * Process an image whose inodes store only local names
+ * (FSIMAGE_NAME_OPTIMIZATION), reconstructing full paths per directory.
+ *
+ * @param in image stream
+ * @param v visitor
+ * @param numInodes number of inodes to read
+ * @param skipBlocks skip blocks or not
+ * @throws IOException if an error occurs
+ */
+ private void processLocalNameINodes(DataInputStream in, ImageVisitor v,
+ long numInodes, boolean skipBlocks) throws IOException {
+ // process root
+ processINode(in, v, skipBlocks, "");
+ numInodes--;
+ while (numInodes > 0) {
+ numInodes -= processDirectory(in, v, skipBlocks);
+ }
+ }
+
+ private int processDirectory(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks) throws IOException {
+ String parentName = FSImage.readString(in);
+ int numChildren = in.readInt();
+ for (int i=0; i<numChildren; i++) {
+ processINode(in, v, skipBlocks, parentName);
+ }
+ return numChildren;
+ }
+
+ /**
+ * Process an image whose inodes store full path names.
+ *
+ * @param in image stream
+ * @param v visitor
+ * @param numInodes number of inodes to read
+ * @param skipBlocks skip blocks or not
+ * @throws IOException if an error occurs
+ */
+ private void processFullNameINodes(DataInputStream in, ImageVisitor v,
+ long numInodes, boolean skipBlocks) throws IOException {
+ for(long i = 0; i < numInodes; i++) {
+ processINode(in, v, skipBlocks, null);
+ }
+ }
+
+ /**
+ * Process an INode
+ *
+ * @param in image stream
+ * @param v visitor
+ * @param skipBlocks skip blocks or not
+ * @param parentName the name of its parent node
+ * @throws IOException
+ */
+ private void processINode(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks, String parentName) throws IOException {
+ v.visitEnclosingElement(ImageElement.INODE);
+ String pathName = FSImage.readString(in);
+ if (parentName != null) { // local name
+ pathName = "/" + pathName;
+ if (!"/".equals(parentName)) { // children of non-root directory
+ pathName = parentName + pathName;
+ }
+ }
+
+ v.visit(ImageElement.INODE_PATH, pathName);
+ v.visit(ImageElement.REPLICATION, in.readShort());
+ v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+ if(LayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion))
+ v.visit(ImageElement.ACCESS_TIME, formatDate(in.readLong()));
+ v.visit(ImageElement.BLOCK_SIZE, in.readLong());
+ int numBlocks = in.readInt();
+
+ processBlocks(in, v, numBlocks, skipBlocks);
+
+ // File or directory
+ if (numBlocks > 0 || numBlocks == -1) {
+ v.visit(ImageElement.NS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
+ if (LayoutVersion.supports(Feature.DISKSPACE_QUOTA, imageVersion))
+ v.visit(ImageElement.DS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
+ }
+ if (numBlocks == -2) {
+ v.visit(ImageElement.SYMLINK, Text.readString(in));
+ }
+
+ processPermission(in, v);
+ v.leaveEnclosingElement(); // INode
+ }
+
+ /**
+ * Helper method to format dates during processing.
+ * @param date Date as read from image file
+ * @return Formatted date string
+ */
+ private String formatDate(long date) {
+ return dateFormat.format(new Date(date));
+ }
+}
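The subtlest part of the loader above is the path handling in processINode():
when FSIMAGE_NAME_OPTIMIZATION is in effect, each inode record carries only
its last path component, and the full path is rebuilt from the enclosing
directory's name. The same rule, restated as a hypothetical standalone helper
(not part of the patch) with a few worked cases:

    // Hypothetical restatement of processINode()'s path reconstruction.
    public class PathRuleSketch {
      static String buildPath(String parentName, String localName) {
        if (parentName == null) {      // full-name image: already a complete path
          return localName;
        }
        String path = "/" + localName;
        if (!"/".equals(parentName)) { // children of non-root directories
          path = parentName + path;
        }
        return path;
      }

      public static void main(String[] args) {
        System.out.println(buildPath(null, "/a/b")); // "/a/b" (full-name images)
        System.out.println(buildPath("", ""));       // "/"    (the root inode)
        System.out.println(buildPath("/", "a"));     // "/a"
        System.out.println(buildPath("/a", "b"));    // "/a/b"
      }
    }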
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+
+/**
+ * An implementation of ImageVisitor can traverse the structure of a
+ * Hadoop fsimage and respond to each of the structures within the file.
+ */
+abstract class ImageVisitor {
+
+ /**
+ * Structural elements of an FSImage that may be encountered within the
+ * file. ImageVisitors are able to handle processing any of these elements.
+ */
+ public enum ImageElement {
+ FS_IMAGE,
+ IMAGE_VERSION,
+ NAMESPACE_ID,
+ IS_COMPRESSED,
+ COMPRESS_CODEC,
+ LAYOUT_VERSION,
+ NUM_INODES,
+ GENERATION_STAMP,
+ INODES,
+ INODE,
+ INODE_PATH,
+ REPLICATION,
+ MODIFICATION_TIME,
+ ACCESS_TIME,
+ BLOCK_SIZE,
+ NUM_BLOCKS,
+ BLOCKS,
+ BLOCK,
+ BLOCK_ID,
+ NUM_BYTES,
+ NS_QUOTA,
+ DS_QUOTA,
+ PERMISSIONS,
+ SYMLINK,
+ NUM_INODES_UNDER_CONSTRUCTION,
+ INODES_UNDER_CONSTRUCTION,
+ INODE_UNDER_CONSTRUCTION,
+ PREFERRED_BLOCK_SIZE,
+ CLIENT_NAME,
+ CLIENT_MACHINE,
+ USER_NAME,
+ GROUP_NAME,
+ PERMISSION_STRING,
+ CURRENT_DELEGATION_KEY_ID,
+ NUM_DELEGATION_KEYS,
+ DELEGATION_KEYS,
+ DELEGATION_KEY,
+ DELEGATION_TOKEN_SEQUENCE_NUMBER,
+ NUM_DELEGATION_TOKENS,
+ DELEGATION_TOKENS,
+ DELEGATION_TOKEN_IDENTIFIER,
+ DELEGATION_TOKEN_IDENTIFIER_KIND,
+ DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+ DELEGATION_TOKEN_IDENTIFIER_OWNER,
+ DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+ DELEGATION_TOKEN_IDENTIFIER_REALUSER,
+ DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+ DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+ DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+ DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID
+ }
+
+ /**
+ * Begin visiting the fsimage structure. Opportunity to perform
+ * any initialization necessary for the implementing visitor.
+ */
+ abstract void start() throws IOException;
+
+ /**
+ * Finish visiting the fsimage structure. Opportunity to perform any
+ * clean up necessary for the implementing visitor.
+ */
+ abstract void finish() throws IOException;
+
+ /**
+ * Finish visiting the fsimage structure after an error has occurred
+ * during the processing. Opportunity to perform any clean up necessary
+ * for the implementing visitor.
+ */
+ abstract void finishAbnormally() throws IOException;
+
+ /**
+ * Visit a non-enclosing element of the fsimage with the specified value.
+ *
+ * @param element FSImage element
+ * @param value Element's value
+ */
+ abstract void visit(ImageElement element, String value) throws IOException;
+
+ // Convenience methods to automatically convert numeric value types to strings
+ void visit(ImageElement element, int value) throws IOException {
+ visit(element, Integer.toString(value));
+ }
+
+ void visit(ImageElement element, long value) throws IOException {
+ visit(element, Long.toString(value));
+ }
+
+ /**
+ * Begin visiting an element that encloses another element, such as
+ * the beginning of the list of blocks that comprise a file.
+ *
+ * @param element Element being visited
+ */
+ abstract void visitEnclosingElement(ImageElement element)
+ throws IOException;
+
+ /**
+ * Begin visiting an element that encloses another element, such as
+ * the beginning of the list of blocks that comprise a file.
+ *
+ * Also provide an additional key and value for the element, such as the
+ * number of items within the element.
+ *
+ * @param element Element being visited
+ * @param key Key describing the element being visited
+ * @param value Value associated with element being visited
+ */
+ abstract void visitEnclosingElement(ImageElement element,
+ ImageElement key, String value) throws IOException;
+
+ // Convenience methods to automatically convert value types to strings
+ void visitEnclosingElement(ImageElement element,
+ ImageElement key, int value)
+ throws IOException {
+ visitEnclosingElement(element, key, Integer.toString(value));
+ }
+
+ void visitEnclosingElement(ImageElement element,
+ ImageElement key, long value)
+ throws IOException {
+ visitEnclosingElement(element, key, Long.toString(value));
+ }
+
+ /**
+ * Leave current enclosing element. Called, for instance, at the end of
+ * processing the blocks that comprise a file.
+ */
+ abstract void leaveEnclosingElement() throws IOException;
+}
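Since the numeric visit and visitEnclosingElement overloads above are already
concrete, a new visitor only has to implement the seven abstract hooks. Below
is a minimal hypothetical visitor that counts inodes, shown for illustration
and not part of this commit; it would sit in the same package, because
ImageVisitor and ImageElement are package-private.

    package org.apache.hadoop.hdfs.tools.offlineImageViewer;

    // Hypothetical example: count INODE and INODE_UNDER_CONSTRUCTION elements
    // and print the total when the walk finishes.
    class InodeCountingVisitor extends ImageVisitor {
      private long count = 0;

      void start() {}

      void finish() {
        System.out.println("inodes visited: " + count);
      }

      void finishAbnormally() {
        System.out.println("walk ended early; inodes visited so far: " + count);
      }

      void visit(ImageElement element, String value) {}

      void visitEnclosingElement(ImageElement element) {
        if (element == ImageElement.INODE ||
            element == ImageElement.INODE_UNDER_CONSTRUCTION) {
          count++;
        }
      }

      void visitEnclosingElement(ImageElement element,
          ImageElement key, String value) {
        visitEnclosingElement(element);
      }

      void leaveEnclosingElement() {}
    }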