Posted to common-commits@hadoop.apache.org by su...@apache.org on 2013/03/31 06:27:31 UTC
svn commit: r1462875 [1/2] - in /hadoop/common/branches/branch-1: ./ bin/
src/core/org/apache/hadoop/io/compress/
src/docs/src/documentation/content/xdocs/ src/hdfs/org/apache/hadoop/hdfs/
src/hdfs/org/apache/hadoop/hdfs/protocol/ src/hdfs/org/apache/h...
Author: suresh
Date: Sun Mar 31 04:27:29 2013
New Revision: 1462875
URL: http://svn.apache.org/r1462875
Log:
HDFS-4651. Offline Image Viewer backport to branch-1. Contributed by Chris Nauroth.
Added:
hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/IndentedImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/NameDistributionVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewer.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/TextWriterImageVisitor.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/SpotCheckImageVisitor.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestDelimitedImageVisitor.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOIVCanReadOldVersions.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/fsimageV18 (with props)
hadoop/common/branches/branch-1/src/test/org/apache/hadoop/hdfs/tools/offlineImageViewer/fsimageV19 (with props)
Modified:
hadoop/common/branches/branch-1/bin/hadoop
hadoop/common/branches/branch-1/build.xml
hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java
hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java
Modified: hadoop/common/branches/branch-1/bin/hadoop
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/bin/hadoop?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/bin/hadoop (original)
+++ hadoop/common/branches/branch-1/bin/hadoop Sun Mar 31 04:27:29 2013
@@ -81,6 +81,7 @@ print_usage()
echo " fsck run a DFS filesystem checking utility"
echo " fs run a generic filesystem user client"
echo " balancer run a cluster balancing utility"
+ echo " oiv apply the offline fsimage viewer to an fsimage"
echo " fetchdt fetch a delegation token from the NameNode"
echo " jobtracker run the MapReduce job Tracker node"
echo " pipes run a Pipes job"
@@ -287,6 +288,8 @@ elif [ "$COMMAND" = "fsck" ] ; then
elif [ "$COMMAND" = "balancer" ] ; then
CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_BALANCER_OPTS"
+elif [ "$COMMAND" = "oiv" ] ; then
+ CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
elif [ "$COMMAND" = "fetchdt" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher
elif [ "$COMMAND" = "jobtracker" ] ; then
Modified: hadoop/common/branches/branch-1/build.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/build.xml?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/build.xml (original)
+++ hadoop/common/branches/branch-1/build.xml Sun Mar 31 04:27:29 2013
@@ -936,6 +936,8 @@
<copy file="${test.src.dir}/org/apache/hadoop/cli/clitest_data/data30bytes" todir="${test.cache.data}"/>
<copy file="${test.src.dir}/org/apache/hadoop/cli/clitest_data/data60bytes" todir="${test.cache.data}"/>
<copy file="${test.src.dir}/org/apache/hadoop/cli/clitest_data/data120bytes" todir="${test.cache.data}"/>
+ <copy file="${test.src.dir}/org/apache/hadoop/hdfs/tools/offlineImageViewer/fsimageV18" todir="${test.cache.data}"/>
+ <copy file="${test.src.dir}/org/apache/hadoop/hdfs/tools/offlineImageViewer/fsimageV19" todir="${test.cache.data}"/>
</target>
<!-- ================================================================== -->
Modified: hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java (original)
+++ hadoop/common/branches/branch-1/src/core/org/apache/hadoop/io/compress/CompressionCodecFactory.java Sun Mar 31 04:27:29 2013
@@ -39,10 +39,30 @@ public class CompressionCodecFactory {
* automatically supports finding the longest matching suffix.
*/
private SortedMap<String, CompressionCodec> codecs = null;
+
+  /**
+   * A map from codec aliases (the lower-cased short class name, with and
+   * without the 'Codec' suffix) to the codecs.
+   */
+ private Map<String, CompressionCodec> codecsByName = null;
+
+ /**
+ * A map from class names to the codecs
+ */
+ private HashMap<String, CompressionCodec> codecsByClassName = null;
private void addCodec(CompressionCodec codec) {
String suffix = codec.getDefaultExtension();
codecs.put(new StringBuffer(suffix).reverse().toString(), codec);
+ codecsByClassName.put(codec.getClass().getCanonicalName(), codec);
+
+ String codecName = codec.getClass().getSimpleName();
+ codecsByName.put(codecName.toLowerCase(), codec);
+ if (codecName.endsWith("Codec")) {
+ codecName = codecName.substring(0, codecName.length() - "Codec".length());
+ codecsByName.put(codecName.toLowerCase(), codec);
+ }
}
/**
@@ -131,6 +151,8 @@ public class CompressionCodecFactory {
*/
public CompressionCodecFactory(Configuration conf) {
codecs = new TreeMap<String, CompressionCodec>();
+ codecsByClassName = new HashMap<String, CompressionCodec>();
+ codecsByName = new HashMap<String, CompressionCodec>();
List<Class<? extends CompressionCodec>> codecClasses = getCodecClasses(conf);
if (codecClasses == null) {
addCodec(new GzipCodec());
@@ -168,6 +190,68 @@ public class CompressionCodecFactory {
}
/**
+ * Find the relevant compression codec for the codec's canonical class name.
+ * @param classname the canonical class name of the codec
+ * @return the codec object
+ */
+ public CompressionCodec getCodecByClassName(String classname) {
+ if (codecsByClassName == null) {
+ return null;
+ }
+ return codecsByClassName.get(classname);
+ }
+
+ /**
+ * Find the relevant compression codec for the codec's canonical class name
+ * or by codec alias.
+ * <p/>
+ * Codec aliases are case insensitive.
+ * <p/>
+   * The codec alias is the short class name (without the package name).
+   * If the short class name ends with 'Codec', then there are two aliases for
+   * the codec: the complete short class name and the short class name without
+   * the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+   * aliases are 'gzip' and 'gzipcodec'.
+   *
+   * @param codecName the canonical class name or alias of the codec
+ * @return the codec object
+ */
+ public CompressionCodec getCodecByName(String codecName) {
+ if (codecsByClassName == null) {
+ return null;
+ }
+ CompressionCodec codec = getCodecByClassName(codecName);
+ if (codec == null) {
+      // trying to get the codec by name in case the name was specified instead of a class
+ codec = codecsByName.get(codecName.toLowerCase());
+ }
+ return codec;
+ }
+
+ /**
+ * Find the relevant compression codec for the codec's canonical class name
+   * or by codec alias and returns its implementation class.
+   * <p/>
+   * Codec aliases are case insensitive.
+   * <p/>
+   * The codec alias is the short class name (without the package name).
+   * If the short class name ends with 'Codec', then there are two aliases for
+   * the codec: the complete short class name and the short class name without
+   * the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+   * aliases are 'gzip' and 'gzipcodec'.
+   *
+   * @param codecName the canonical class name or alias of the codec
+ * @return the codec class
+ */
+ public Class<? extends CompressionCodec> getCodecClassByName(String codecName) {
+ CompressionCodec codec = getCodecByName(codecName);
+ if (codec == null) {
+ return null;
+ }
+ return codec.getClass();
+ }
+
+ /**
* Removes a suffix from a filename, if it has it.
* @param filename the filename to strip
* @param suffix the suffix to remove
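[Editor's note: the alias rules in the javadoc above are easiest to see in a quick sketch. This is not part of the commit; it only uses the constructor and getCodecByName method shown in the diff, plus the stock GzipCodec that the factory registers by default.]

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class CodecAliasDemo {
      public static void main(String[] args) {
        CompressionCodecFactory factory =
            new CompressionCodecFactory(new Configuration());
        // Aliases are the lower-cased short class name, with and without
        // the 'Codec' suffix, so both lookups resolve to GzipCodec.
        CompressionCodec a = factory.getCodecByName("gzip");
        CompressionCodec b = factory.getCodecByName("GzipCodec");
        System.out.println(a.getClass() == b.getClass()); // true
      }
    }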
Added: hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml (added)
+++ hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/hdfs_imageviewer.xml Sun Mar 31 04:27:29 2013
@@ -0,0 +1,427 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+
+ <header>
+ <title>Offline Image Viewer Guide</title>
+ </header>
+
+ <body>
+
+ <section>
+ <title>Overview</title>
+
+    <p>The Offline Image Viewer is a tool to dump the contents of HDFS
+    fsimage files to human-readable formats in order to allow offline analysis
+    and examination of a Hadoop cluster's namespace. The tool is able to
+    process very large image files relatively quickly, converting them to
+    one of several output formats. The tool handles the layout formats that
+    were included with Hadoop versions 16 and up. If the tool is not able to
+    process an image file, it will exit cleanly. The Offline Image Viewer does not require
+    a Hadoop cluster to be running; it is entirely offline in its operation.</p>
+
+ <p>The Offline Image Viewer provides several output processors:</p>
+ <ol>
+ <li><strong>Ls</strong> is the default output processor. It closely mimics the format of
+ the <code>lsr </code> command. It includes the same fields, in the same order, as
+ <code>lsr </code>: directory or file flag, permissions, replication, owner, group,
+ file size, modification date, and full path. Unlike the <code>lsr </code> command,
+      the root path is included. One important difference between the output
+      of the <code>lsr </code> command and this processor is that this output is not sorted
+      by directory name and contents. Rather, the files are listed in the
+      order in which they are stored in the fsimage file. Therefore, it is
+      not possible to directly compare the output of the <code>lsr </code> command and
+      this tool. The Ls processor uses information contained within the Inode blocks to
+ calculate file sizes and ignores the <code>-skipBlocks</code> option.</li>
+ <li><strong>Indented</strong> provides a more complete view of the fsimage's contents,
+ including all of the information included in the image, such as image
+ version, generation stamp and inode- and block-specific listings. This
+      processor uses indentation to organize the output in a hierarchical manner.
+      This format is suitable for easy human comprehension.</li>
+      <li><strong>Delimited</strong> produces one line per file, consisting of the path,
+      replication, modification time, access time, block size, number of blocks, file size,
+      namespace quota, diskspace quota, permissions, username and group name. If run against
+      an fsimage that does not contain any of these fields, the field's column will be included,
+      but with no data recorded. The default record delimiter is a tab, but this may be changed
+ via the <code>-delimiter</code> command line argument. This processor is designed to
+ create output that is easily analyzed by other tools, such as <a href="http://hadoop.apache.org/pig/">Apache Pig</a>.
+ See the <a href="#analysis">Analyzing Results</a> section
+ for further information on using this processor to analyze the contents of fsimage files.</li>
+      <li><strong>XML</strong> creates an XML document of the fsimage and includes all of the
+      information within the fsimage, similar to the Indented processor. The output
+ of this processor is amenable to automated processing and analysis with XML tools.
+ Due to the verbosity of the XML syntax, this processor will also generate
+ the largest amount of output.</li>
+ <li><strong>FileDistribution</strong> is the tool for analyzing file
+ sizes in the namespace image. In order to run the tool one should
+ define a range of integers <code>[0, maxSize]</code> by specifying
+ <code>maxSize</code> and a <code>step</code>.
+ The range of integers is divided into segments of size
+ <code>step</code>:
+ <code>[0, s</code><sub>1</sub><code>, ..., s</code><sub>n-1</sub><code>, maxSize]</code>,
+ and the processor calculates how many files in the system fall into
+ each segment <code>[s</code><sub>i-1</sub><code>, s</code><sub>i</sub><code>)</code>.
+ Note that files larger than <code>maxSize</code> always fall into
+ the very last segment.
+      The output file is formatted as a tab-separated two-column table:
+      Size and NumFiles, where Size represents the start of the segment
+      and NumFiles is the number of files from the image whose size falls
+      into this segment.</li>
+ </ol>
+
+ </section> <!-- overview -->
+
+ <section>
+ <title>Usage</title>
+
+ <section>
+ <title>Basic</title>
+ <p>The simplest usage of the Offline Image Viewer is to provide just an input and output
+ file, via the <code>-i</code> and <code>-o</code> command-line switches:</p>
+
+ <p><code>bash$ bin/hadoop oiv -i fsimage -o fsimage.txt</code><br/></p>
+
+ <p>This will create a file named fsimage.txt in the current directory using
+ the Ls output processor. For very large image files, this process may take
+ several minutes.</p>
+
+    <p>One can specify which output processor to use via the command-line switch <code>-p</code>.
+ For instance:</p>
+ <p><code>bash$ bin/hadoop oiv -i fsimage -o fsimage.xml -p XML</code><br/></p>
+
+ <p>or</p>
+
+ <p><code>bash$ bin/hadoop oiv -i fsimage -o fsimage.txt -p Indented</code><br/></p>
+
+ <p>This will run the tool using either the XML or Indented output processor,
+ respectively.</p>
+
+ <p>One command-line option worth considering is <code>-skipBlocks</code>, which
+ prevents the tool from explicitly enumerating all of the blocks that make up
+ a file in the namespace. This is useful for file systems that have very large
+ files. Enabling this option can significantly decrease the size of the resulting
+ output, as individual blocks are not included. Note, however, that the Ls processor
+ needs to enumerate the blocks and so overrides this option.</p>
+
+ </section> <!-- Basic -->
+ <section id="Example">
+ <title>Example</title>
+
+<p>Consider the following contrived namespace:</p>
+<source>
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:17 /anotherDir
+
+-rw-r--r-- 3 theuser supergroup 286631664 2009-03-16 21:15 /anotherDir/biggerfile
+
+-rw-r--r-- 3 theuser supergroup 8754 2009-03-16 21:17 /anotherDir/smallFile
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser/mapredsystem
+
+drwx-wx-wx - theuser supergroup 0 2009-03-16 21:11 /mapredsystem/theuser/mapredsystem/ip.redacted.com
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:12 /one
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:12 /one/two
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:16 /user
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 21:19 /user/theuser
+</source>
+
+<p>Applying the Offline Image Viewer to this file with default options would result in the following output:</p>
+<source>
+machine:hadoop-0.21.0-dev theuser$ bin/hadoop oiv -i fsimagedemo -o fsimage.txt
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:16 /
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:17 /anotherDir
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:12 /one
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:16 /user
+
+-rw-r--r-- 3 theuser supergroup 286631664 2009-03-16 14:15 /anotherDir/biggerfile
+
+-rw-r--r-- 3 theuser supergroup 8754 2009-03-16 14:17 /anotherDir/smallFile
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser/mapredsystem
+
+drwx-wx-wx - theuser supergroup 0 2009-03-16 14:11 /mapredsystem/theuser/mapredsystem/ip.redacted.com
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:12 /one/two
+
+drwxr-xr-x - theuser supergroup 0 2009-03-16 14:19 /user/theuser
+</source>
+
+<p>Similarly, applying the Indented processor would generate output that begins with:</p>
+<source>
+machine:hadoop-0.21.0-dev theuser$ bin/hadoop oiv -i fsimagedemo -p Indented -o fsimage.txt
+
+FSImage
+
+ ImageVersion = -19
+
+ NamespaceID = 2109123098
+
+ GenerationStamp = 1003
+
+ INodes [NumInodes = 12]
+
+ Inode
+
+ INodePath =
+
+ Replication = 0
+
+ ModificationTime = 2009-03-16 14:16
+
+ AccessTime = 1969-12-31 16:00
+
+ BlockSize = 0
+
+ Blocks [NumBlocks = -1]
+
+ NSQuota = 2147483647
+
+ DSQuota = -1
+
+ Permissions
+
+ Username = theuser
+
+ GroupName = supergroup
+
+ PermString = rwxr-xr-x
+
+ remaining output omitted
+</source>
+
+ </section> <!-- example-->
+
+ </section>
+
+ <section id="options">
+ <title>Options</title>
+
+ <section>
+ <title>Option Index</title>
+ <table>
+ <tr><th> Flag </th><th> Description </th></tr>
+ <tr><td><code>[-i|--inputFile] <input file></code></td>
+ <td>Specify the input fsimage file to process. Required.</td></tr>
+ <tr><td><code>[-o|--outputFile] <output file></code></td>
+ <td>Specify the output filename, if the specified output processor
+ generates one. If the specified file already exists, it is silently overwritten. Required.
+ </td></tr>
+ <tr><td><code>[-p|--processor] <processor></code></td>
+        <td>Specify the image processor to apply against the image file. Currently
+        valid options are Ls (default), XML, Delimited, Indented, and FileDistribution.
+ </td></tr>
+ <tr><td><code>-skipBlocks</code></td>
+        <td>Do not enumerate individual blocks within files. This may save processing time
+        and output file space on namespaces with very large files. The <code>Ls</code> processor reads
+ the blocks to correctly determine file sizes and ignores this option.</td></tr>
+ <tr><td><code>-printToScreen</code></td>
+ <td>Pipe output of processor to console as well as specified file. On extremely
+ large namespaces, this may increase processing time by an order of magnitude.</td></tr>
+ <tr><td><code>-delimiter <arg></code></td>
+ <td>When used in conjunction with the Delimited processor, replaces the default
+ tab delimiter with the string specified by <code>arg</code>.</td></tr>
+ <tr><td><code>[-h|--help]</code></td>
+ <td>Display the tool usage and help information and exit.</td></tr>
+ </table>
+ </section> <!-- options -->
+ </section>
+
+ <section id="analysis">
+ <title>Analyzing Results</title>
+    <p>The Offline Image Viewer makes it easy to gather large amounts of data about the HDFS namespace.
+    This information can then be used to explore file system usage patterns or find
+    specific files that match arbitrary criteria, along with other types of namespace analysis. The Delimited
+    image processor in particular creates
+    output that is amenable to further processing by tools such as <a href="http://hadoop.apache.org/pig/">Apache Pig</a>. Pig is a particularly
+    good choice for analyzing these data, as it can handle the output generated from a small fsimage
+    and also scales up to consume data from extremely large file systems.</p>
+    <p>The Delimited image processor generates lines of text separated, by default, by tabs and includes
+    all of the fields that are common between constructed files and files that were still under construction
+    when the fsimage was generated. Example scripts are provided demonstrating how to use this output to
+    accomplish three tasks: determine the number of files each user has created on the file system,
+    find files that were created but have never been accessed, and find probable duplicates of large files by comparing
+    the size of each file.</p>
+ <p>Each of the following scripts assumes you have generated an output file using the Delimited processor named
+ <code>foo</code> and will be storing the results of the Pig analysis in a file named <code>results</code>.</p>
+ <section>
+ <title>Total Number of Files for Each User</title>
+ <p>This script processes each path within the namespace, groups them by the file owner and determines the total
+ number of files each user owns.</p>
+ <p><strong>numFilesOfEachUser.pig:</strong></p>
+ <source>
+-- This script determines the total number of files each user has in
+-- the namespace. Its output is of the form:
+-- username, totalNumFiles
+
+-- Load all of the fields from the file
+A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
+ replication:int,
+ modTime:chararray,
+ accessTime:chararray,
+ blockSize:long,
+ numBlocks:int,
+ fileSize:long,
+ NamespaceQuota:int,
+ DiskspaceQuota:int,
+ perms:chararray,
+ username:chararray,
+ groupname:chararray);
+
+
+-- Grab just the path and username
+B = FOREACH A GENERATE path, username;
+
+-- Generate the sum of the number of paths for each user
+C = FOREACH (GROUP B BY username) GENERATE group, COUNT(B.path);
+
+-- Save results
+STORE C INTO '$outputFile';
+ </source>
+      <p>This script can be run with Pig using the following command:</p>
+ <p><code>bin/pig -x local -param inputFile=../foo -param outputFile=../results ../numFilesOfEachUser.pig</code><br/></p>
+ <p>The output file's content will be similar to that below:</p>
+ <p>
+ <code>bart 1</code><br/>
+ <code>lisa 16</code><br/>
+ <code>homer 28</code><br/>
+ <code>marge 2456</code><br/>
+ </p>
+ </section>
+
+ <section><title>Files That Have Never Been Accessed</title>
+ <p>This script finds files that were created but whose access times were never changed, meaning they were never opened or viewed.</p>
+ <p><strong>neverAccessed.pig:</strong></p>
+ <source>
+-- This script generates a list of files that were created but never
+-- accessed, based on their AccessTime
+
+-- Load all of the fields from the file
+A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
+ replication:int,
+ modTime:chararray,
+ accessTime:chararray,
+ blockSize:long,
+ numBlocks:int,
+ fileSize:long,
+ NamespaceQuota:int,
+ DiskspaceQuota:int,
+ perms:chararray,
+ username:chararray,
+ groupname:chararray);
+
+-- Grab just the path and last time the file was accessed
+B = FOREACH A GENERATE path, accessTime;
+
+-- Drop all the paths that don't have the default assigned last-access time
+C = FILTER B BY accessTime == '1969-12-31 16:00';
+
+-- Drop the accessTimes, since they're all the same
+D = FOREACH C GENERATE path;
+
+-- Save results
+STORE D INTO '$outputFile';
+ </source>
+      <p>This script can be run with Pig using the following command; the output file will contain a list of files that were created but never viewed afterwards.</p>
+ <p><code>bin/pig -x local -param inputFile=../foo -param outputFile=../results ../neverAccessed.pig</code><br/></p>
+ </section>
+ <section><title>Probable Duplicated Files Based on File Size</title>
+      <p>This script groups files together based on their size, drops any that are less than 100 MB and returns a list of the file size, number of files found and a tuple of the file paths. This can be used to find likely duplicates within the filesystem namespace.</p>
+
+ <p><strong>probableDuplicates.pig:</strong></p>
+ <source>
+-- This script finds probable duplicate files greater than 100 MB by
+-- grouping together files based on their byte size. Files of this size
+-- with exactly the same number of bytes can be considered probable
+-- duplicates, but should be checked further, either by comparing the
+-- contents directly or by another proxy, such as a hash of the contents.
+-- The script's output is of the form:
+-- fileSize numProbableDuplicates {(probableDup1), (probableDup2)}
+
+-- Load all of the fields from the file
+A = LOAD '$inputFile' USING PigStorage('\t') AS (path:chararray,
+ replication:int,
+ modTime:chararray,
+ accessTime:chararray,
+ blockSize:long,
+ numBlocks:int,
+ fileSize:long,
+ NamespaceQuota:int,
+ DiskspaceQuota:int,
+ perms:chararray,
+ username:chararray,
+ groupname:chararray);
+
+-- Grab the pathname and filesize
+B = FOREACH A generate path, fileSize;
+
+-- Drop files smaller than 100 MB
+C = FILTER B by fileSize > 100L * 1024L * 1024L;
+
+-- Gather all the files of the same byte size
+D = GROUP C by fileSize;
+
+-- Generate path, num of duplicates, list of duplicates
+E = FOREACH D generate group AS fileSize, COUNT(C) as numDupes, C.path AS files;
+
+-- Drop all the sizes for which there is only one file
+F = FILTER E by numDupes > 1L;
+
+-- Sort by the size of the files
+G = ORDER F by fileSize;
+
+-- Save results
+STORE G INTO '$outputFile';
+ </source>
+   <p>This script can be run with Pig using the following command:</p>
+ <p><code>bin/pig -x local -param inputFile=../foo -param outputFile=../results ../probableDuplicates.pig</code><br/></p>
+ <p> The output file's content will be similar to that below:</p>
+
+<source>
+1077288632 2 {(/user/tennant/work1/part-00501),(/user/tennant/work1/part-00993)}
+1077288664 4 {(/user/tennant/work0/part-00567),(/user/tennant/work0/part-03980),(/user/tennant/work1/part-00725),(/user/eccelston/output/part-03395)}
+1077288668 3 {(/user/tennant/work0/part-03705),(/user/tennant/work0/part-04242),(/user/tennant/work1/part-03839)}
+1077288698 2 {(/user/tennant/work0/part-00435),(/user/eccelston/output/part-01382)}
+1077288702 2 {(/user/tennant/work0/part-03864),(/user/eccelston/output/part-03234)}
+</source>
+   <p>Each line includes the file size in bytes that was found to be duplicated, the number of duplicates found, and a list of the duplicated paths.
+   Files smaller than 100 MB are ignored; at these sizes, an exact size match makes it reasonably likely that the files are duplicates.</p>
+ </section>
+ </section>
+
+
+ </body>
+
+</document>
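[Editor's note: to complement the Pig scripts in the new guide, a small hedged sketch of consuming the Delimited output directly from Java. The field order follows the Delimited processor description above; the input name 'foo' is the guide's own illustrative file name.]

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;

    public class DelimitedLineCount {
      public static void main(String[] args) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader("foo"));
        long files = 0;
        String line;
        while ((line = in.readLine()) != null) {
          // Fields, tab-separated: path, replication, modTime, accessTime,
          // blockSize, numBlocks, fileSize, nsQuota, dsQuota, perms,
          // username, groupname. A field absent from the fsimage layout
          // shows up as an empty column, so keep trailing empties.
          String[] fields = line.split("\t", -1);
          if (fields.length >= 12) {
            files++;
          }
        }
        in.close();
        System.out.println("records: " + files);
      }
    }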
Modified: hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/common/branches/branch-1/src/docs/src/documentation/content/xdocs/site.xml Sun Mar 31 04:27:29 2013
@@ -63,6 +63,7 @@ See http://forrest.apache.org/docs/linki
<hdfs_perm label="Permissions" href="hdfs_permissions_guide.html" />
<hdfs_quotas label="Quotas" href="hdfs_quota_admin_guide.html" />
<hdfs_SLG label="Synthetic Load Generator" href="SLG_user_guide.html" />
+ <hdfs_imageviewer label="Offline Image Viewer" href="hdfs_imageviewer.html" />
<hftp label="HFTP" href="hftp.html"/>
<webhdfs label="WebHDFS REST API" href="webhdfs.html" />
<hdfs_libhdfs label="C API libhdfs" href="libhdfs.html" />
Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/DFSConfigKeys.java Sun Mar 31 04:27:29 2013
@@ -130,6 +130,8 @@ public class DFSConfigKeys extends Commo
public static final long DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT = 24*60*60*1000;
public static final String DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY = "dfs.namenode.delegation.token.max-lifetime";
public static final long DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = 7*24*60*60*1000;
+ public static final String DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY = "dfs.namenode.delegation.token.always-use"; // for tests
+ public static final boolean DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT = false;
//Following keys have no defaults
public static final String DFS_DATANODE_DATA_DIR_KEY = "dfs.datanode.data.dir";
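[Editor's note: the new key is marked "for tests" in the diff. A minimal sketch of how a test might flip it; the surrounding class is illustrative, not part of the commit.]

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class TokenTestConf {
      public static Configuration create() {
        // Force the delegation token secret manager on even without
        // Kerberos, using the test-only key added above.
        Configuration conf = new Configuration();
        conf.setBoolean(
            DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
        return conf;
      }
    }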
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/protocol/LayoutVersion.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.protocol;
+
+import java.util.EnumSet;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * This class tracks changes in the layout version of HDFS.
+ *
+ * The layout version is changed for the following reasons:
+ * <ol>
+ * <li>The layout of how namenode or datanode stores information
+ * on disk changes.</li>
+ * <li>A new operation code is added to the editlog.</li>
+ * <li>Modification of the format or content of a record
+ * in the editlog or fsimage.</li>
+ * </ol>
+ * <br>
+ * <b>How to update layout version:<br></b>
+ * When a change requires a new layout version, please add an entry into
+ * {@link Feature} with a short enum name, the new layout version and a description
+ * of the change. Please see {@link Feature} for further details.
+ * <br>
+ */
+@InterfaceAudience.Private
+public class LayoutVersion {
+
+ /**
+ * Version in which HDFS-2991 was fixed. This bug caused OP_ADD to
+ * sometimes be skipped for append() calls. If we see such a case when
+ * loading the edits, but the version is known to have that bug, we
+   * work around the issue. Otherwise we should consider it a corruption
+ * and bail.
+ */
+ public static final int BUGFIX_HDFS_2991_VERSION = -40;
+
+ /**
+ * Enums for features that change the layout version.
+ * <br><br>
+ * To add a new layout version:
+ * <ul>
+ * <li>Define a new enum constant with a short enum name, the new layout version
+ * and description of the added feature.</li>
+   * <li>When adding a layout version with an ancestor that is not the same as
+   * its immediate predecessor, use the constructor where a specific ancestor
+   * can be passed.
+ * </li>
+ * </ul>
+ */
+ public static enum Feature {
+ NAMESPACE_QUOTA(-16, "Support for namespace quotas"),
+ FILE_ACCESS_TIME(-17, "Support for access time on files"),
+ DISKSPACE_QUOTA(-18, "Support for disk space quotas"),
+ STICKY_BIT(-19, "Support for sticky bits"),
+ APPEND_RBW_DIR(-20, "Datanode has \"rbw\" subdirectory for append"),
+ ATOMIC_RENAME(-21, "Support for atomic rename"),
+ CONCAT(-22, "Support for concat operation"),
+ SYMLINKS(-23, "Support for symbolic links"),
+ DELEGATION_TOKEN(-24, "Support for delegation tokens for security"),
+ FSIMAGE_COMPRESSION(-25, "Support for fsimage compression"),
+ FSIMAGE_CHECKSUM(-26, "Support checksum for fsimage"),
+ REMOVE_REL13_DISK_LAYOUT_SUPPORT(-27, "Remove support for 0.13 disk layout"),
+ EDITS_CHESKUM(-28, "Support checksum for editlog"),
+ UNUSED(-29, "Skipped version"),
+ FSIMAGE_NAME_OPTIMIZATION(-30, "Store only last part of path in fsimage"),
+ RESERVED_REL20_203(-31, -19, "Reserved for release 0.20.203", true,
+ DELEGATION_TOKEN),
+ RESERVED_REL20_204(-32, -31, "Reserved for release 0.20.204", true),
+ RESERVED_REL22(-33, -27, "Reserved for release 0.22", true),
+ RESERVED_REL23(-34, -30, "Reserved for release 0.23", true),
+ // layout versions -35 - -40 are features not present on this branch
+ RESERVED_REL1_2_0(-41, -32, "Reserved for release 1.2.0", true, CONCAT);
+
+ final int lv;
+ final int ancestorLV;
+ final String description;
+ final boolean reserved;
+ final Feature[] specialFeatures;
+
+ /**
+     * Feature that is added at layout version {@code lv}, with the
+     * immediately preceding layout version {@code lv + 1} as its ancestor.
+ * @param lv new layout version with the addition of this feature
+ * @param description description of the feature
+ */
+ Feature(final int lv, final String description) {
+ this(lv, lv + 1, description, false);
+ }
+
+ /**
+     * Feature that is added at layout version {@code ancestorLV}.
+     * @param lv new layout version with the addition of this feature
+     * @param ancestorLV layout version from which the new lv is derived
+     * @param description description of the feature
+     * @param reserved true when this is a layout version reserved for previous
+     *        versions
+     * @param features set of features that are to be enabled for this version
+ */
+ Feature(final int lv, final int ancestorLV, final String description,
+ boolean reserved, Feature... features) {
+ this.lv = lv;
+ this.ancestorLV = ancestorLV;
+ this.description = description;
+ this.reserved = reserved;
+ specialFeatures = features;
+ }
+
+ /**
+ * Accessor method for feature layout version
+ * @return int lv value
+ */
+ public int getLayoutVersion() {
+ return lv;
+ }
+
+ /**
+ * Accessor method for feature ancestor layout version
+ * @return int ancestor LV value
+ */
+ public int getAncestorLayoutVersion() {
+ return ancestorLV;
+ }
+
+ /**
+ * Accessor method for feature description
+ * @return String feature description
+ */
+ public String getDescription() {
+ return description;
+ }
+
+ public boolean isReservedForOldRelease() {
+ return reserved;
+ }
+ }
+
+ // Build layout version and corresponding feature matrix
+  static final Map<Integer, EnumSet<Feature>> map =
+ new HashMap<Integer, EnumSet<Feature>>();
+
+ // Static initialization
+ static {
+ initMap();
+ }
+
+ /**
+ * Initialize the map of a layout version and EnumSet of {@link Feature}s
+ * supported.
+ */
+ private static void initMap() {
+ // Go through all the enum constants and build a map of
+ // LayoutVersion <-> EnumSet of all supported features in that LayoutVersion
+ for (Feature f : Feature.values()) {
+ EnumSet<Feature> ancestorSet = map.get(f.ancestorLV);
+ if (ancestorSet == null) {
+ ancestorSet = EnumSet.noneOf(Feature.class); // Empty enum set
+ map.put(f.ancestorLV, ancestorSet);
+ }
+ EnumSet<Feature> featureSet = EnumSet.copyOf(ancestorSet);
+ if (f.specialFeatures != null) {
+ for (Feature specialFeature : f.specialFeatures) {
+ featureSet.add(specialFeature);
+ }
+ }
+ featureSet.add(f);
+ map.put(f.lv, featureSet);
+ }
+ }
+
+ /**
+ * Gets formatted string that describes {@link LayoutVersion} information.
+ */
+ public static String getString() {
+ final StringBuilder buf = new StringBuilder();
+ buf.append("Feature List:\n");
+ for (Feature f : Feature.values()) {
+ buf.append(f).append(" introduced in layout version ")
+ .append(f.lv).append(" (").
+ append(f.description).append(")\n");
+ }
+
+ buf.append("\n\nLayoutVersion and supported features:\n");
+ for (Feature f : Feature.values()) {
+ buf.append(f.lv).append(": ").append(map.get(f.lv))
+ .append("\n");
+ }
+ return buf.toString();
+ }
+
+ /**
+ * Returns true if a given feature is supported in the given layout version
+ * @param f Feature
+ * @param lv LayoutVersion
+ * @return true if {@code f} is supported in layout version {@code lv}
+ */
+ public static boolean supports(final Feature f, final int lv) {
+ final EnumSet<Feature> set = map.get(lv);
+ return set != null && set.contains(f);
+ }
+
+ /**
+ * Get the current layout version
+ */
+ public static int getCurrentLayoutVersion() {
+ Feature[] values = Feature.values();
+ for (int i = values.length -1; i >= 0; i--) {
+ if (!values[i].isReservedForOldRelease()) {
+ return values[i].lv;
+ }
+ }
+ throw new AssertionError("All layout versions are reserved.");
+ }
+}
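[Editor's note: a small sketch of querying the feature matrix that the static initializer above builds. The expected values follow from the Feature constants in the diff: RESERVED_REL20_203 (-31) lists DELEGATION_TOKEN as a special feature, and FSIMAGE_NAME_OPTIMIZATION (-30) is the last constant not reserved for an old release. Not part of the commit.]

    import org.apache.hadoop.hdfs.protocol.LayoutVersion;
    import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;

    public class LayoutVersionDemo {
      public static void main(String[] args) {
        // -24 introduced delegation tokens, so -24 supports the feature.
        System.out.println(
            LayoutVersion.supports(Feature.DELEGATION_TOKEN, -24)); // true
        // -31 descends from -19 but names DELEGATION_TOKEN as a special
        // feature, so this lookup is also true.
        System.out.println(
            LayoutVersion.supports(Feature.DELEGATION_TOKEN, -31)); // true
        // The last constant not reserved for an old release is
        // FSIMAGE_NAME_OPTIMIZATION, so this prints -30.
        System.out.println(LayoutVersion.getCurrentLayoutVersion());
      }
    }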
Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSImage.java Sun Mar 31 04:27:29 2013
@@ -1820,7 +1820,7 @@ public class FSImage extends Storage {
}
static private final UTF8 U_STR = new UTF8();
- static String readString(DataInputStream in) throws IOException {
+ public static String readString(DataInputStream in) throws IOException {
U_STR.readFields(in);
return U_STR.toString();
}
@@ -1830,7 +1830,7 @@ public class FSImage extends Storage {
return s.isEmpty()? null: s;
}
- static byte[] readBytes(DataInputStream in) throws IOException {
+ public static byte[] readBytes(DataInputStream in) throws IOException {
U_STR.readFields(in);
int len = U_STR.getLength();
byte[] bytes = new byte[len];
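[Editor's note: the two visibility changes above let code outside the class, such as the new offline image viewer's loader, reuse FSImage's field decoding. A hedged sketch of the call shape; a real caller would first position the stream at a string-typed field, which this fragment does not attempt.]

    import java.io.DataInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;

    import org.apache.hadoop.hdfs.server.namenode.FSImage;

    public class ReadImageField {
      public static void main(String[] args) throws IOException {
        DataInputStream in =
            new DataInputStream(new FileInputStream(args[0]));
        try {
          // Decodes the next UTF8-encoded field from wherever the
          // stream is currently positioned.
          System.out.println(FSImage.readString(in));
        } finally {
          in.close();
        }
      }
    }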
Modified: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java?rev=1462875&r1=1462874&r2=1462875&view=diff
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java (original)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/server/namenode/NameNode.java Sun Mar 31 04:27:29 2013
@@ -17,6 +17,9 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
+
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
@@ -294,8 +297,15 @@ public class NameNode implements ClientP
myMetrics = NameNodeInstrumentation.create(conf);
this.namesystem = new FSNamesystem(this, conf);
+
+ // For testing purposes, allow the DT secret manager to be started regardless
+ // of whether security is enabled.
+ boolean alwaysUseDelegationTokensForTests =
+ conf.getBoolean(DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
+ DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
- if (UserGroupInformation.isSecurityEnabled()) {
+ if (UserGroupInformation.isSecurityEnabled() ||
+ alwaysUseDelegationTokensForTests) {
namesystem.activateSecretManager();
}
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DelimitedImageVisitor.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+
+/**
+ * A DelimitedImageVisitor generates a text representation of the fsimage,
+ * with each element separated by a delimiter string. All of the elements
+ * common to both inodes and inodes-under-construction are included. When
+ * processing an fsimage with a layout version that did not include an
+ * element, such as AccessTime, the output file will include a column
+ * for the value, but no value will be included.
+ *
+ * Individual block information for each file is not currently included.
+ *
+ * The default delimiter is tab, as this is an unlikely value to be included
+ * in an inode path or other text metadata. The delimiter value can be set via
+ * the constructor.
+ */
+class DelimitedImageVisitor extends TextWriterImageVisitor {
+ private static final String defaultDelimiter = "\t";
+
+ final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
+  private long fileSize = 0L;
+ // Elements of fsimage we're interested in tracking
+ private final Collection<ImageElement> elementsToTrack;
+ // Values for each of the elements in elementsToTrack
+ private final AbstractMap<ImageElement, String> elements =
+ new HashMap<ImageElement, String>();
+ private final String delimiter;
+
+ {
+ elementsToTrack = new ArrayList<ImageElement>();
+
+ // This collection determines what elements are tracked and the order
+ // in which they are output
+ Collections.addAll(elementsToTrack, ImageElement.INODE_PATH,
+ ImageElement.REPLICATION,
+ ImageElement.MODIFICATION_TIME,
+ ImageElement.ACCESS_TIME,
+ ImageElement.BLOCK_SIZE,
+ ImageElement.NUM_BLOCKS,
+ ImageElement.NUM_BYTES,
+ ImageElement.NS_QUOTA,
+ ImageElement.DS_QUOTA,
+ ImageElement.PERMISSION_STRING,
+ ImageElement.USER_NAME,
+ ImageElement.GROUP_NAME);
+ }
+
+ public DelimitedImageVisitor(String filename) throws IOException {
+ this(filename, false);
+ }
+
+ public DelimitedImageVisitor(String outputFile, boolean printToScreen)
+ throws IOException {
+ this(outputFile, printToScreen, defaultDelimiter);
+ }
+
+ public DelimitedImageVisitor(String outputFile, boolean printToScreen,
+ String delimiter) throws IOException {
+ super(outputFile, printToScreen);
+ this.delimiter = delimiter;
+ reset();
+ }
+
+ /**
+ * Reset the values of the elements we're tracking in order to handle
+ * the next file
+ */
+ private void reset() {
+ elements.clear();
+ for(ImageElement e : elementsToTrack)
+ elements.put(e, null);
+
+    fileSize = 0L;
+ }
+
+ @Override
+ void leaveEnclosingElement() throws IOException {
+ ImageElement elem = elemQ.pop();
+
+ // If we're done with an inode, write out our results and start over
+ if(elem == ImageElement.INODE ||
+ elem == ImageElement.INODE_UNDER_CONSTRUCTION) {
+ writeLine();
+ write("\n");
+ reset();
+ }
+ }
+
+ /**
+ * Iterate through all the elements we're tracking and, if a value was
+ * recorded for it, write it out.
+ */
+ private void writeLine() throws IOException {
+ Iterator<ImageElement> it = elementsToTrack.iterator();
+
+ while(it.hasNext()) {
+ ImageElement e = it.next();
+
+ String v = null;
+ if(e == ImageElement.NUM_BYTES)
+ v = String.valueOf(fileSize);
+ else
+ v = elements.get(e);
+
+ if(v != null)
+ write(v);
+
+ if(it.hasNext())
+ write(delimiter);
+ }
+ }
+
+ @Override
+ void visit(ImageElement element, String value) throws IOException {
+ // Explicitly label the root path
+ if(element == ImageElement.INODE_PATH && value.equals(""))
+ value = "/";
+
+ // Special case of file size, which is sum of the num bytes in each block
+ if(element == ImageElement.NUM_BYTES)
+ fileSize += Long.valueOf(value);
+
+ if(elements.containsKey(element) && element != ImageElement.NUM_BYTES)
+ elements.put(element, value);
+
+ }
+
+ @Override
+ void visitEnclosingElement(ImageElement element) throws IOException {
+ elemQ.push(element);
+ }
+
+ @Override
+ void visitEnclosingElement(ImageElement element, ImageElement key,
+ String value) throws IOException {
+ // Special case as numBlocks is an attribute of the blocks element
+ if(key == ImageElement.NUM_BLOCKS
+ && elements.containsKey(ImageElement.NUM_BLOCKS))
+ elements.put(key, value);
+
+ elemQ.push(element);
+ }
+
+ @Override
+ void start() throws IOException { /* Nothing to do */ }
+}
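[Editor's note: a hedged construction sketch for the visitor above. The class is package-private, so this would have to live in org.apache.hadoop.hdfs.tools.offlineImageViewer; in normal use the OfflineImageViewer driver constructs it from the -o, -printToScreen and -delimiter flags. Not part of the commit.]

    package org.apache.hadoop.hdfs.tools.offlineImageViewer;

    import java.io.IOException;

    public class DelimitedVisitorDemo {
      public static void main(String[] args) throws IOException {
        // One comma-separated record per inode, written to fsimage.csv
        // without echoing to the console (printToScreen = false).
        DelimitedImageVisitor visitor =
            new DelimitedImageVisitor("fsimage.csv", false, ",");
        // An ImageLoader implementation would now drive the visitor:
        // loader.loadImage(in, visitor, false);
      }
    }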
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/DepthCounter.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/**
+ * Utility class for tracking descent into the structure of the
+ * Visitor class (ImageVisitor, EditsVisitor etc.)
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public class DepthCounter {
+ private int depth = 0;
+
+ public void incLevel() { depth++; }
+ public void decLevel() { if(depth >= 1) depth--; }
+ public int getLevel() { return depth; }
+}
+
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,182 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * File size distribution visitor.
+ *
+ * <h3>Description.</h3>
+ * This is the tool for analyzing file sizes in the namespace image.
+ * In order to run the tool one should define a range of integers
+ * <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
+ * The range of integers is divided into segments of size <tt>step</tt>:
+ * <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
+ * and the visitor calculates how many files in the system fall into
+ * each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>.
+ * Note that files larger than <tt>maxSize</tt> always fall into
+ * the very last segment.
+ *
+ * <h3>Input.</h3>
+ * <ul>
+ * <li><tt>filename</tt> specifies the location of the image file;</li>
+ * <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
+ * sizes considered by the visitor;</li>
+ * <li><tt>step</tt> the range is divided into segments of size step.</li>
+ * </ul>
+ *
+ * <h3>Output.</h3>
+ * The output file is formatted as a tab separated two column table:
+ * Size and NumFiles. Where Size represents the start of the segment,
+ * and numFiles is the number of files form the image which size falls in
+ * this segment.
+ */
+class FileDistributionVisitor extends TextWriterImageVisitor {
+ final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();
+
+ private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37
+ private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21
+
+ private int[] distribution;
+ private long maxSize;
+ private int step;
+
+ private int totalFiles;
+ private int totalDirectories;
+ private int totalBlocks;
+ private long totalSpace;
+ private long maxFileSize;
+
+ private FileContext current;
+
+ private boolean inInode = false;
+
+ /**
+ * File or directory information.
+ */
+ private static class FileContext {
+ String path;
+ long fileSize;
+ int numBlocks;
+ int replication;
+ }
+
+ public FileDistributionVisitor(String filename,
+ long maxSize,
+ int step) throws IOException {
+ super(filename, false);
+ this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
+ this.step = (step == 0 ? INTERVAL_DEFAULT : step);
+ long numIntervals = this.maxSize / this.step;
+ if(numIntervals >= Integer.MAX_VALUE)
+ throw new IOException("Too many distribution intervals " + numIntervals);
+ this.distribution = new int[1 + (int)(numIntervals)];
+ this.totalFiles = 0;
+ this.totalDirectories = 0;
+ this.totalBlocks = 0;
+ this.totalSpace = 0;
+ this.maxFileSize = 0;
+ }
+
+ @Override
+ void start() throws IOException {}
+
+ @Override
+ void finish() throws IOException {
+ // write the distribution into the output file
+ write("Size\tNumFiles\n");
+ for(int i = 0; i < distribution.length; i++)
+ write(((long)i * step) + "\t" + distribution[i] + "\n");
+ System.out.println("totalFiles = " + totalFiles);
+ System.out.println("totalDirectories = " + totalDirectories);
+ System.out.println("totalBlocks = " + totalBlocks);
+ System.out.println("totalSpace = " + totalSpace);
+ System.out.println("maxFileSize = " + maxFileSize);
+ super.finish();
+ }
+
+ @Override
+ void leaveEnclosingElement() throws IOException {
+ ImageElement elem = elemS.pop();
+
+ if(elem != ImageElement.INODE &&
+ elem != ImageElement.INODE_UNDER_CONSTRUCTION)
+ return;
+ inInode = false;
+ if(current.numBlocks < 0) {
+ totalDirectories ++;
+ return;
+ }
+ totalFiles++;
+ totalBlocks += current.numBlocks;
+ totalSpace += current.fileSize * current.replication;
+ if(maxFileSize < current.fileSize)
+ maxFileSize = current.fileSize;
+ int high;
+ if(current.fileSize > maxSize)
+ high = distribution.length-1;
+ else
+ high = (int)Math.ceil((double)current.fileSize / step);
+ distribution[high]++;
+ if(totalFiles % 1000000 == 1)
+ System.out.println("Files processed: " + totalFiles
+ + " Current: " + current.path);
+ }
+
+ @Override
+ void visit(ImageElement element, String value) throws IOException {
+ if(inInode) {
+ switch(element) {
+ case INODE_PATH:
+ current.path = (value.equals("") ? "/" : value);
+ break;
+ case REPLICATION:
+ current.replication = Integer.valueOf(value);
+ break;
+ case NUM_BYTES:
+ current.fileSize += Long.valueOf(value);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ @Override
+ void visitEnclosingElement(ImageElement element) throws IOException {
+ elemS.push(element);
+ if(element == ImageElement.INODE ||
+ element == ImageElement.INODE_UNDER_CONSTRUCTION) {
+ current = new FileContext();
+ inInode = true;
+ }
+ }
+
+ @Override
+ void visitEnclosingElement(ImageElement element,
+ ImageElement key, String value) throws IOException {
+ elemS.push(element);
+ if(element == ImageElement.INODE ||
+ element == ImageElement.INODE_UNDER_CONSTRUCTION)
+ inInode = true;
+ else if(element == ImageElement.BLOCKS)
+ current.numBlocks = Integer.parseInt(value);
+ }
+}
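The histogram rule implemented in leaveEnclosingElement() above is: a file of
size s falls into bucket ceil(s / step), anything larger than maxSize is
clamped into the final bucket, and finish() labels row i with the byte size
i * step. A minimal standalone sketch of that rule follows; the class and
method names are hypothetical and are not part of this patch.

    // Hypothetical sketch of FileDistributionVisitor's bucketing rule.
    public class BucketRuleSketch {
      static int bucketFor(long fileSize, long maxSize, int step, int numBuckets) {
        if (fileSize > maxSize)
          return numBuckets - 1;                  // clamp oversized files
        return (int) Math.ceil((double) fileSize / step);
      }

      public static void main(String[] args) {
        long maxSize = 0x2000000000L;             // default cap: 2^37 bytes
        int step = 0x200000;                      // default interval: 2 MB
        int numBuckets = 1 + (int) (maxSize / step);
        // A 3 MB file lands in bucket ceil(3 MB / 2 MB) = 2, which finish()
        // reports as the 4194304 (4 MB) row of the Size/NumFiles table.
        System.out.println(bucketFor(3L * 1024 * 1024, maxSize, step, numBuckets));
      }
    }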
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoader.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * An ImageLoader can accept a DataInputStream to a Hadoop FSImage file
+ * and walk over its structure using the supplied ImageVisitor.
+ *
+ * Each implementation of ImageLoader is designed to rapidly process an
+ * image file. As long as minor changes are made from one layout version
+ * to another, it is acceptable to tweak one implementation to read the next.
+ * However, if the layout changes enough that supporting it would make a
+ * processor slow or its code difficult to read, another processor should
+ * be created.
+ * This allows each processor to quickly read an image without getting
+ * bogged down in dealing with significant differences between layout versions.
+ */
+interface ImageLoader {
+
+ /**
+ * @param in DataInputStream pointing to a Hadoop FSImage file
+ * @param v Visitor to apply to the FSImage file
+ * @param skipBlocks Whether to skip over each file's individual blocks
+ */
+ public void loadImage(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks) throws IOException;
+
+ /**
+ * Can this processor handle the specified version of the FSImage file?
+ *
+ * @param version FSImage file layout version
+ * @return True if this instance can process the file
+ */
+ public boolean canLoadVersion(int version);
+
+ /**
+ * Factory for obtaining a version of the image loader that can read
+ * a particular image format.
+ */
+ @InterfaceAudience.Private
+ public class LoaderFactory {
+ // Java doesn't support static methods on interfaces, which necessitates
+ // this factory class
+
+ /**
+ * Find an image loader capable of interpreting the specified
+ * layout version number. If none exists, return null.
+ *
+ * @param version fsimage layout version number to be processed
+ * @return ImageLoader that can interpret specified version, or null
+ */
+ static public ImageLoader getLoader(int version) {
+ // Easy to add more image processors as they are written
+ ImageLoader[] loaders = { new ImageLoaderCurrent() };
+
+ for (ImageLoader l : loaders) {
+ if (l.canLoadVersion(version))
+ return l;
+ }
+
+ return null;
+ }
+ }
+}
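A typical driver reads the layout version (the first int in the image), asks
LoaderFactory for a matching loader, and hands it a visitor. The following is
a rough sketch of that flow, not code from this patch: the file names are
assumptions, the mark/reset peek is one way to recover the version before
loadImage() re-reads it, and the class must live in
org.apache.hadoop.hdfs.tools.offlineImageViewer because ImageLoader and the
visitors are package-private. IndentedImageVisitor is one of the visitors
added elsewhere in this commit, constructed here with an assumed output-file
name.

    package org.apache.hadoop.hdfs.tools.offlineImageViewer;

    import java.io.BufferedInputStream;
    import java.io.DataInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;

    // Hypothetical driver sketch, for illustration only.
    public class LoaderFactorySketch {
      public static void main(String[] args) throws IOException {
        DataInputStream in = new DataInputStream(
            new BufferedInputStream(new FileInputStream("fsimage"))); // assumed path
        try {
          in.mark(8);
          int version = in.readInt(); // layout version leads the image file
          in.reset();                 // loadImage() re-reads it from the start
          ImageLoader loader = ImageLoader.LoaderFactory.getLoader(version);
          if (loader == null) {
            throw new IOException("No image loader for layout version " + version);
          }
          // Third argument skips per-block records; pass false to visit every block.
          loader.loadImage(in, new IndentedImageVisitor("fsimage.txt"), true);
        } finally {
          in.close();
        }
      }
    }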
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,465 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
+import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
+import org.apache.hadoop.hdfs.server.namenode.FSImage;
+import org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor.ImageElement;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.security.token.delegation.DelegationKey;
+
+/**
+ * ImageLoaderCurrent processes Hadoop FSImage files and walks over
+ * them using a provided ImageVisitor, calling the visitor at each element
+ * enumerated below.
+ *
+ * The only difference between v18 and v19 was the utilization of the
+ * stickybit. Therefore, the same viewer can read either format.
+ *
+ * Version -19 fsimage layout (with changes from -16 up):
+ * Image version (int)
+ * Namespace ID (int)
+ * NumFiles (long)
+ * Generation stamp (long)
+ * INodes (count = NumFiles)
+ *  INode
+ *    Path (String)
+ *    Replication (short)
+ *    Modification Time (long as date)
+ *    Access Time (long) // added in -16
+ *    Block size (long)
+ *    Num blocks (int)
+ *    Blocks (count = Num blocks)
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Namespace Quota (long)
+ *    Diskspace Quota (long) // added in -18
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String) // Modified in -19
+ *    Symlink (String) // added in -23
+ * NumINodesUnderConstruction (int)
+ * INodesUnderConstruction (count = NumINodesUnderConstruction)
+ *  INodeUnderConstruction
+ *    Path (bytes as string)
+ *    Replication (short)
+ *    Modification time (long as date)
+ *    Preferred block size (long)
+ *    Num blocks (int)
+ *    Blocks
+ *      Block
+ *        Block ID (long)
+ *        Num bytes (long)
+ *        Generation stamp (long)
+ *    Permissions
+ *      Username (String)
+ *      Groupname (String)
+ *      OctalPerms (short -> String)
+ *    Client Name (String)
+ *    Client Machine (String)
+ *    NumLocations (int)
+ *      DatanodeDescriptors (count = numLocations) // not loaded into memory
+ *        short                                    // but still in file
+ *        long
+ *        string
+ *        long
+ *        int
+ *        string
+ *        string
+ *        enum
+ * CurrentDelegationKeyId (int)
+ * NumDelegationKeys (int)
+ *  DelegationKeys (count = NumDelegationKeys)
+ *    DelegationKeyLength (vint)
+ *    DelegationKey (bytes)
+ * DelegationTokenSequenceNumber (int)
+ * NumDelegationTokens (int)
+ *  DelegationTokens (count = NumDelegationTokens)
+ *    DelegationTokenIdentifier
+ *      owner (String)
+ *      renewer (String)
+ *      realUser (String)
+ *      issueDate (vlong)
+ *      maxDate (vlong)
+ *      sequenceNumber (vint)
+ *      masterKeyId (vint)
+ *    expiryTime (long)
+ *
+ */
+class ImageLoaderCurrent implements ImageLoader {
+ protected final DateFormat dateFormat =
+ new SimpleDateFormat("yyyy-MM-dd HH:mm");
+ private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23,
+ -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -41};
+ private int imageVersion = 0;
+
+ /* (non-Javadoc)
+ * @see ImageLoader#canLoadVersion(int)
+ */
+ @Override
+ public boolean canLoadVersion(int version) {
+ for(int v : versions)
+ if(v == version) return true;
+
+ return false;
+ }
+
+ /* (non-Javadoc)
+ * @see ImageLoader#loadImage(java.io.DataInputStream, ImageVisitor, boolean)
+ */
+ @Override
+ public void loadImage(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks) throws IOException {
+ boolean done = false;
+ try {
+ v.start();
+ v.visitEnclosingElement(ImageElement.FS_IMAGE);
+
+ imageVersion = in.readInt();
+ if( !canLoadVersion(imageVersion))
+ throw new IOException("Cannot process fslayout version " + imageVersion);
+
+ v.visit(ImageElement.IMAGE_VERSION, imageVersion);
+ v.visit(ImageElement.NAMESPACE_ID, in.readInt());
+
+ long numInodes = in.readLong();
+
+ v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+
+ if (LayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
+ boolean isCompressed = in.readBoolean();
+ v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
+ if (isCompressed) {
+ String codecClassName = Text.readString(in);
+ v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
+ CompressionCodecFactory codecFac = new CompressionCodecFactory(
+ new Configuration());
+ CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
+ if (codec == null) {
+ throw new IOException("Image compression codec not supported: "
+ + codecClassName);
+ }
+ in = new DataInputStream(codec.createInputStream(in));
+ }
+ }
+ processINodes(in, v, numInodes, skipBlocks);
+
+ processINodesUC(in, v, skipBlocks);
+
+ if (LayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
+ processDelegationTokens(in, v);
+ }
+
+ v.leaveEnclosingElement(); // FSImage
+ done = true;
+ } finally {
+ if (done) {
+ v.finish();
+ } else {
+ v.finishAbnormally();
+ }
+ }
+ }
+
+ /**
+ * Process the Delegation Token related section in fsimage.
+ *
+ * @param in DataInputStream to process
+ * @param v Visitor to walk over records
+ */
+ private void processDelegationTokens(DataInputStream in, ImageVisitor v)
+ throws IOException {
+ v.visit(ImageElement.CURRENT_DELEGATION_KEY_ID, in.readInt());
+ int numDKeys = in.readInt();
+ v.visitEnclosingElement(ImageElement.DELEGATION_KEYS,
+ ImageElement.NUM_DELEGATION_KEYS, numDKeys);
+ for(int i =0; i < numDKeys; i++) {
+ DelegationKey key = new DelegationKey();
+ key.readFields(in);
+ v.visit(ImageElement.DELEGATION_KEY, key.toString());
+ }
+ v.leaveEnclosingElement();
+ v.visit(ImageElement.DELEGATION_TOKEN_SEQUENCE_NUMBER, in.readInt());
+ int numDTokens = in.readInt();
+ v.visitEnclosingElement(ImageElement.DELEGATION_TOKENS,
+ ImageElement.NUM_DELEGATION_TOKENS, numDTokens);
+ for(int i=0; i<numDTokens; i++){
+ DelegationTokenIdentifier id = new DelegationTokenIdentifier();
+ id.readFields(in);
+ long expiryTime = in.readLong();
+ v.visitEnclosingElement(ImageElement.DELEGATION_TOKEN_IDENTIFIER);
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_KIND,
+ id.getKind().toString());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+ id.getSequenceNumber());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+ id.getRenewer().toString());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+ id.getIssueDate());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+ id.getMaxDate());
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+ expiryTime);
+ v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
+ id.getMasterKeyId());
+ v.leaveEnclosingElement(); // DELEGATION_TOKEN_IDENTIFIER
+ }
+ v.leaveEnclosingElement(); // DELEGATION_TOKENS
+ }
+
+ /**
+ * Process the INodes under construction section of the fsimage.
+ *
+ * @param in DataInputStream to process
+ * @param v Visitor to walk over inodes
+ * @param skipBlocks Whether to skip over individual blocks
+ */
+ private void processINodesUC(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks) throws IOException {
+ int numINUC = in.readInt();
+
+ v.visitEnclosingElement(ImageElement.INODES_UNDER_CONSTRUCTION,
+ ImageElement.NUM_INODES_UNDER_CONSTRUCTION, numINUC);
+
+ for(int i = 0; i < numINUC; i++) {
+ v.visitEnclosingElement(ImageElement.INODE_UNDER_CONSTRUCTION);
+ byte [] name = FSImage.readBytes(in);
+ String n = new String(name, "UTF8");
+ v.visit(ImageElement.INODE_PATH, n);
+ v.visit(ImageElement.REPLICATION, in.readShort());
+ v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+
+ v.visit(ImageElement.PREFERRED_BLOCK_SIZE, in.readLong());
+ int numBlocks = in.readInt();
+ processBlocks(in, v, numBlocks, skipBlocks);
+
+ processPermission(in, v);
+ v.visit(ImageElement.CLIENT_NAME, FSImage.readString(in));
+ v.visit(ImageElement.CLIENT_MACHINE, FSImage.readString(in));
+
+ // Skip over the datanode descriptors, which are still stored in the
+ // file but are not used by the namenode or loaded into memory
+ int numLocs = in.readInt();
+ for(int j = 0; j < numLocs; j++) {
+ in.readShort();
+ in.readLong();
+ in.readLong();
+ in.readLong();
+ in.readInt();
+ FSImage.readString(in);
+ FSImage.readString(in);
+ WritableUtils.readEnum(in, AdminStates.class);
+ }
+
+ v.leaveEnclosingElement(); // INodeUnderConstruction
+ }
+
+ v.leaveEnclosingElement(); // INodesUnderConstruction
+ }
+
+ /**
+ * Process the blocks section of the fsimage.
+ *
+ * @param in Datastream to process
+ * @param v Visitor to walk over inodes
+ * @param skipBlocks Whether to skip over individual blocks
+ */
+ private void processBlocks(DataInputStream in, ImageVisitor v,
+ int numBlocks, boolean skipBlocks) throws IOException {
+ v.visitEnclosingElement(ImageElement.BLOCKS,
+ ImageElement.NUM_BLOCKS, numBlocks);
+
+ // directory or symlink, no blocks to process
+ if(numBlocks == -1 || numBlocks == -2) {
+ v.leaveEnclosingElement(); // Blocks
+ return;
+ }
+
+ if(skipBlocks) {
+ int bytesToSkip = ((Long.SIZE * 3 /* fields */) / 8 /*bits*/) * numBlocks;
+ if(in.skipBytes(bytesToSkip) != bytesToSkip)
+ throw new IOException("Error skipping over blocks");
+
+ } else {
+ for(int j = 0; j < numBlocks; j++) {
+ v.visitEnclosingElement(ImageElement.BLOCK);
+ v.visit(ImageElement.BLOCK_ID, in.readLong());
+ v.visit(ImageElement.NUM_BYTES, in.readLong());
+ v.visit(ImageElement.GENERATION_STAMP, in.readLong());
+ v.leaveEnclosingElement(); // Block
+ }
+ }
+ v.leaveEnclosingElement(); // Blocks
+ }
+
+ /**
+ * Extract the INode permissions stored in the fsimage file.
+ *
+ * @param in Datastream to process
+ * @param v Visitor to walk over inodes
+ */
+ private void processPermission(DataInputStream in, ImageVisitor v)
+ throws IOException {
+ v.visitEnclosingElement(ImageElement.PERMISSIONS);
+ v.visit(ImageElement.USER_NAME, Text.readString(in));
+ v.visit(ImageElement.GROUP_NAME, Text.readString(in));
+ FsPermission fsp = new FsPermission(in.readShort());
+ v.visit(ImageElement.PERMISSION_STRING, fsp.toString());
+ v.leaveEnclosingElement(); // Permissions
+ }
+
+ /**
+ * Process the INode records stored in the fsimage.
+ *
+ * @param in Datastream to process
+ * @param v Visitor to walk over INodes
+ * @param numInodes Number of INodes stored in file
+ * @param skipBlocks Whether to skip over the blocks within each INode
+ * @throws IOException
+ */
+ private void processINodes(DataInputStream in, ImageVisitor v,
+ long numInodes, boolean skipBlocks) throws IOException {
+ v.visitEnclosingElement(ImageElement.INODES,
+ ImageElement.NUM_INODES, numInodes);
+
+ if (LayoutVersion.supports(Feature.FSIMAGE_NAME_OPTIMIZATION, imageVersion)) {
+ processLocalNameINodes(in, v, numInodes, skipBlocks);
+ } else { // full path name
+ processFullNameINodes(in, v, numInodes, skipBlocks);
+ }
+
+ v.leaveEnclosingElement(); // INodes
+ }
+
+ /**
+ * Process an image whose inodes store only local names
+ * (FSIMAGE_NAME_OPTIMIZATION), reconstructing full paths per directory.
+ *
+ * @param in image stream
+ * @param v visitor
+ * @param numInodes number of inodes to read
+ * @param skipBlocks skip blocks or not
+ * @throws IOException if an error occurs
+ */
+ private void processLocalNameINodes(DataInputStream in, ImageVisitor v,
+ long numInodes, boolean skipBlocks) throws IOException {
+ // process root
+ processINode(in, v, skipBlocks, "");
+ numInodes--;
+ while (numInodes > 0) {
+ numInodes -= processDirectory(in, v, skipBlocks);
+ }
+ }
+
+ private int processDirectory(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks) throws IOException {
+ String parentName = FSImage.readString(in);
+ int numChildren = in.readInt();
+ for (int i=0; i<numChildren; i++) {
+ processINode(in, v, skipBlocks, parentName);
+ }
+ return numChildren;
+ }
+
+ /**
+ * Process an image whose inodes store full path names.
+ *
+ * @param in image stream
+ * @param v visitor
+ * @param numInodes number of inodes to read
+ * @param skipBlocks skip blocks or not
+ * @throws IOException if an error occurs
+ */
+ private void processFullNameINodes(DataInputStream in, ImageVisitor v,
+ long numInodes, boolean skipBlocks) throws IOException {
+ for(long i = 0; i < numInodes; i++) {
+ processINode(in, v, skipBlocks, null);
+ }
+ }
+
+ /**
+ * Process an INode
+ *
+ * @param in image stream
+ * @param v visitor
+ * @param skipBlocks skip blocks or not
+ * @param parentName the name of its parent node
+ * @throws IOException
+ */
+ private void processINode(DataInputStream in, ImageVisitor v,
+ boolean skipBlocks, String parentName) throws IOException {
+ v.visitEnclosingElement(ImageElement.INODE);
+ String pathName = FSImage.readString(in);
+ if (parentName != null) { // local name
+ pathName = "/" + pathName;
+ if (!"/".equals(parentName)) { // children of non-root directory
+ pathName = parentName + pathName;
+ }
+ }
+
+ v.visit(ImageElement.INODE_PATH, pathName);
+ v.visit(ImageElement.REPLICATION, in.readShort());
+ v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
+ if(LayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion))
+ v.visit(ImageElement.ACCESS_TIME, formatDate(in.readLong()));
+ v.visit(ImageElement.BLOCK_SIZE, in.readLong());
+ int numBlocks = in.readInt();
+
+ processBlocks(in, v, numBlocks, skipBlocks);
+
+ // File or directory
+ if (numBlocks > 0 || numBlocks == -1) {
+ v.visit(ImageElement.NS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
+ if (LayoutVersion.supports(Feature.DISKSPACE_QUOTA, imageVersion))
+ v.visit(ImageElement.DS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
+ }
+ if (numBlocks == -2) {
+ v.visit(ImageElement.SYMLINK, Text.readString(in));
+ }
+
+ processPermission(in, v);
+ v.leaveEnclosingElement(); // INode
+ }
+
+ /**
+ * Helper method to format dates during processing.
+ * @param date Date as read from image file
+ * @return Formatted date string
+ */
+ private String formatDate(long date) {
+ return dateFormat.format(new Date(date));
+ }
+}
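The subtlest part of the loader above is the path handling in processINode():
when FSIMAGE_NAME_OPTIMIZATION is in effect, each inode record carries only
its last path component, and the full path is rebuilt from the enclosing
directory's name. The same rule, restated as a hypothetical standalone helper
(not part of the patch) with a few worked cases:

    // Hypothetical restatement of processINode()'s path reconstruction.
    public class PathRuleSketch {
      static String buildPath(String parentName, String localName) {
        if (parentName == null) {      // full-name image: already a complete path
          return localName;
        }
        String path = "/" + localName;
        if (!"/".equals(parentName)) { // children of non-root directories
          path = parentName + path;
        }
        return path;
      }

      public static void main(String[] args) {
        System.out.println(buildPath(null, "/a/b")); // "/a/b" (full-name images)
        System.out.println(buildPath("", ""));       // "/"    (the root inode)
        System.out.println(buildPath("/", "a"));     // "/a"
        System.out.println(buildPath("/a", "b"));    // "/a/b"
      }
    }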
Added: hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java?rev=1462875&view=auto
==============================================================================
--- hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java (added)
+++ hadoop/common/branches/branch-1/src/hdfs/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java Sun Mar 31 04:27:29 2013
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.IOException;
+
+/**
+ * An implementation of ImageVisitor can traverse the structure of a
+ * Hadoop fsimage and respond to each of the structures within the file.
+ */
+abstract class ImageVisitor {
+
+ /**
+ * Structural elements of an FSImage that may be encountered within the
+ * file. ImageVisitors are able to handle processing any of these elements.
+ */
+ public enum ImageElement {
+ FS_IMAGE,
+ IMAGE_VERSION,
+ NAMESPACE_ID,
+ IS_COMPRESSED,
+ COMPRESS_CODEC,
+ LAYOUT_VERSION,
+ NUM_INODES,
+ GENERATION_STAMP,
+ INODES,
+ INODE,
+ INODE_PATH,
+ REPLICATION,
+ MODIFICATION_TIME,
+ ACCESS_TIME,
+ BLOCK_SIZE,
+ NUM_BLOCKS,
+ BLOCKS,
+ BLOCK,
+ BLOCK_ID,
+ NUM_BYTES,
+ NS_QUOTA,
+ DS_QUOTA,
+ PERMISSIONS,
+ SYMLINK,
+ NUM_INODES_UNDER_CONSTRUCTION,
+ INODES_UNDER_CONSTRUCTION,
+ INODE_UNDER_CONSTRUCTION,
+ PREFERRED_BLOCK_SIZE,
+ CLIENT_NAME,
+ CLIENT_MACHINE,
+ USER_NAME,
+ GROUP_NAME,
+ PERMISSION_STRING,
+ CURRENT_DELEGATION_KEY_ID,
+ NUM_DELEGATION_KEYS,
+ DELEGATION_KEYS,
+ DELEGATION_KEY,
+ DELEGATION_TOKEN_SEQUENCE_NUMBER,
+ NUM_DELEGATION_TOKENS,
+ DELEGATION_TOKENS,
+ DELEGATION_TOKEN_IDENTIFIER,
+ DELEGATION_TOKEN_IDENTIFIER_KIND,
+ DELEGATION_TOKEN_IDENTIFIER_SEQNO,
+ DELEGATION_TOKEN_IDENTIFIER_OWNER,
+ DELEGATION_TOKEN_IDENTIFIER_RENEWER,
+ DELEGATION_TOKEN_IDENTIFIER_REALUSER,
+ DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
+ DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
+ DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
+ DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID
+ }
+
+ /**
+ * Begin visiting the fsimage structure. Opportunity to perform
+ * any initialization necessary for the implementing visitor.
+ */
+ abstract void start() throws IOException;
+
+ /**
+ * Finish visiting the fsimage structure. Opportunity to perform any
+ * clean up necessary for the implementing visitor.
+ */
+ abstract void finish() throws IOException;
+
+ /**
+ * Finish visiting the fsimage structure after an error has occurred
+ * during the processing. Opportunity to perform any clean up necessary
+ * for the implementing visitor.
+ */
+ abstract void finishAbnormally() throws IOException;
+
+ /**
+ * Visit a non-enclosing element of the fsimage with the specified value.
+ *
+ * @param element FSImage element
+ * @param value Element's value
+ */
+ abstract void visit(ImageElement element, String value) throws IOException;
+
+ // Convenience methods to automatically convert numeric value types to strings
+ void visit(ImageElement element, int value) throws IOException {
+ visit(element, Integer.toString(value));
+ }
+
+ void visit(ImageElement element, long value) throws IOException {
+ visit(element, Long.toString(value));
+ }
+
+ /**
+ * Begin visiting an element that encloses another element, such as
+ * the beginning of the list of blocks that comprise a file.
+ *
+ * @param element Element being visited
+ */
+ abstract void visitEnclosingElement(ImageElement element)
+ throws IOException;
+
+ /**
+ * Begin visiting an element that encloses another element, such as
+ * the beginning of the list of blocks that comprise a file.
+ *
+ * Also provide an additional key and value for the element, such as the
+ * number of items within the element.
+ *
+ * @param element Element being visited
+ * @param key Key describing the element being visited
+ * @param value Value associated with element being visited
+ */
+ abstract void visitEnclosingElement(ImageElement element,
+ ImageElement key, String value) throws IOException;
+
+ // Convenience methods to automatically convert value types to strings
+ void visitEnclosingElement(ImageElement element,
+ ImageElement key, int value)
+ throws IOException {
+ visitEnclosingElement(element, key, Integer.toString(value));
+ }
+
+ void visitEnclosingElement(ImageElement element,
+ ImageElement key, long value)
+ throws IOException {
+ visitEnclosingElement(element, key, Long.toString(value));
+ }
+
+ /**
+ * Leave current enclosing element. Called, for instance, at the end of
+ * processing the blocks that comprise a file.
+ */
+ abstract void leaveEnclosingElement() throws IOException;
+}
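Since the numeric visit and visitEnclosingElement overloads above are already
concrete, a new visitor only has to implement the seven abstract hooks. Below
is a minimal hypothetical visitor that counts inodes, shown for illustration
and not part of this commit; it would sit in the same package, because
ImageVisitor and ImageElement are package-private.

    package org.apache.hadoop.hdfs.tools.offlineImageViewer;

    // Hypothetical example: count INODE and INODE_UNDER_CONSTRUCTION elements
    // and print the total when the walk finishes.
    class InodeCountingVisitor extends ImageVisitor {
      private long count = 0;

      void start() {}

      void finish() {
        System.out.println("inodes visited: " + count);
      }

      void finishAbnormally() {
        System.out.println("walk ended early; inodes visited so far: " + count);
      }

      void visit(ImageElement element, String value) {}

      void visitEnclosingElement(ImageElement element) {
        if (element == ImageElement.INODE ||
            element == ImageElement.INODE_UNDER_CONSTRUCTION) {
          count++;
        }
      }

      void visitEnclosingElement(ImageElement element,
          ImageElement key, String value) {
        visitEnclosingElement(element);
      }

      void leaveEnclosingElement() {}
    }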