Posted to common-commits@hadoop.apache.org by cm...@apache.org on 2016/03/03 02:56:23 UTC

[1/2] hadoop git commit: HDFS-9835. OIV: add ReverseXML processor which reconstructs an fsimage from an XML file (cmccabe)

Repository: hadoop
Updated Branches:
  refs/heads/trunk 67880ccae -> 700b0e401


http://git-wip-us.apache.org/repos/asf/hadoop/blob/700b0e40/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java
index d23ff61..71dc939 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java
@@ -23,17 +23,24 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
 import java.io.RandomAccessFile;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.Date;
+import java.util.TimeZone;
 
+import com.google.protobuf.ByteString;
+import org.apache.commons.codec.binary.Hex;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.permission.AclEntry;
+import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoExpirationProto;
 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
 import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
 import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+import org.apache.hadoop.hdfs.protocol.proto.XAttrProtos;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
 import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
 import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
@@ -55,6 +62,14 @@ import org.apache.hadoop.hdfs.util.XMLUtils;
 import org.apache.hadoop.util.LimitInputStream;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
+import org.apache.hadoop.util.VersionInfo;
+
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_MASK;
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_OFFSET;
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_EXT_MASK;
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_EXT_OFFSET;
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAME_OFFSET;
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAME_MASK;
 
 /**
  * PBImageXmlWriter walks over an fsimage structure and writes out
@@ -64,11 +79,20 @@ import com.google.common.collect.Lists;
 public final class PBImageXmlWriter {
   private final Configuration conf;
   private final PrintStream out;
+  private final SimpleDateFormat isoDateFormat;
   private String[] stringTable;
 
+  public static SimpleDateFormat createSimpleDateFormat() {
+    SimpleDateFormat format =
+      new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");
+    format.setTimeZone(TimeZone.getTimeZone("UTC"));
+    return format;
+  }
+
   public PBImageXmlWriter(Configuration conf, PrintStream out) {
     this.conf = conf;
     this.out = out;
+    this.isoDateFormat = createSimpleDateFormat();
   }
 
   public void visit(RandomAccessFile file) throws IOException {
@@ -80,6 +104,16 @@ public final class PBImageXmlWriter {
     try (FileInputStream fin = new FileInputStream(file.getFD())) {
       out.print("<?xml version=\"1.0\"?>\n<fsimage>");
 
+      out.print("<version>");
+      o("layoutVersion", summary.getLayoutVersion());
+      o("onDiskVersion", summary.getOndiskVersion());
+      // Output the version of OIV (which is not necessarily the version of
+      // the fsimage file).  This could be helpful in the case where a bug
+      // in OIV leads to information loss in the XML-- we can quickly tell
+      // if a specific fsimage XML file is affected by this bug.
+      o("oivRevision", VersionInfo.getRevision());
+      out.print("</version>\n");
+
       ArrayList<FileSummary.Section> sections = Lists.newArrayList(summary
           .getSectionsList());
       Collections.sort(sections, new Comparator<FileSummary.Section>() {
@@ -146,6 +180,8 @@ public final class PBImageXmlWriter {
     out.print("<CacheManagerSection>");
     CacheManagerSection s = CacheManagerSection.parseDelimitedFrom(is);
     o("nextDirectiveId", s.getNextDirectiveId());
+    o("numDirectives", s.getNumDirectives());
+    o("numPools", s.getNumPools());
     for (int i = 0; i < s.getNumPools(); ++i) {
       CachePoolInfoProto p = CachePoolInfoProto.parseDelimitedFrom(is);
       out.print("<pool>");
@@ -163,7 +199,7 @@ public final class PBImageXmlWriter {
           .o("replication", p.getReplication()).o("pool", p.getPool());
       out.print("<expiration>");
       CacheDirectiveInfoExpirationProto e = p.getExpiration();
-      o("millis", e.getMillis()).o("relatilve", e.getIsRelative());
+      o("millis", e.getMillis()).o("relative", e.getIsRelative());
       out.print("</expiration>\n");
       out.print("</directive>\n");
     }
@@ -187,13 +223,48 @@ public final class PBImageXmlWriter {
     out.print("</FileUnderConstructionSection>\n");
   }
 
+  private void dumpXattrs(INodeSection.XAttrFeatureProto xattrs) {
+    out.print("<xattrs>");
+    for (INodeSection.XAttrCompactProto xattr : xattrs.getXAttrsList()) {
+      out.print("<xattr>");
+      int encodedName = xattr.getName();
+      int ns = (XATTR_NAMESPACE_MASK & (encodedName >> XATTR_NAMESPACE_OFFSET)) |
+          ((XATTR_NAMESPACE_EXT_MASK & (encodedName >> XATTR_NAMESPACE_EXT_OFFSET)) << 2);
+      o("ns", XAttrProtos.XAttrProto.
+          XAttrNamespaceProto.valueOf(ns).toString());
+      o("name", stringTable[XATTR_NAME_MASK & (encodedName >> XATTR_NAME_OFFSET)]);
+      ByteString val = xattr.getValue();
+      if (val.isValidUtf8()) {
+        o("val", val.toStringUtf8());
+      } else {
+        o("valHex", Hex.encodeHexString(val.toByteArray()));
+      }
+      out.print("</xattr>");
+    }
+    out.print("</xattrs>");
+  }
+
   private void dumpINodeDirectory(INodeDirectory d) {
     o("mtime", d.getModificationTime()).o("permission",
         dumpPermission(d.getPermission()));
+    if (d.hasXAttrs()) {
+      dumpXattrs(d.getXAttrs());
+    }
     dumpAcls(d.getAcl());
     if (d.hasDsQuota() && d.hasNsQuota()) {
       o("nsquota", d.getNsQuota()).o("dsquota", d.getDsQuota());
     }
+    INodeSection.QuotaByStorageTypeFeatureProto typeQuotas =
+      d.getTypeQuotas();
+    if (typeQuotas != null) {
+      for (INodeSection.QuotaByStorageTypeEntryProto entry:
+            typeQuotas.getQuotasList()) {
+        out.print("<typeQuota>");
+        o("type", entry.getStorageType().toString());
+        o("quota", entry.getQuota());
+        out.print("</typeQuota>");
+      }
+    }
   }
 
   private void dumpINodeDirectorySection(InputStream in) throws IOException {
@@ -208,10 +279,10 @@ public final class PBImageXmlWriter {
       out.print("<directory>");
       o("parent", e.getParent());
       for (long id : e.getChildrenList()) {
-        o("inode", id);
+        o("child", id);
       }
       for (int refId : e.getRefChildrenList()) {
-        o("inodereference-index", refId);
+        o("refChild", refId);
       }
       out.print("</directory>\n");
     }
@@ -244,6 +315,9 @@ public final class PBImageXmlWriter {
         .o("atime", f.getAccessTime())
         .o("preferredBlockSize", f.getPreferredBlockSize())
         .o("permission", dumpPermission(f.getPermission()));
+    if (f.hasXAttrs()) {
+      dumpXattrs(f.getXAttrs());
+    }
     dumpAcls(f.getAcl());
     if (f.getBlocksCount() > 0) {
       out.print("<blocks>");
@@ -255,6 +329,12 @@ public final class PBImageXmlWriter {
       }
       out.print("</blocks>\n");
     }
+    if (f.hasStoragePolicyID()) {
+      o("storagePolicyId", f.getStoragePolicyID());
+    }
+    if (f.getIsStriped()) {
+      out.print("<isStriped/>");
+    }
 
     if (f.hasFileUC()) {
       INodeSection.FileUnderConstructionFeature u = f.getFileUC();
@@ -281,25 +361,28 @@ public final class PBImageXmlWriter {
     INodeSection s = INodeSection.parseDelimitedFrom(in);
     out.print("<INodeSection>");
     o("lastInodeId", s.getLastInodeId());
+    o("numInodes", s.getNumInodes());
     for (int i = 0; i < s.getNumInodes(); ++i) {
       INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in);
       out.print("<inode>");
-      o("id", p.getId()).o("type", p.getType()).o("name",
-          p.getName().toStringUtf8());
-
-      if (p.hasFile()) {
-        dumpINodeFile(p.getFile());
-      } else if (p.hasDirectory()) {
-        dumpINodeDirectory(p.getDirectory());
-      } else if (p.hasSymlink()) {
-        dumpINodeSymlink(p.getSymlink());
-      }
-
+      dumpINodeFields(p);
       out.print("</inode>\n");
     }
     out.print("</INodeSection>\n");
   }
 
+  private void dumpINodeFields(INodeSection.INode p) {
+    o("id", p.getId()).o("type", p.getType()).o("name",
+        p.getName().toStringUtf8());
+    if (p.hasFile()) {
+      dumpINodeFile(p.getFile());
+    } else if (p.hasDirectory()) {
+      dumpINodeDirectory(p.getDirectory());
+    } else if (p.hasSymlink()) {
+      dumpINodeSymlink(p.getSymlink());
+    }
+  }
+
   private void dumpINodeSymlink(INodeSymlink s) {
     o("permission", dumpPermission(s.getPermission()))
         .o("target", s.getTarget().toStringUtf8())
@@ -308,7 +391,8 @@ public final class PBImageXmlWriter {
 
   private void dumpNameSection(InputStream in) throws IOException {
     NameSystemSection s = NameSystemSection.parseDelimitedFrom(in);
-    out.print("<NameSection>\n");
+    out.print("<NameSection>");
+    o("namespaceId", s.getNamespaceId());
     o("genstampV1", s.getGenstampV1()).o("genstampV2", s.getGenstampV2())
         .o("genstampV1Limit", s.getGenstampV1Limit())
         .o("lastAllocatedBlockId", s.getLastAllocatedBlockId())
@@ -317,18 +401,73 @@ public final class PBImageXmlWriter {
   }
 
   private String dumpPermission(long permission) {
-    return FSImageFormatPBINode.Loader.loadPermission(permission, stringTable)
-        .toString();
+    PermissionStatus permStatus = FSImageFormatPBINode.Loader.
+        loadPermission(permission, stringTable);
+    return String.format("%s:%s:%04o", permStatus.getUserName(),
+        permStatus.getGroupName(), permStatus.getPermission().toExtendedShort());
   }
 
   private void dumpSecretManagerSection(InputStream is) throws IOException {
     out.print("<SecretManagerSection>");
     SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(is);
+    int expectedNumDelegationKeys = s.getNumKeys();
+    int expectedNumTokens = s.getNumTokens();
     o("currentId", s.getCurrentId()).o("tokenSequenceNumber",
-        s.getTokenSequenceNumber());
+        s.getTokenSequenceNumber()).
+        o("numDelegationKeys", expectedNumDelegationKeys).
+        o("numTokens", expectedNumTokens);
+    for (int i = 0; i < expectedNumDelegationKeys; i++) {
+      SecretManagerSection.DelegationKey dkey =
+          SecretManagerSection.DelegationKey.parseDelimitedFrom(is);
+      out.print("<delegationKey>");
+      o("id", dkey.getId());
+      o("key", Hex.encodeHexString(dkey.getKey().toByteArray()));
+      if (dkey.hasExpiryDate()) {
+        dumpDate("expiry", dkey.getExpiryDate());
+      }
+      out.print("</delegationKey>");
+    }
+    for (int i = 0; i < expectedNumTokens; i++) {
+      SecretManagerSection.PersistToken token =
+          SecretManagerSection.PersistToken.parseDelimitedFrom(is);
+      out.print("<token>");
+      if (token.hasVersion()) {
+        o("version", token.getVersion());
+      }
+      if (token.hasOwner()) {
+        o("owner", token.getOwner());
+      }
+      if (token.hasRenewer()) {
+        o("renewer", token.getRenewer());
+      }
+      if (token.hasRealUser()) {
+        o("realUser", token.getRealUser());
+      }
+      if (token.hasIssueDate()) {
+        dumpDate("issueDate", token.getIssueDate());
+      }
+      if (token.hasMaxDate()) {
+        dumpDate("maxDate", token.getMaxDate());
+      }
+      if (token.hasSequenceNumber()) {
+        o("sequenceNumber", token.getSequenceNumber());
+      }
+      if (token.hasMasterKeyId()) {
+        o("masterKeyId", token.getMasterKeyId());
+      }
+      if (token.hasExpiryDate()) {
+        dumpDate("expiryDate", token.getExpiryDate());
+      }
+      out.print("</token>");
+    }
     out.print("</SecretManagerSection>");
   }
 
+  private void dumpDate(String tag, long date) {
+    out.print("<" + tag + ">" +
+      isoDateFormat.format(new Date(date)) + "</" + tag + ">");
+  }
+
   private void dumpSnapshotDiffSection(InputStream in) throws IOException {
     out.print("<SnapshotDiffSection>");
     while (true) {
@@ -337,30 +476,46 @@ public final class PBImageXmlWriter {
       if (e == null) {
         break;
       }
-      out.print("<diff>");
-      o("inodeid", e.getInodeId());
+      switch (e.getType()) {
+      case FILEDIFF:
+        out.print("<fileDiffEntry>");
+        break;
+      case DIRECTORYDIFF:
+        out.print("<dirDiffEntry>");
+        break;
+      default:
+        throw new IOException("unknown DiffEntry type " + e.getType());
+      }
+      o("inodeId", e.getInodeId());
+      o("count", e.getNumOfDiff());
       switch (e.getType()) {
       case FILEDIFF: {
         for (int i = 0; i < e.getNumOfDiff(); ++i) {
-          out.print("<filediff>");
+          out.print("<fileDiff>");
           SnapshotDiffSection.FileDiff f = SnapshotDiffSection.FileDiff
               .parseDelimitedFrom(in);
           o("snapshotId", f.getSnapshotId()).o("size", f.getFileSize()).o(
               "name", f.getName().toStringUtf8());
-          out.print("</filediff>\n");
+          out.print("</fileDiff>\n");
         }
       }
         break;
       case DIRECTORYDIFF: {
         for (int i = 0; i < e.getNumOfDiff(); ++i) {
-          out.print("<dirdiff>");
+          out.print("<dirDiff>");
           SnapshotDiffSection.DirectoryDiff d = SnapshotDiffSection.DirectoryDiff
               .parseDelimitedFrom(in);
           o("snapshotId", d.getSnapshotId())
-              .o("isSnapshotroot", d.getIsSnapshotRoot())
               .o("childrenSize", d.getChildrenSize())
-              .o("name", d.getName().toStringUtf8());
-
+              .o("isSnapshotRoot", d.getIsSnapshotRoot())
+              .o("name", d.getName().toStringUtf8())
+              .o("createdListSize", d.getCreatedListSize());
+          for (long did : d.getDeletedINodeList()) {
+            o("deletedInode", did);
+          }
+          for (int dRefid : d.getDeletedINodeRefList()) {
+            o("deletedInoderef", dRefid);
+          }
           for (int j = 0; j < d.getCreatedListSize(); ++j) {
             SnapshotDiffSection.CreatedListEntry ce = SnapshotDiffSection.CreatedListEntry
                 .parseDelimitedFrom(in);
@@ -368,24 +523,23 @@ public final class PBImageXmlWriter {
             o("name", ce.getName().toStringUtf8());
             out.print("</created>\n");
           }
-          for (long did : d.getDeletedINodeList()) {
-            out.print("<deleted>");
-            o("inode", did);
-            out.print("</deleted>\n");
-          }
-          for (int dRefid : d.getDeletedINodeRefList()) {
-            out.print("<deleted>");
-            o("inodereference-index", dRefid);
-            out.print("</deleted>\n");
-          }
-          out.print("</dirdiff>\n");
+          out.print("</dirDiff>\n");
         }
-      }
         break;
+      }
       default:
         break;
       }
-      out.print("</diff>");
+      switch (e.getType()) {
+      case FILEDIFF:
+        out.print("</fileDiffEntry>");
+        break;
+      case DIRECTORYDIFF:
+        out.print("</dirDiffEntry>");
+        break;
+      default:
+        throw new IOException("unknown DiffEntry type " + e.getType());
+      }
     }
     out.print("</SnapshotDiffSection>\n");
   }
@@ -394,6 +548,7 @@ public final class PBImageXmlWriter {
     out.print("<SnapshotSection>");
     SnapshotSection s = SnapshotSection.parseDelimitedFrom(in);
     o("snapshotCounter", s.getSnapshotCounter());
+    o("numSnapshots", s.getNumSnapshots());
     if (s.getSnapshottableDirCount() > 0) {
       out.print("<snapshottableDir>");
       for (long id : s.getSnapshottableDirList()) {
@@ -404,7 +559,12 @@ public final class PBImageXmlWriter {
     for (int i = 0; i < s.getNumSnapshots(); ++i) {
       SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot
           .parseDelimitedFrom(in);
-      o("snapshot", pbs.getSnapshotId());
+      out.print("<snapshot>");
+      o("id", pbs.getSnapshotId());
+      out.print("<root>");
+      dumpINodeFields(pbs.getRoot());
+      out.print("</root>");
+      out.print("</snapshot>");
     }
     out.print("</SnapshotSection>\n");
   }
@@ -420,6 +580,14 @@ public final class PBImageXmlWriter {
   }
 
   private PBImageXmlWriter o(final String e, final Object v) {
+    if (v instanceof Boolean) {
+      // For booleans, the presence of the element indicates true, and its
+      // absence indicates false.
+      if ((Boolean)v != false) {
+        out.print("<" + e + "/>");
+      }
+      return this;
+    }
     out.print("<" + e + ">" +
         XMLUtils.mangleXmlString(v.toString(), true) + "</" + e + ">");
     return this;
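
The most intricate part of the writer changes is dumpXattrs(), which unpacks the XAttrCompactProto name field: the low two bits of the namespace id sit at bit 30, a namespace extension bit at bit 5, and a 24-bit string-table id for the xattr name at bit 6. A standalone sketch of the decode, using the masks and offsets this patch makes public in FSImageFormatPBINode (the method names here are illustrative, not part of the patch):

  import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.*;

  // Decode the packed XAttrCompactProto "name" field into a namespace
  // ordinal and a string-table id for the xattr name.
  static int decodeNamespace(int encodedName) {
    return (XATTR_NAMESPACE_MASK & (encodedName >> XATTR_NAMESPACE_OFFSET))
        | ((XATTR_NAMESPACE_EXT_MASK
            & (encodedName >> XATTR_NAMESPACE_EXT_OFFSET)) << 2);
  }

  static int decodeNameId(int encodedName) {
    return XATTR_NAME_MASK & (encodedName >> XATTR_NAME_OFFSET);
  }

The permission rendering also changes to a machine-parseable user:group:octal form (for example "hdfs:supergroup:0644"; names are samples), presumably so the new ReverseXML processor can parse it back via permissionXmlToU64 in OfflineImageReconstructor.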

http://git-wip-us.apache.org/repos/asf/hadoop/blob/700b0e40/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
index 91b79e2..c7a6ae9 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
@@ -47,6 +47,8 @@ import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 
+import com.google.common.io.Files;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.output.NullOutputStream;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -66,6 +68,8 @@ import org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.log4j.Level;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
@@ -89,8 +93,7 @@ public class TestOfflineImageViewer {
   final static HashMap<String, FileStatus> writtenFiles = Maps.newHashMap();
   static int dirCount = 0;
 
-  @Rule
-  public TemporaryFolder folder = new TemporaryFolder();
+  private static File tempDir;
 
   // Create a populated namespace for later testing. Save its contents to a
   // data structure and store its fsimage location.
@@ -98,6 +101,7 @@ public class TestOfflineImageViewer {
   // multiple tests.
   @BeforeClass
   public static void createOriginalFSImage() throws IOException {
+    tempDir = Files.createTempDir();
     MiniDFSCluster cluster = null;
     try {
       Configuration conf = new Configuration();
@@ -169,6 +173,9 @@ public class TestOfflineImageViewer {
       hdfs.setXAttr(xattr, "user.a2", new byte[]{ 0x37, 0x38, 0x39 });
       // OIV should be able to handle empty value XAttrs
       hdfs.setXAttr(xattr, "user.a3", null);
+      // OIV should be able to handle XAttr values that can't be expressed
+      // as UTF8
+      hdfs.setXAttr(xattr, "user.a4", new byte[]{ -0x3d, 0x28 });
       writtenFiles.put(xattr.toString(), hdfs.getFileStatus(xattr));
 
       // Write results to the fsimage file
@@ -190,6 +197,7 @@ public class TestOfflineImageViewer {
 
   @AfterClass
   public static void deleteOriginalFSImage() throws IOException {
+    FileUtils.deleteQuietly(tempDir);
     if (originalFsimage != null && originalFsimage.exists()) {
       originalFsimage.delete();
     }
@@ -204,7 +212,7 @@ public class TestOfflineImageViewer {
 
   @Test(expected = IOException.class)
   public void testTruncatedFSImage() throws IOException {
-    File truncatedFile = folder.newFile();
+    File truncatedFile = new File(tempDir, "truncatedFsImage");
     PrintStream output = new PrintStream(NullOutputStream.NULL_OUTPUT_STREAM);
     copyPartOfFile(originalFsimage, truncatedFile);
     new FileDistributionCalculator(new Configuration(), 0, 0, output)
@@ -449,4 +457,46 @@ public class TestOfflineImageViewer {
     connection.connect();
     assertEquals(expectedCode, connection.getResponseCode());
   }
+
+  /**
+   * Tests the ReverseXML processor.
+   *
+   * 1. Translate fsimage -> reverseImage.xml
+   * 2. Translate reverseImage.xml -> reverseImage
+   * 3. Translate reverseImage -> reverse2Image.xml
+   * 4. Verify that reverseImage.xml and reverse2Image.xml match
+   *
+   * @throws Throwable
+   */
+  @Test
+  public void testReverseXmlRoundTrip() throws Throwable {
+    GenericTestUtils.setLogLevel(OfflineImageReconstructor.LOG,
+        Level.TRACE);
+    File reverseImageXml = new File(tempDir, "reverseImage.xml");
+    File reverseImage =  new File(tempDir, "reverseImage");
+    File reverseImage2Xml =  new File(tempDir, "reverseImage2.xml");
+    LOG.info("Creating reverseImage.xml=" + reverseImageXml.getAbsolutePath() +
+        ", reverseImage=" + reverseImage.getAbsolutePath() +
+        ", reverseImage2Xml=" + reverseImage2Xml.getAbsolutePath());
+    if (OfflineImageViewerPB.run(new String[] { "-p", "XML",
+         "-i", originalFsimage.getAbsolutePath(),
+         "-o", reverseImageXml.getAbsolutePath() }) != 0) {
+      throw new IOException("oiv returned failure creating first XML file.");
+    }
+    if (OfflineImageViewerPB.run(new String[] { "-p", "ReverseXML",
+          "-i", reverseImageXml.getAbsolutePath(),
+          "-o", reverseImage.getAbsolutePath() }) != 0) {
+      throw new IOException("oiv returned failure recreating fsimage file.");
+    }
+    if (OfflineImageViewerPB.run(new String[] { "-p", "XML",
+        "-i", reverseImage.getAbsolutePath(),
+        "-o", reverseImage2Xml.getAbsolutePath() }) != 0) {
+      throw new IOException("oiv returned failure creating second " +
+          "XML file.");
+    }
+    // The XML file we wrote based on the re-created fsimage should be the
+    // same as the one we dumped from the original fsimage.
+    Assert.assertEquals("",
+      GenericTestUtils.getFilesDiff(reverseImageXml, reverseImage2Xml));
+  }
 }
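
The new user.a4 xattr bytes { -0x3d, 0x28 } are 0xc3 0x28, a classic invalid UTF-8 sequence, so the writer must fall back to the <valHex> form and the reconstructor must decode it again. A minimal sketch of that leg of the round trip, assuming only the commons-codec Hex and JAXB HexBinaryAdapter classes the patch itself uses:

  import java.util.Arrays;
  import javax.xml.bind.annotation.adapters.HexBinaryAdapter;
  import org.apache.commons.codec.binary.Hex;

  public class ValHexRoundTrip {
    public static void main(String[] args) {
      byte[] raw = new byte[] { (byte) 0xc3, 0x28 };        // not valid UTF-8
      String hex = Hex.encodeHexString(raw);                // writer: "c328"
      byte[] back = new HexBinaryAdapter().unmarshal(hex);  // reconstructor
      if (!Arrays.equals(raw, back)) {
        throw new AssertionError("hex round trip failed");
      }
    }
  }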


[2/2] hadoop git commit: HDFS-9835. OIV: add ReverseXML processor which reconstructs an fsimage from an XML file (cmccabe)

Posted by cm...@apache.org.
HDFS-9835. OIV: add ReverseXML processor which reconstructs an fsimage from an XML file (cmccabe)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/700b0e40
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/700b0e40
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/700b0e40

Branch: refs/heads/trunk
Commit: 700b0e4019cf483f7532609711812150b8c44742
Parents: 67880cc
Author: Colin Patrick Mccabe <cm...@cloudera.com>
Authored: Thu Feb 25 16:43:54 2016 -0800
Committer: Colin Patrick Mccabe <cm...@cloudera.com>
Committed: Wed Mar 2 17:56:11 2016 -0800

----------------------------------------------------------------------
 .../apache/hadoop/test/GenericTestUtils.java    |   56 +
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |    2 +
 .../server/namenode/FSImageFormatPBINode.java   |   12 +-
 .../OfflineImageReconstructor.java              | 1639 ++++++++++++++++++
 .../OfflineImageViewerPB.java                   |   22 +-
 .../offlineImageViewer/PBImageXmlWriter.java    |  248 ++-
 .../TestOfflineImageViewer.java                 |   56 +-
 7 files changed, 1983 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/700b0e40/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
index 7e494a8..1907094 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java
@@ -17,8 +17,12 @@
  */
 package org.apache.hadoop.test;
 
+import java.io.BufferedReader;
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.StringWriter;
 import java.lang.management.ManagementFactory;
 import java.lang.management.ThreadInfo;
@@ -32,6 +36,7 @@ import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.regex.Pattern;
 
+import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.fs.FileUtil;
@@ -481,4 +486,55 @@ public abstract class GenericTestUtils {
     Assume.assumeTrue(
         Boolean.parseBoolean(System.getProperty("runningWithNative", "false")));
   }
+
+  /**
+   * Get the diff between two files.
+   *
+   * @param a The first file to compare.
+   * @param b The second file to compare.
+   * @return The empty string if there is no diff; the diff, otherwise.
+   *
+   * @throws IOException If there is an error reading either file.
+   */
+  public static String getFilesDiff(File a, File b) throws IOException {
+    StringBuilder bld = new StringBuilder();
+    BufferedReader ra = null, rb = null;
+    try {
+      ra = new BufferedReader(
+          new InputStreamReader(new FileInputStream(a)));
+      rb = new BufferedReader(
+          new InputStreamReader(new FileInputStream(b)));
+      while (true) {
+        String la = ra.readLine();
+        String lb = rb.readLine();
+        if (la == null) {
+          if (lb != null) {
+            // File b has extra lines; emit the one we just read, then
+            // drain the rest of b's reader.
+            bld.append(" + ").append(lb).append("\n");
+            addPlusses(bld, rb);
+          }
+          break;
+        } else if (lb == null) {
+          // File a has extra lines; emit the one we just read, then
+          // drain the rest of a's reader.
+          bld.append(" + ").append(la).append("\n");
+          addPlusses(bld, ra);
+          break;
+        }
+        if (!la.equals(lb)) {
+          bld.append(" - ").append(la).append("\n");
+          bld.append(" + ").append(lb).append("\n");
+        }
+      }
+    } finally {
+      IOUtils.closeQuietly(ra);
+      IOUtils.closeQuietly(rb);
+    }
+    return bld.toString();
+  }
+
+  private static void addPlusses(StringBuilder bld, BufferedReader r)
+      throws IOException {
+    String l;
+    while ((l = r.readLine()) != null) {
+      bld.append(" + ").append(l).append("\n");
+    }
+  }
 }
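
A quick usage sketch for the new getFilesDiff() helper (the file contents here are made up): differing lines come back prefixed with " - " (first file) and " + " (second file), and identical files yield the empty string, which is what testReverseXmlRoundTrip asserts on.

  import java.io.File;
  import java.io.FileOutputStream;
  import java.io.IOException;
  import org.apache.hadoop.test.GenericTestUtils;

  public class FilesDiffExample {
    static void write(File f, String text) throws IOException {
      try (FileOutputStream out = new FileOutputStream(f)) {
        out.write(text.getBytes("UTF-8"));
      }
    }

    public static void main(String[] args) throws IOException {
      File a = File.createTempFile("diff", ".a");
      File b = File.createTempFile("diff", ".b");
      write(a, "same\nleft\n");
      write(b, "same\nright\n");
      System.out.print(GenericTestUtils.getFilesDiff(a, b));
      // prints:
      //  - left
      //  + right
      a.delete();
      b.delete();
    }
  }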

http://git-wip-us.apache.org/repos/asf/hadoop/blob/700b0e40/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index ee204c8..728342a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -980,6 +980,8 @@ Release 2.9.0 - UNRELEASED
     HDFS-9047. Retire libwebhdfs. (wheat9)
 
   NEW FEATURES
+    HDFS-9835. OIV: add ReverseXML processor which reconstructs an fsimage from
+    an XML file (cmccabe)
 
   IMPROVEMENTS
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/700b0e40/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java
index 2f74a2b..1c37b62 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java
@@ -90,14 +90,14 @@ public final class FSImageFormatPBINode {
   private static final AclEntryType[] ACL_ENTRY_TYPE_VALUES = AclEntryType
       .values();
   
-  private static final int XATTR_NAMESPACE_MASK = 3;
-  private static final int XATTR_NAMESPACE_OFFSET = 30;
-  private static final int XATTR_NAME_MASK = (1 << 24) - 1;
-  private static final int XATTR_NAME_OFFSET = 6;
+  public static final int XATTR_NAMESPACE_MASK = 3;
+  public static final int XATTR_NAMESPACE_OFFSET = 30;
+  public static final int XATTR_NAME_MASK = (1 << 24) - 1;
+  public static final int XATTR_NAME_OFFSET = 6;
 
   /* See the comments in fsimage.proto for an explanation of the following. */
-  private static final int XATTR_NAMESPACE_EXT_OFFSET = 5;
-  private static final int XATTR_NAMESPACE_EXT_MASK = 1;
+  public static final int XATTR_NAMESPACE_EXT_OFFSET = 5;
+  public static final int XATTR_NAMESPACE_EXT_MASK = 1;
 
   private static final XAttr.NameSpace[] XATTR_NAMESPACE_VALUES =
       XAttr.NameSpace.values();
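
These constants are made public so that both PBImageXmlWriter (decoding) and OfflineImageReconstructor (encoding) can share them. A small self-check, not part of the patch, that the two directions are inverses over the full 3-bit namespace range:

  public class XAttrPackingCheck {
    // Same values as FSImageFormatPBINode after this patch.
    static final int NS_MASK = 3, NS_OFFSET = 30;
    static final int NAME_MASK = (1 << 24) - 1, NAME_OFFSET = 6;
    static final int NS_EXT_MASK = 1, NS_EXT_OFFSET = 5;

    static int encode(int nsIdx, int nameId) {
      return (nameId << NAME_OFFSET)
          | ((nsIdx & NS_MASK) << NS_OFFSET)
          | (((nsIdx >> 2) & NS_EXT_MASK) << NS_EXT_OFFSET);
    }

    public static void main(String[] args) {
      for (int ns = 0; ns < 8; ns++) {
        for (int nameId : new int[] { 0, 1, 42, NAME_MASK }) {
          int e = encode(ns, nameId);
          int nsBack = (NS_MASK & (e >> NS_OFFSET))
              | ((NS_EXT_MASK & (e >> NS_EXT_OFFSET)) << 2);
          int nameBack = NAME_MASK & (e >> NAME_OFFSET);
          if (nsBack != ns || nameBack != nameId) {
            throw new AssertionError("ns=" + ns + ", nameId=" + nameId);
          }
        }
      }
    }
  }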

http://git-wip-us.apache.org/repos/asf/hadoop/blob/700b0e40/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java
new file mode 100644
index 0000000..e5d0e2c
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageReconstructor.java
@@ -0,0 +1,1639 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_MASK;
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAME_OFFSET;
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_OFFSET;
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_EXT_OFFSET;
+import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.XATTR_NAMESPACE_EXT_MASK;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.security.DigestOutputStream;
+import java.security.MessageDigest;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+
+import com.google.common.io.CountingOutputStream;
+import com.google.common.primitives.Ints;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.TextFormat;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoExpirationProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
+import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
+import org.apache.hadoop.hdfs.protocol.proto.XAttrProtos;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
+import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection.DiffEntry;
+import org.apache.hadoop.hdfs.util.MD5FileUtils;
+import org.apache.hadoop.hdfs.util.XMLUtils;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.util.StringUtils;
+
+import javax.xml.bind.annotation.adapters.HexBinaryAdapter;
+import javax.xml.stream.XMLEventReader;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.events.XMLEvent;
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+class OfflineImageReconstructor {
+  public static final Log LOG =
+      LogFactory.getLog(OfflineImageReconstructor.class);
+
+  /**
+   * The output stream.
+   */
+  private final CountingOutputStream out;
+
+  /**
+   * A source of XML events based on the input file.
+   */
+  private final XMLEventReader events;
+
+  /**
+   * A map of section names to section handler objects.
+   */
+  private final HashMap<String, SectionProcessor> sections;
+
+  /**
+   * The offset of the start of the current section.
+   */
+  private long sectionStartOffset;
+
+  /**
+   * The FileSummary builder, where we gather information about each section
+   * we wrote.
+   */
+  private final FileSummary.Builder fileSummaryBld =
+      FileSummary.newBuilder();
+
+  /**
+   * The string table.  See registerStringId for details.
+   */
+  private final HashMap<String, Integer> stringTable = new HashMap<>();
+
+  /**
+   * The date formatter to use with fsimage XML files.
+   */
+  private final SimpleDateFormat isoDateFormat;
+
+  /**
+   * The latest string ID.  See registerStringId for details.
+   */
+  private int latestStringId = 0;
+
+  private OfflineImageReconstructor(CountingOutputStream out,
+      InputStreamReader reader) throws XMLStreamException {
+    this.out = out;
+    XMLInputFactory factory = XMLInputFactory.newInstance();
+    this.events = factory.createXMLEventReader(reader);
+    this.sections = new HashMap<>();
+    this.sections.put(NameSectionProcessor.NAME, new NameSectionProcessor());
+    this.sections.put(INodeSectionProcessor.NAME, new INodeSectionProcessor());
+    this.sections.put(SecretManagerSectionProcessor.NAME,
+        new SecretManagerSectionProcessor());
+    this.sections.put(CacheManagerSectionProcessor.NAME,
+        new CacheManagerSectionProcessor());
+    this.sections.put(SnapshotDiffSectionProcessor.NAME,
+        new SnapshotDiffSectionProcessor());
+    this.sections.put(INodeReferenceSectionProcessor.NAME,
+        new INodeReferenceSectionProcessor());
+    this.sections.put(INodeDirectorySectionProcessor.NAME,
+        new INodeDirectorySectionProcessor());
+    this.sections.put(FilesUnderConstructionSectionProcessor.NAME,
+        new FilesUnderConstructionSectionProcessor());
+    this.sections.put(SnapshotSectionProcessor.NAME,
+        new SnapshotSectionProcessor());
+    this.isoDateFormat = PBImageXmlWriter.createSimpleDateFormat();
+  }
+
+  /**
+   * Read the next tag start or end event.
+   *
+   * @param expected     The name of the next tag we expect.
+   *                     We will validate that the tag has this name,
+   *                     unless this string is enclosed in square brackets.
+   * @param allowEnd     If true, we will also accept end events.
+   *                     If false, end events cause an exception.
+   *
+   * @return             The next tag start or end event.
+   */
+  private XMLEvent expectTag(String expected, boolean allowEnd)
+      throws IOException {
+    XMLEvent ev = null;
+    while (true) {
+      try {
+        ev = events.nextEvent();
+      } catch (XMLStreamException e) {
+        throw new IOException("Expecting " + expected +
+            ", but got XMLStreamException", e);
+      }
+      switch (ev.getEventType()) {
+      case XMLEvent.ATTRIBUTE:
+        throw new IOException("Got unexpected attribute: " + ev);
+      case XMLEvent.CHARACTERS:
+        if (!ev.asCharacters().isWhiteSpace()) {
+          throw new IOException("Got unexpected characters while " +
+              "looking for " + expected + ": " +
+              ev.asCharacters().getData());
+        }
+        break;
+      case XMLEvent.END_ELEMENT:
+        if (!allowEnd) {
+          throw new IOException("Got unexpected end event " +
+              "while looking for " + expected);
+        }
+        return ev;
+      case XMLEvent.START_ELEMENT:
+        if (!expected.startsWith("[")) {
+          if (!ev.asStartElement().getName().getLocalPart().
+                equals(expected)) {
+            throw new IOException("Failed to find <" + expected + ">; " +
+                "got " + ev.asStartElement().getName().getLocalPart() +
+                " instead.");
+          }
+        }
+        return ev;
+      default:
+        // Ignore other event types like comment, etc.
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Skipping XMLEvent of type " +
+              ev.getEventType() + "(" +  ev + ")");
+        }
+        break;
+      }
+    }
+  }
+
+  private void expectTagEnd(String expected) throws IOException {
+    XMLEvent ev = expectTag(expected, true);
+    if (ev.getEventType() != XMLStreamConstants.END_ELEMENT) {
+      throw new IOException("Expected tag end event for " + expected +
+            ", but got: " + ev);
+    }
+    if (!expected.startsWith("[")) {
+      String tag = ev.asEndElement().getName().getLocalPart();
+      if (!tag.equals(expected)) {
+        throw new IOException("Expected tag end event for " + expected +
+        ", but got tag end event for " + tag);
+      }
+    }
+  }
+
+  private static class Node {
+    private static final String EMPTY = "";
+    HashMap<String, LinkedList<Node>> children;
+    String val = EMPTY;
+
+    void addChild(String key, Node node) {
+      if (children == null) {
+        children = new HashMap<>();
+      }
+      LinkedList<Node> cur = children.get(key);
+      if (cur == null) {
+        cur = new LinkedList<>();
+        children.put(key, cur);
+      }
+      cur.add(node);
+    }
+
+    Node removeChild(String key) {
+      if (children == null) {
+        return null;
+      }
+      LinkedList<Node> cur = children.get(key);
+      if (cur == null) {
+        return null;
+      }
+      Node node = cur.remove();
+      if ((node == null) || cur.isEmpty()) {
+        children.remove(key);
+      }
+      return node;
+    }
+
+    String removeChildStr(String key) {
+      Node child = removeChild(key);
+      if (child == null) {
+        return null;
+      }
+      if ((child.children != null) && (!child.children.isEmpty())) {
+        throw new RuntimeException("Node " + key + " contains children " +
+            "of its own.");
+      }
+      return child.getVal();
+    }
+
+    Integer removeChildInt(String key) throws IOException {
+      String str = removeChildStr(key);
+      if (str == null) {
+        return null;
+      }
+      return Integer.valueOf(str);
+    }
+
+    Long removeChildLong(String key) throws IOException {
+      String str = removeChildStr(key);
+      if (str == null) {
+        return null;
+      }
+      return Long.valueOf(str);
+    }
+
+    // The presence of the child element indicates true and its absence
+    // indicates false, matching the convention PBImageXmlWriter#o uses
+    // when writing boolean fields; the element's text is ignored.
+    boolean removeChildBool(String key) throws IOException {
+      String str = removeChildStr(key);
+      if (str == null) {
+        return false;
+      }
+      return true;
+    }
+
+    String getRemainingKeyNames() {
+      if (children == null) {
+        return "";
+      }
+      return StringUtils.join(", ", children.keySet());
+    }
+
+    void verifyNoRemainingKeys(String sectionName) throws IOException {
+      String remainingKeyNames = getRemainingKeyNames();
+      if (!remainingKeyNames.isEmpty()) {
+        throw new IOException("Found unknown XML keys in " +
+            sectionName + ": " + remainingKeyNames);
+      }
+    }
+
+    void setVal(String val) {
+      this.val = val;
+    }
+
+    String getVal() {
+      return val;
+    }
+
+    String dump() {
+      StringBuilder bld = new StringBuilder();
+      if ((children != null) && (!children.isEmpty())) {
+        bld.append("{");
+      }
+      if (val != null) {
+        bld.append("[").append(val).append("]");
+      }
+      if ((children != null) && (!children.isEmpty())) {
+        String prefix = "";
+        for (Map.Entry<String, LinkedList<Node>> entry : children.entrySet()) {
+          for (Node n : entry.getValue()) {
+            bld.append(prefix);
+            bld.append(entry.getKey()).append(": ");
+            bld.append(n.dump());
+            prefix = ", ";
+          }
+        }
+        bld.append("}");
+      }
+      return bld.toString();
+    }
+  }
+
+  private void loadNodeChildrenHelper(Node parent, String expected,
+                                String terminators[]) throws IOException {
+    XMLEvent ev = null;
+    while (true) {
+      try {
+        ev = events.peek();
+        switch (ev.getEventType()) {
+        case XMLEvent.END_ELEMENT:
+          if (terminators.length != 0) {
+            return;
+          }
+          events.nextEvent();
+          return;
+        case XMLEvent.START_ELEMENT:
+          String key = ev.asStartElement().getName().getLocalPart();
+          for (String terminator : terminators) {
+            if (terminator.equals(key)) {
+              return;
+            }
+          }
+          events.nextEvent();
+          Node node = new Node();
+          parent.addChild(key, node);
+          loadNodeChildrenHelper(node, expected, new String[0]);
+          break;
+        case XMLEvent.CHARACTERS:
+          String val = XMLUtils.
+              unmangleXmlString(ev.asCharacters().getData(), true);
+          parent.setVal(val);
+          events.nextEvent();
+          break;
+        case XMLEvent.ATTRIBUTE:
+          throw new IOException("Unexpected XML event " + ev);
+        default:
+          // Ignore other event types like comment, etc.
+          if (LOG.isTraceEnabled()) {
+            LOG.trace("Skipping XMLEvent " + ev);
+          }
+          events.nextEvent();
+          break;
+        }
+      } catch (XMLStreamException e) {
+        throw new IOException("Expecting " + expected +
+            ", but got XMLStreamException", e);
+      }
+    }
+  }
+
+  /**
+   * Load a subtree of the XML into a Node structure.
+   * We will keep consuming XML events until we exit the current subtree.
+   * If there are any terminators specified, we will always leave the
+   * terminating end tag event in the stream.
+   *
+   * @param parent         The node to fill in.
+   * @param expected       A string to display in exceptions.
+   * @param terminators    Entering any one of these XML tags terminates our
+   *                       traversal.
+   * @throws IOException
+   */
+  private void loadNodeChildren(Node parent, String expected,
+                                String... terminators) throws IOException {
+    loadNodeChildrenHelper(parent, expected, terminators);
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("loadNodeChildren(expected=" + expected +
+          ", terminators=[" + StringUtils.join(",", terminators) +
+          "]):" + parent.dump());
+    }
+  }
+
+  /**
+   * A processor for an FSImage XML section.
+   */
+  private interface SectionProcessor {
+    /**
+     * Process this section.
+     */
+    void process() throws IOException;
+  }
+
+  /**
+   * Processes the NameSection containing last allocated block ID, etc.
+   */
+  private class NameSectionProcessor implements SectionProcessor {
+    static final String NAME = "NameSection";
+
+    @Override
+    public void process() throws IOException {
+      Node node = new Node();
+      loadNodeChildren(node, "NameSection fields");
+      NameSystemSection.Builder b =  NameSystemSection.newBuilder();
+      Integer namespaceId = node.removeChildInt("namespaceId");
+      if (namespaceId == null)  {
+        throw new IOException("<NameSection> is missing <namespaceId>");
+      }
+      b.setNamespaceId(namespaceId);
+      Long lval = node.removeChildLong("genstampV1");
+      if (lval != null)  {
+        b.setGenstampV1(lval);
+      }
+      lval = node.removeChildLong("genstampV2");
+      if (lval != null)  {
+        b.setGenstampV2(lval);
+      }
+      lval = node.removeChildLong("genstampV1Limit");
+      if (lval != null)  {
+        b.setGenstampV1Limit(lval);
+      }
+      lval = node.removeChildLong("lastAllocatedBlockId");
+      if (lval != null)  {
+        b.setLastAllocatedBlockId(lval);
+      }
+      lval = node.removeChildLong("txid");
+      if (lval != null)  {
+        b.setTransactionId(lval);
+      }
+      lval = node.removeChildLong("rollingUpgradeStartTime");
+      if (lval != null)  {
+        b.setRollingUpgradeStartTime(lval);
+      }
+      lval = node.removeChildLong("lastAllocatedStripedBlockId");
+      if (lval != null)  {
+        b.setLastAllocatedStripedBlockId(lval);
+      }
+      node.verifyNoRemainingKeys("NameSection");
+      NameSystemSection s = b.build();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(SectionName.NS_INFO.name() + " writing header: {" +
+            TextFormat.printToString(s) + "}");
+      }
+      s.writeDelimitedTo(out);
+      recordSectionLength(SectionName.NS_INFO.name());
+    }
+  }
+
+  private class INodeSectionProcessor implements SectionProcessor {
+    static final String NAME = "INodeSection";
+
+    @Override
+    public void process() throws IOException {
+      Node headerNode = new Node();
+      loadNodeChildren(headerNode, "INodeSection fields", "inode");
+      INodeSection.Builder b =  INodeSection.newBuilder();
+      Long lval = headerNode.removeChildLong("lastInodeId");
+      if (lval != null)  {
+        b.setLastInodeId(lval);
+      }
+      Integer expectedNumINodes = headerNode.removeChildInt("numInodes");
+      if (expectedNumINodes == null) {
+        throw new IOException("Failed to find <numInodes> in INodeSection.");
+      }
+      b.setNumInodes(expectedNumINodes);
+      INodeSection s = b.build();
+      s.writeDelimitedTo(out);
+      headerNode.verifyNoRemainingKeys("INodeSection");
+      int actualNumINodes = 0;
+      while (actualNumINodes < expectedNumINodes) {
+        try {
+          expectTag("inode", false);
+        } catch (IOException e) {
+          throw new IOException("Only found " + actualNumINodes +
+              " <inode> entries out of " + expectedNumINodes, e);
+        }
+        actualNumINodes++;
+        Node inode = new Node();
+        loadNodeChildren(inode, "INode fields");
+        INodeSection.INode.Builder inodeBld = processINodeXml(inode);
+        inodeBld.build().writeDelimitedTo(out);
+      }
+      expectTagEnd("INodeSection");
+      recordSectionLength(SectionName.INODE.name());
+    }
+  }
+
+  private INodeSection.INode.Builder processINodeXml(Node node)
+      throws IOException {
+    String type = node.removeChildStr("type");
+    if (type == null) {
+      throw new IOException("INode XML found with no <type> tag.");
+    }
+    INodeSection.INode.Builder inodeBld = INodeSection.INode.newBuilder();
+    Long id = node.removeChildLong("id");
+    if (id == null) {
+      throw new IOException("<inode> found without <id>");
+    }
+    inodeBld.setId(id);
+    String name = node.removeChildStr("name");
+    if (name != null) {
+      inodeBld.setName(ByteString.copyFrom(name, "UTF8"));
+    }
+    switch (type) {
+    case "FILE":
+      processFileXml(node, inodeBld);
+      break;
+    case "DIRECTORY":
+      processDirectoryXml(node, inodeBld);
+      break;
+    case "SYMLINK":
+      processSymlinkXml(node, inodeBld);
+      break;
+    default:
+      throw new IOException("INode XML found with unknown <type> " +
+          "tag " + type);
+    }
+    node.verifyNoRemainingKeys("inode");
+    return inodeBld;
+  }
+
+  private void processFileXml(Node node, INodeSection.INode.Builder inodeBld)
+      throws IOException {
+    inodeBld.setType(INodeSection.INode.Type.FILE);
+    INodeSection.INodeFile.Builder bld = INodeSection.INodeFile.newBuilder();
+    Integer ival = node.removeChildInt("replication");
+    if (ival != null) {
+      bld.setReplication(ival);
+    }
+    Long lval = node.removeChildLong("mtime");
+    if (lval != null) {
+      bld.setModificationTime(lval);
+    }
+    lval = node.removeChildLong("atime");
+    if (lval != null) {
+      bld.setAccessTime(lval);
+    }
+    lval = node.removeChildLong("preferredBlockSize");
+    if (lval != null) {
+      bld.setPreferredBlockSize(lval);
+    }
+    String perm = node.removeChildStr("permission");
+    if (perm != null) {
+      bld.setPermission(permissionXmlToU64(perm));
+    }
+    Node blocks = node.removeChild("blocks");
+    if (blocks != null) {
+      while (true) {
+        Node block = blocks.removeChild("block");
+        if (block == null) {
+          break;
+        }
+        HdfsProtos.BlockProto.Builder blockBld =
+            HdfsProtos.BlockProto.newBuilder();
+        Long id = block.removeChildLong("id");
+        if (id == null) {
+          throw new IOException("<block> found without <id>");
+        }
+        blockBld.setBlockId(id);
+        Long genstamp = block.removeChildLong("genstamp");
+        if (genstamp == null) {
+          throw new IOException("<block> found without <genstamp>");
+        }
+        blockBld.setGenStamp(genstamp);
+        Long numBytes = block.removeChildLong("numBytes");
+        if (numBytes == null) {
+          throw new IOException("<block> found without <numBytes>");
+        }
+        blockBld.setNumBytes(numBytes);
+        bld.addBlocks(blockBld);
+      }
+    }
+    Node fileUnderConstruction = node.removeChild("file-under-construction");
+    if (fileUnderConstruction != null) {
+      INodeSection.FileUnderConstructionFeature.Builder fb =
+          INodeSection.FileUnderConstructionFeature.newBuilder();
+      String clientName =
+          fileUnderConstruction.removeChildStr("clientName");
+      if (clientName == null) {
+        throw new IOException("<file-under-construction> found without " +
+            "<clientName>");
+      }
+      fb.setClientName(clientName);
+      String clientMachine =
+          fileUnderConstruction.removeChildStr("clientMachine");
+      if (clientMachine == null) {
+        throw new IOException("<file-under-construction> found without " +
+            "<clientMachine>");
+      }
+      fb.setClientMachine(clientMachine);
+      bld.setFileUC(fb);
+    }
+    Node acls = node.removeChild("acls");
+    if (acls != null) {
+      bld.setAcl(aclXmlToProto(acls));
+    }
+    Node xattrs = node.removeChild("xattrs");
+    if (xattrs != null) {
+      bld.setXAttrs(xattrsXmlToProto(xattrs));
+    }
+    ival = node.removeChildInt("storagePolicyId");
+    if (ival != null) {
+      bld.setStoragePolicyID(ival);
+    }
+    Boolean bval = node.removeChildBool("isStriped");
+    bld.setIsStriped(bval);
+    inodeBld.setFile(bld);
+    // Will check remaining keys and serialize in processINodeXml
+  }
+
+  private void processDirectoryXml(Node node,
+          INodeSection.INode.Builder inodeBld) throws IOException {
+    inodeBld.setType(INodeSection.INode.Type.DIRECTORY);
+    INodeSection.INodeDirectory.Builder bld =
+        INodeSection.INodeDirectory.newBuilder();
+    Long lval = node.removeChildLong("mtime");
+    if (lval != null) {
+      bld.setModificationTime(lval);
+    }
+    lval = node.removeChildLong("nsquota");
+    if (lval != null) {
+      bld.setNsQuota(lval);
+    }
+    lval = node.removeChildLong("dsquota");
+    if (lval != null) {
+      bld.setDsQuota(lval);
+    }
+    String perm = node.removeChildStr("permission");
+    if (perm != null) {
+      bld.setPermission(permissionXmlToU64(perm));
+    }
+    Node acls = node.removeChild("acls");
+    if (acls != null) {
+      bld.setAcl(aclXmlToProto(acls));
+    }
+    Node xattrs = node.removeChild("xattrs");
+    if (xattrs != null) {
+      bld.setXAttrs(xattrsXmlToProto(xattrs));
+    }
+    INodeSection.QuotaByStorageTypeFeatureProto.Builder qf =
+        INodeSection.QuotaByStorageTypeFeatureProto.newBuilder();
+    while (true) {
+      Node typeQuota = node.removeChild("typeQuota");
+      if (typeQuota == null) {
+        break;
+      }
+      INodeSection.QuotaByStorageTypeEntryProto.Builder qbld =
+          INodeSection.QuotaByStorageTypeEntryProto.newBuilder();
+      String type = typeQuota.removeChildStr("type");
+      if (type == null) {
+        throw new IOException("<typeQuota> was missing <type>");
+      }
+      HdfsProtos.StorageTypeProto storageType =
+          HdfsProtos.StorageTypeProto.valueOf(type);
+      if (storageType == null) {
+        throw new IOException("<typeQuota> had unknown <type> " + type);
+      }
+      qbld.setStorageType(storageType);
+      Long quota = typeQuota.removeChildLong("quota");
+      if (quota == null) {
+        throw new IOException("<typeQuota> was missing <quota>");
+      }
+      qbld.setQuota(quota);
+      qf.addQuotas(qbld);
+    }
+    bld.setTypeQuotas(qf);
+    inodeBld.setDirectory(bld);
+    // Will check remaining keys and serialize in processINodeXml
+  }
+
+  private void processSymlinkXml(Node node,
+                                 INodeSection.INode.Builder inodeBld) throws IOException {
+    inodeBld.setType(INodeSection.INode.Type.SYMLINK);
+    INodeSection.INodeSymlink.Builder bld =
+        INodeSection.INodeSymlink.newBuilder();
+    String perm = node.removeChildStr("permission");
+    if (perm != null) {
+      bld.setPermission(permissionXmlToU64(perm));
+    }
+    String target = node.removeChildStr("target");
+    if (target != null) {
+      bld.setTarget(ByteString.copyFrom(target, "UTF8"));
+    }
+    Long lval = node.removeChildLong("mtime");
+    if (lval != null) {
+      bld.setModificationTime(lval);
+    }
+    lval = node.removeChildLong("atime");
+    if (lval != null) {
+      bld.setAccessTime(lval);
+    }
+    inodeBld.setSymlink(bld);
+    // Will check remaining keys and serialize in processINodeXml
+  }
+
+  private INodeSection.AclFeatureProto.Builder aclXmlToProto(Node acl)
+      throws IOException {
+    // TODO: support ACLs
+    throw new IOException("ACLs are not supported yet.");
+  }
+
+  private INodeSection.XAttrFeatureProto.Builder xattrsXmlToProto(Node xattrs)
+      throws IOException {
+    INodeSection.XAttrFeatureProto.Builder bld =
+        INodeSection.XAttrFeatureProto.newBuilder();
+    while (true) {
+      Node xattr = xattrs.removeChild("xattr");
+      if (xattr == null) {
+        break;
+      }
+      INodeSection.XAttrCompactProto.Builder b =
+          INodeSection.XAttrCompactProto.newBuilder();
+      String ns = xattr.removeChildStr("ns");
+      if (ns == null) {
+        throw new IOException("<xattr> had no <ns> entry.");
+      }
+      int nsIdx;
+      try {
+        nsIdx = XAttrProtos.XAttrProto.
+            XAttrNamespaceProto.valueOf(ns).ordinal();
+      } catch (IllegalArgumentException e) {
+        throw new IOException("<xattr> had unknown <ns> " + ns, e);
+      }
+      String name = xattr.removeChildStr("name");
+      String valStr = xattr.removeChildStr("val");
+      byte[] val = null;
+      if (valStr == null) {
+        String valHex = xattr.removeChildStr("valHex");
+        if (valHex == null) {
+          throw new IOException("<xattr> had no <val> or <valHex> entry.");
+        }
+        val = new HexBinaryAdapter().unmarshal(valHex);
+      } else {
+        val = valStr.getBytes("UTF8");
+      }
+      b.setValue(ByteString.copyFrom(val));
+
+      // The XAttrCompactProto name field uses a fairly complex format
+      // to encode both the string table ID of the xattr name and the
+      // namespace ID.  See the protobuf file for details.
+      int nameId = registerStringId(name);
+      int encodedName = (nameId << XATTR_NAME_OFFSET) |
+          ((nsIdx & XATTR_NAMESPACE_MASK) << XATTR_NAMESPACE_OFFSET) |
+          (((nsIdx >> 2) & XATTR_NAMESPACE_EXT_MASK)
+              << XATTR_NAMESPACE_EXT_OFFSET);
+      b.setName(encodedName);
+      xattr.verifyNoRemainingKeys("xattr");
+      bld.addXAttrs(b);
+    }
+    xattrs.verifyNoRemainingKeys("xattrs");
+    return bld;
+  }
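
(The bit packing above is easier to see in isolation. Below is a minimal,
self-contained sketch of the same round trip; the mask/offset values mirror
the FSImageFormatPBINode constants used above -- 2-bit namespace at offset
30, 1-bit namespace extension at offset 5, 24-bit name id at offset 6 --
and the sample ids are invented.)

    public class XAttrNameEncodingDemo {
      static final int NS_MASK = 3, NS_OFFSET = 30;
      static final int NS_EXT_MASK = 1, NS_EXT_OFFSET = 5;
      static final int NAME_MASK = (1 << 24) - 1, NAME_OFFSET = 6;

      static int encode(int nameId, int nsIdx) {
        return (nameId << NAME_OFFSET)
            | ((nsIdx & NS_MASK) << NS_OFFSET)
            | (((nsIdx >> 2) & NS_EXT_MASK) << NS_EXT_OFFSET);
      }

      public static void main(String[] args) {
        int encoded = encode(42, 5);       // invented name id and ns index
        int nameId = (encoded >> NAME_OFFSET) & NAME_MASK;
        int nsIdx = ((encoded >> NS_OFFSET) & NS_MASK)
            | (((encoded >> NS_EXT_OFFSET) & NS_EXT_MASK) << 2);
        System.out.println("nameId=" + nameId + ", nsIdx=" + nsIdx); // 42, 5
      }
    }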
+
+  private class SecretManagerSectionProcessor implements SectionProcessor {
+    static final String NAME = "SecretManagerSection";
+
+    @Override
+    public void process() throws IOException {
+      Node secretHeader = new Node();
+      loadNodeChildren(secretHeader, "SecretManager fields",
+          "delegationKey", "token");
+      SecretManagerSection.Builder b = SecretManagerSection.newBuilder();
+      Integer currentId = secretHeader.removeChildInt("currentId");
+      if (currentId == null) {
+        throw new IOException("SecretManager section had no <currentId>");
+      }
+      b.setCurrentId(currentId);
+      Integer tokenSequenceNumber =
+          secretHeader.removeChildInt("tokenSequenceNumber");
+      if (tokenSequenceNumber == null) {
+        throw new IOException("SecretManager section had no " +
+            "<tokenSequenceNumber>");
+      }
+      b.setTokenSequenceNumber(tokenSequenceNumber);
+      Integer expectedNumKeys = secretHeader.removeChildInt("numDelegationKeys");
+      if (expectedNumKeys == null) {
+        throw new IOException("SecretManager section had no " +
+            "<numDelegationKeys>");
+      }
+      b.setNumKeys(expectedNumKeys);
+      Integer expectedNumTokens = secretHeader.removeChildInt("numTokens");
+      if (expectedNumTokens == null) {
+        throw new IOException("SecretManager section had no " +
+            "<numTokens>");
+      }
+      b.setNumTokens(expectedNumTokens);
+      secretHeader.verifyNoRemainingKeys("SecretManager");
+      b.build().writeDelimitedTo(out);
+      for (int actualNumKeys = 0; actualNumKeys < expectedNumKeys;
+           actualNumKeys++) {
+        try {
+          expectTag("delegationKey", false);
+        } catch (IOException e) {
+          throw new IOException("Only read " + actualNumKeys +
+              " delegation keys out of " + expectedNumKeys, e);
+        }
+        SecretManagerSection.DelegationKey.Builder dbld =
+            SecretManagerSection.DelegationKey.newBuilder();
+        Node dkey = new Node();
+        loadNodeChildren(dkey, "Delegation key fields");
+        Integer id = dkey.removeChildInt("id");
+        if (id == null) {
+          throw new IOException("Delegation key stanza <delegationKey> " +
+              "lacked an <id> field.");
+        }
+        dbld.setId(id);
+        String expiry = dkey.removeChildStr("expiry");
+        if (expiry == null) {
+          throw new IOException("Delegation key stanza <delegationKey> " +
+              "lacked an <expiry> field.");
+        }
+        dbld.setExpiryDate(dateStrToLong(expiry));
+        String keyHex = dkey.removeChildStr("key");
+        if (keyHex == null) {
+          throw new IOException("Delegation key stanza <delegationKey> " +
+              "lacked a <key> field.");
+        }
+        byte[] key = new HexBinaryAdapter().unmarshal(keyHex);
+        dkey.verifyNoRemainingKeys("delegationKey");
+        dbld.setKey(ByteString.copyFrom(key));
+        dbld.build().writeDelimitedTo(out);
+      }
+      for (int actualNumTokens = 0; actualNumTokens < expectedNumTokens;
+           actualNumTokens++) {
+        try {
+          expectTag("token", false);
+        } catch (IOException e) {
+          throw new IOException("Only read " + actualNumTokens +
+              " tokens out of " + expectedNumTokens, e);
+        }
+        SecretManagerSection.PersistToken.Builder tbld =
+            SecretManagerSection.PersistToken.newBuilder();
+        Node token = new Node();
+        loadNodeChildren(token, "PersistToken key fields");
+        Integer version = token.removeChildInt("version");
+        if (version != null) {
+          tbld.setVersion(version);
+        }
+        String owner = token.removeChildStr("owner");
+        if (owner != null) {
+          tbld.setOwner(owner);
+        }
+        String renewer = token.removeChildStr("renewer");
+        if (renewer != null) {
+          tbld.setRenewer(renewer);
+        }
+        String realUser = token.removeChildStr("realUser");
+        if (realUser != null) {
+          tbld.setRealUser(realUser);
+        }
+        String issueDateStr = token.removeChildStr("issueDate");
+        if (issueDateStr != null) {
+          tbld.setIssueDate(dateStrToLong(issueDateStr));
+        }
+        String maxDateStr = token.removeChildStr("maxDate");
+        if (maxDateStr != null) {
+          tbld.setMaxDate(dateStrToLong(maxDateStr));
+        }
+        Integer seqNo = token.removeChildInt("sequenceNumber");
+        if (seqNo != null) {
+          tbld.setSequenceNumber(seqNo);
+        }
+        Integer masterKeyId = token.removeChildInt("masterKeyId");
+        if (masterKeyId != null) {
+          tbld.setMasterKeyId(masterKeyId);
+        }
+        String expiryDateStr = token.removeChildStr("expiryDate");
+        if (expiryDateStr != null) {
+          tbld.setExpiryDate(dateStrToLong(expiryDateStr));
+        }
+        token.verifyNoRemainingKeys("token");
+        tbld.build().writeDelimitedTo(out);
+      }
+      expectTagEnd("SecretManagerSection");
+      recordSectionLength(SectionName.SECRET_MANAGER.name());
+    }
+
+    private long dateStrToLong(String dateStr) throws IOException {
+      try {
+        Date date = isoDateFormat.parse(dateStr);
+        return date.getTime();
+      } catch (ParseException e) {
+        throw new IOException("Failed to parse ISO date string " + dateStr, e);
+      }
+    }
+  }
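
(A stand-alone sketch of the date round trip dateStrToLong() performs:
expiry times are written as formatted strings and parsed back to epoch
milliseconds. The exact pattern and time zone below are assumptions of
this sketch, not a statement of what isoDateFormat uses.)

    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.TimeZone;

    public class DateRoundTripDemo {
      public static void main(String[] args) throws Exception {
        SimpleDateFormat fmt =
            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");
        fmt.setTimeZone(TimeZone.getTimeZone("UTC"));
        long millis = 1456970183000L;                 // arbitrary timestamp
        String asText = fmt.format(new Date(millis)); // XML representation
        System.out.println(asText + " -> " + fmt.parse(asText).getTime());
      }
    }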
+
+  private class CacheManagerSectionProcessor implements SectionProcessor {
+    static final String NAME = "CacheManagerSection";
+
+    @Override
+    public void process() throws IOException {
+      Node node = new Node();
+      loadNodeChildren(node, "CacheManager fields", "pool", "directive");
+      CacheManagerSection.Builder b = CacheManagerSection.newBuilder();
+      Long nextDirectiveId = node.removeChildLong("nextDirectiveId");
+      if (nextDirectiveId == null) {
+        throw new IOException("CacheManager section had no <nextDirectiveId>");
+      }
+      b.setNextDirectiveId(nextDirectiveId);
+      Integer expectedNumPools = node.removeChildInt("numPools");
+      if (expectedNumPools == null) {
+        throw new IOException("CacheManager section had no <numPools>");
+      }
+      b.setNumPools(expectedNumPools);
+      Integer expectedNumDirectives = node.removeChildInt("numDirectives");
+      if (expectedNumDirectives == null) {
+        throw new IOException("CacheManager section had no <numDirectives>");
+      }
+      b.setNumDirectives(expectedNumDirectives);
+      b.build().writeDelimitedTo(out);
+      long actualNumPools = 0;
+      while (actualNumPools < expectedNumPools) {
+        try {
+          expectTag("pool", false);
+        } catch (IOException e) {
+          throw new IOException("Only read " + actualNumPools +
+              " cache pools out of " + expectedNumPools, e);
+        }
+        actualNumPools++;
+        Node pool = new Node();
+        loadNodeChildren(pool, "pool fields", "");
+        processPoolXml(pool);
+      }
+      long actualNumDirectives = 0;
+      while (actualNumDirectives < expectedNumDirectives) {
+        try {
+          expectTag("directive", false);
+        } catch (IOException e) {
+          throw new IOException("Only read " + actualNumDirectives +
+              " cache pools out of " + expectedNumDirectives, e);
+        }
+        actualNumDirectives++;
+        Node directive = new Node();
+        loadNodeChildren(directive, "directive fields", "");
+        processDirectiveXml(directive);
+      }
+      expectTagEnd("CacheManagerSection");
+      recordSectionLength(SectionName.CACHE_MANAGER.name());
+    }
+
+    private void processPoolXml(Node pool) throws IOException {
+      CachePoolInfoProto.Builder bld = CachePoolInfoProto.newBuilder();
+      String poolName = pool.removeChildStr("poolName");
+      if (poolName == null) {
+        throw new IOException("<pool> found without <poolName>");
+      }
+      bld.setPoolName(poolName);
+      String ownerName = pool.removeChildStr("ownerName");
+      if (ownerName == null) {
+        throw new IOException("<pool> found without <ownerName>");
+      }
+      bld.setOwnerName(ownerName);
+      String groupName = pool.removeChildStr("groupName");
+      if (groupName == null) {
+        throw new IOException("<pool> found without <groupName>");
+      }
+      bld.setGroupName(groupName);
+      Integer mode = pool.removeChildInt("mode");
+      if (mode == null) {
+        throw new IOException("<pool> found without <mode>");
+      }
+      bld.setMode(mode);
+      Long limit = pool.removeChildLong("limit");
+      if (limit == null) {
+        throw new IOException("<pool> found without <limit>");
+      }
+      bld.setLimit(limit);
+      Long maxRelativeExpiry = pool.removeChildLong("maxRelativeExpiry");
+      if (maxRelativeExpiry == null) {
+        throw new IOException("<pool> found without <maxRelativeExpiry>");
+      }
+      bld.setMaxRelativeExpiry(maxRelativeExpiry);
+      pool.verifyNoRemainingKeys("pool");
+      bld.build().writeDelimitedTo(out);
+    }
+
+    private void processDirectiveXml(Node directive) throws IOException {
+      CacheDirectiveInfoProto.Builder bld =
+          CacheDirectiveInfoProto.newBuilder();
+      Long id = directive.removeChildLong("id");
+      if (id == null) {
+        throw new IOException("<directive> found without <id>");
+      }
+      bld.setId(id);
+      String path = directive.removeChildStr("path");
+      if (path == null) {
+        throw new IOException("<directive> found without <path>");
+      }
+      bld.setPath(path);
+      Integer replication = directive.removeChildInt("replication");
+      if (replication == null) {
+        throw new IOException("<directive> found without <replication>");
+      }
+      bld.setReplication(replication);
+      String pool = directive.removeChildStr("pool");
+      if (pool == null) {
+        throw new IOException("<directive> found without <pool>");
+      }
+      bld.setPool(pool);
+      Node expiration = directive.removeChild("expiration");
+      if (expiration != null) {
+        CacheDirectiveInfoExpirationProto.Builder ebld =
+            CacheDirectiveInfoExpirationProto.newBuilder();
+        Long millis = expiration.removeChildLong("millis");
+        if (millis == null) {
+          throw new IOException("cache directive <expiration> found " +
+              "without <millis>");
+        }
+        ebld.setMillis(millis);
+        if (expiration.removeChildBool("relative")) {
+          ebld.setIsRelative(true);
+        } else {
+          ebld.setIsRelative(false);
+        }
+        bld.setExpiration(ebld);
+      }
+      directive.verifyNoRemainingKeys("directive");
+      bld.build().writeDelimitedTo(out);
+    }
+  }
+
+  private class INodeReferenceSectionProcessor implements SectionProcessor {
+    static final String NAME = "INodeReferenceSection";
+
+    @Override
+    public void process() throws IOException {
+      // There is no header for this section.
+      // We process the repeated <ref> elements.
+      while (true) {
+        XMLEvent ev = expectTag("ref", true);
+        if (ev.isEndElement()) {
+          break;
+        }
+        Node inodeRef = new Node();
+        FsImageProto.INodeReferenceSection.INodeReference.Builder bld =
+            FsImageProto.INodeReferenceSection.INodeReference.newBuilder();
+        loadNodeChildren(inodeRef, "INodeReference");
+        Long referredId = inodeRef.removeChildLong("referredId");
+        if (referredId != null) {
+          bld.setReferredId(referredId);
+        }
+        String name = inodeRef.removeChildStr("name");
+        if (name != null) {
+          bld.setName(ByteString.copyFrom(name, "UTF8"));
+        }
+        Integer dstSnapshotId = inodeRef.removeChildInt("dstSnapshotId");
+        if (dstSnapshotId != null) {
+          bld.setDstSnapshotId(dstSnapshotId);
+        }
+        Integer lastSnapshotId = inodeRef.removeChildInt("lastSnapshotId");
+        if (lastSnapshotId != null) {
+          bld.setLastSnapshotId(lastSnapshotId);
+        }
+        inodeRef.verifyNoRemainingKeys("ref");
+        bld.build().writeDelimitedTo(out);
+      }
+      recordSectionLength(SectionName.INODE_REFERENCE.name());
+    }
+  }
+
+  private class INodeDirectorySectionProcessor implements SectionProcessor {
+    static final String NAME = "INodeDirectorySection";
+
+    @Override
+    public void process() throws IOException {
+      // No header for this section
+      // Process the repeated <directory> elements.
+      while (true) {
+        XMLEvent ev = expectTag("directory", true);
+        if (ev.isEndElement()) {
+          break;
+        }
+        Node directory = new Node();
+        FsImageProto.INodeDirectorySection.DirEntry.Builder bld =
+            FsImageProto.INodeDirectorySection.DirEntry.newBuilder();
+        loadNodeChildren(directory, "directory");
+        Long parent = directory.removeChildLong("parent");
+        if (parent != null) {
+          bld.setParent(parent);
+        }
+        while (true) {
+          Node child = directory.removeChild("child");
+          if (child == null) {
+            break;
+          }
+          bld.addChildren(Long.parseLong(child.getVal()));
+        }
+        while (true) {
+          Node refChild = directory.removeChild("refChild");
+          if (refChild == null) {
+            break;
+          }
+          bld.addRefChildren(Integer.parseInt(refChild.getVal()));
+        }
+        directory.verifyNoRemainingKeys("directory");
+        bld.build().writeDelimitedTo(out);
+      }
+      recordSectionLength(SectionName.INODE_DIR.name());
+    }
+  }
+
+  private class FilesUnderConstructionSectionProcessor
+      implements SectionProcessor {
+    static final String NAME = "FileUnderConstructionSection";
+
+    @Override
+    public void process() throws IOException {
+      // No header for this section type.
+      // Process the repeated files under construction elements.
+      while (true) {
+        XMLEvent ev = expectTag("inode", true);
+        if (ev.isEndElement()) {
+          break;
+        }
+        Node fileUnderConstruction = new Node();
+        loadNodeChildren(fileUnderConstruction, "file under construction");
+        FileUnderConstructionEntry.Builder bld =
+            FileUnderConstructionEntry.newBuilder();
+        Long id = fileUnderConstruction.removeChildLong("id");
+        if (id != null) {
+          bld.setInodeId(id);
+        }
+        String fullpath = fileUnderConstruction.removeChildStr("path");
+        if (fullpath != null) {
+          bld.setFullPath(fullpath);
+        }
+        fileUnderConstruction.verifyNoRemainingKeys("inode");
+        bld.build().writeDelimitedTo(out);
+      }
+      recordSectionLength(SectionName.FILES_UNDERCONSTRUCTION.name());
+    }
+  }
+
+  private class SnapshotSectionProcessor implements SectionProcessor {
+    static final String NAME = "SnapshotSection";
+
+    @Override
+    public void process() throws IOException {
+      FsImageProto.SnapshotSection.Builder bld =
+          FsImageProto.SnapshotSection.newBuilder();
+      Node header = new Node();
+      loadNodeChildren(header, "SnapshotSection fields", "snapshot");
+      Integer snapshotCounter = header.removeChildInt("snapshotCounter");
+      if (snapshotCounter == null) {
+        throw new IOException("No <snapshotCounter> entry found in " +
+            "SnapshotSection header");
+      }
+      bld.setSnapshotCounter(snapshotCounter);
+      Integer expectedNumSnapshots = header.removeChildInt("numSnapshots");
+      if (expectedNumSnapshots == null) {
+        throw new IOException("No <numSnapshots> entry found in " +
+            "SnapshotSection header");
+      }
+      bld.setNumSnapshots(expectedNumSnapshots);
+      while (true) {
+        Node sd = header.removeChild("snapshottableDir");
+        if (sd == null) {
+          break;
+        }
+        Long dir = sd.removeChildLong("dir");
+        sd.verifyNoRemainingKeys("<dir>");
+        bld.addSnapshottableDir(dir);
+      }
+      header.verifyNoRemainingKeys("SnapshotSection");
+      bld.build().writeDelimitedTo(out);
+      int actualNumSnapshots = 0;
+      while (actualNumSnapshots < expectedNumSnapshots) {
+        try {
+          expectTag("snapshot", false);
+        } catch (IOException e) {
+          throw new IOException("Only read " + actualNumSnapshots +
+              " <snapshot> entries out of " + expectedNumSnapshots, e);
+        }
+        actualNumSnapshots++;
+        Node snapshot = new Node();
+        loadNodeChildren(snapshot, "snapshot fields");
+        FsImageProto.SnapshotSection.Snapshot.Builder s =
+            FsImageProto.SnapshotSection.Snapshot.newBuilder();
+        Integer snapshotId = snapshot.removeChildInt("id");
+        if (snapshotId == null) {
+          throw new IOException("<snapshot> section was missing <id>");
+        }
+        s.setSnapshotId(snapshotId);
+        Node snapshotRoot = snapshot.removeChild("root");
+        INodeSection.INode.Builder inodeBld = processINodeXml(snapshotRoot);
+        s.setRoot(inodeBld);
+        s.build().writeDelimitedTo(out);
+      }
+      expectTagEnd("SnapshotSection");
+      recordSectionLength(SectionName.SNAPSHOT.name());
+    }
+  }
+
+  private class SnapshotDiffSectionProcessor implements SectionProcessor {
+    static final String NAME = "SnapshotDiffSection";
+
+    @Override
+    public void process() throws IOException {
+      // No header for this section type.
+      LOG.debug("Processing SnapshotDiffSection");
+      while (true) {
+        XMLEvent ev = expectTag("[diff start tag]", true);
+        if (ev.isEndElement()) {
+          String name = ev.asEndElement().getName().getLocalPart();
+          if (name.equals("SnapshotDiffSection")) {
+            break;
+          }
+          throw new IOException("Got unexpected end tag for " + name);
+        }
+        String tagName = ev.asStartElement().getName().getLocalPart();
+        if (tagName.equals("dirDiffEntry")) {
+          processDirDiffEntry();
+        } else if (tagName.equals("fileDiffEntry")) {
+          processFileDiffEntry();
+        } else {
+          throw new IOException("SnapshotDiffSection contained unexpected " +
+              "tag " + tagName);
+        }
+      }
+      recordSectionLength(SectionName.SNAPSHOT_DIFF.name());
+    }
+
+    private void processDirDiffEntry() throws IOException {
+      LOG.debug("Processing dirDiffEntry");
+      DiffEntry.Builder headerBld = DiffEntry.newBuilder();
+      headerBld.setType(DiffEntry.Type.DIRECTORYDIFF);
+      Node dirDiffHeader = new Node();
+      loadNodeChildren(dirDiffHeader, "dirDiffEntry fields", "dirDiff");
+      Long inodeId = dirDiffHeader.removeChildLong("inodeId");
+      if (inodeId == null) {
+        throw new IOException("<dirDiffEntry> contained no <inodeId> entry.");
+      }
+      headerBld.setInodeId(inodeId);
+      Integer expectedDiffs = dirDiffHeader.removeChildInt("count");
+      if (expectedDiffs == null) {
+        throw new IOException("<dirDiffEntry> contained no <count> entry.");
+      }
+      headerBld.setNumOfDiff(expectedDiffs);
+      dirDiffHeader.verifyNoRemainingKeys("dirDiffEntry");
+      headerBld.build().writeDelimitedTo(out);
+      for (int actualDiffs = 0; actualDiffs < expectedDiffs; actualDiffs++) {
+        try {
+          expectTag("dirDiff", false);
+        } catch (IOException e) {
+          throw new IOException("Only read " + (actualDiffs + 1) +
+              " diffs out of " + expectedDiffs, e);
+        }
+        Node dirDiff = new Node();
+        loadNodeChildren(dirDiff, "dirDiff fields");
+        FsImageProto.SnapshotDiffSection.DirectoryDiff.Builder bld =
+            FsImageProto.SnapshotDiffSection.DirectoryDiff.newBuilder();
+        Integer snapshotId = dirDiff.removeChildInt("snapshotId");
+        if (snapshotId != null) {
+          bld.setSnapshotId(snapshotId);
+        }
+        Integer childrenSize = dirDiff.removeChildInt("childrenSize");
+        if (childrenSize == null) {
+          throw new IOException("Expected to find <childrenSize> in " +
+              "<dirDiff> section.");
+        }
+        bld.setIsSnapshotRoot(dirDiff.removeChildBool("isSnapshotRoot"));
+        bld.setChildrenSize(childrenSize);
+        String name = dirDiff.removeChildStr("name");
+        if (name != null) {
+          bld.setName(ByteString.copyFrom(name, "UTF8"));
+        }
+        // TODO: add missing snapshotCopy field to XML
+        Integer expectedCreatedListSize =
+            dirDiff.removeChildInt("createdListSize");
+        if (expectedCreatedListSize == null) {
+          throw new IOException("Expected to find <createdListSize> in " +
+              "<dirDiff> section.");
+        }
+        bld.setCreatedListSize(expectedCreatedListSize);
+        while (true) {
+          Node deleted = dirDiff.removeChild("deletedInode");
+          if (deleted == null) {
+            break;
+          }
+          bld.addDeletedINode(Long.parseLong(deleted.getVal()));
+        }
+        while (true) {
+          Node deleted = dirDiff.removeChild("deletedInoderef");
+          if (deleted == null) {
+            break;
+          }
+          bld.addDeletedINodeRef(Integer.parseInt(deleted.getVal()));
+        }
+        bld.build().writeDelimitedTo(out);
+        // After the DirectoryDiff header comes a list of CreatedListEntry PBs.
+        int actualCreatedListSize = 0;
+        while (true) {
+          Node created = dirDiff.removeChild("created");
+          if (created == null) {
+            break;
+          }
+          String cleName = created.removeChildStr("name");
+          if (cleName == null) {
+            throw new IOException("Expected <created> entry to have " +
+                "a <name> field");
+          }
+          created.verifyNoRemainingKeys("created");
+          FsImageProto.SnapshotDiffSection.CreatedListEntry.newBuilder().
+              setName(ByteString.copyFrom(cleName, "UTF8")).
+              build().writeDelimitedTo(out);
+          actualCreatedListSize++;
+        }
+        if (actualCreatedListSize != expectedCreatedListSize) {
+          throw new IOException("<createdListSize> was " +
+              expectedCreatedListSize +", but there were " +
+              actualCreatedListSize + " <created> entries.");
+        }
+        dirDiff.verifyNoRemainingKeys("dirDiff");
+      }
+      expectTagEnd("dirDiffEntry");
+    }
+
+    private void processFileDiffEntry() throws IOException {
+      LOG.debug("Processing fileDiffEntry");
+      DiffEntry.Builder headerBld = DiffEntry.newBuilder();
+      headerBld.setType(DiffEntry.Type.FILEDIFF);
+      Node fileDiffHeader = new Node();
+      loadNodeChildren(fileDiffHeader, "fileDiffEntry fields", "fileDiff");
+      Long inodeId = fileDiffHeader.removeChildLong("inodeid");
+      if (inodeId == null) {
+        throw new IOException("<fileDiffEntry> contained no <inodeid> entry.");
+      }
+      headerBld.setInodeId(inodeId);
+      Integer expectedDiffs = fileDiffHeader.removeChildInt("count");
+      if (expectedDiffs == null) {
+        throw new IOException("<fileDiffEntry> contained no <count> entry.");
+      }
+      headerBld.setNumOfDiff(expectedDiffs);
+      fileDiffHeader.verifyNoRemainingKeys("fileDiffEntry");
+      headerBld.build().writeDelimitedTo(out);
+      for (int actualDiffs = 0; actualDiffs < expectedDiffs; actualDiffs++) {
+        try {
+          expectTag("fileDiff", false);
+        } catch (IOException e) {
+          throw new IOException("Only read " + (actualDiffs + 1) +
+              " diffs out of " + expectedDiffs, e);
+        }
+        Node fileDiff = new Node();
+        loadNodeChildren(fileDiff, "fileDiff fields");
+        FsImageProto.SnapshotDiffSection.FileDiff.Builder bld =
+            FsImageProto.SnapshotDiffSection.FileDiff.newBuilder();
+        Integer snapshotId = fileDiff.removeChildInt("snapshotId");
+        if (snapshotId != null) {
+          bld.setSnapshotId(snapshotId);
+        }
+        Long size = fileDiff.removeChildLong("size");
+        if (size != null) {
+          bld.setFileSize(size);
+        }
+        String name = fileDiff.removeChildStr("name");
+        if (name != null) {
+          bld.setName(ByteString.copyFrom(name, "UTF8"));
+        }
+        // TODO: missing snapshotCopy
+        // TODO: missing blocks
+        fileDiff.verifyNoRemainingKeys("fileDiff");
+        bld.build().writeDelimitedTo(out);
+      }
+      expectTagEnd("fileDiffEntry");
+    }
+  }
+
+  /**
+   * Permission is serialized as a 64-bit long. [0:24):[24:48):[48:64)
+   * (in Big Endian).  The first and second parts are the string IDs
+   * of the user and group name, and the last 16 bits are the permission bits.
+   *
+   * @param perm           The permission string from the XML.
+   * @return               The 64-bit value to use in the fsimage for permission.
+   * @throws IOException   If we run out of string IDs in the string table.
+   */
+  private long permissionXmlToU64(String perm) throws IOException {
+    String components[] = perm.split(":");
+    if (components.length != 3) {
+      throw new IOException("Unable to parse permission string " + perm +
+          ": expected 3 components, but only had " + components.length);
+    }
+    String userName = components[0];
+    String groupName = components[1];
+    String modeString = components[2];
+    long userNameId = registerStringId(userName);
+    long groupNameId = registerStringId(groupName);
+    long mode = new FsPermission(modeString).toShort();
+    return (userNameId << 40) | (groupNameId << 16) | mode;
+  }
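
(A worked example of the layout described in the javadoc above; the
string-table ids are invented, and 0755 stands in for
new FsPermission("rwxr-xr-x").toShort().)

    public class PermissionPackingDemo {
      public static void main(String[] args) {
        long userNameId = 1;    // assumed string-table id of the user name
        long groupNameId = 2;   // assumed string-table id of the group name
        long mode = 0755;       // rwxr-xr-x as a 16-bit permission word
        long packed = (userNameId << 40) | (groupNameId << 16) | mode;
        System.out.printf("packed = 0x%016x%n", packed);
        System.out.println("user  id = " + (packed >>> 40));              // 1
        System.out.println("group id = " + ((packed >>> 16) & 0xffffff)); // 2
        System.out.println("mode     = 0"
            + Long.toOctalString(packed & 0xffff));                    // 0755
      }
    }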
+
+  /**
+   * The FSImage contains a string table which maps strings to IDs.
+   * This is a simple form of compression which takes advantage of the fact
+   * that the same strings tend to occur over and over again.
+   * This function will return an ID which we can use to represent the given
+   * string.  If the string already exists in the string table, we will use
+   * that ID; otherwise, we will allocate a new one.
+   *
+   * @param str           The string.
+   * @return              The ID in the string table.
+   * @throws IOException  If we run out of bits in the string table.  We only
+   *                      have 25 bits.
+   */
+  int registerStringId(String str) throws IOException {
+    Integer id = stringTable.get(str);
+    if (id != null) {
+      return id;
+    }
+    int latestId = latestStringId;
+    if (latestId >= 0x1ffffff) {
+      throw new IOException("Cannot have more than 2**25 " +
+          "strings in the fsimage, because of the limitation on " +
+          "the size of string table IDs.");
+    }
+    stringTable.put(str, latestId);
+    latestStringId++;
+    return latestId;
+  }
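
(The deduplication registerStringId() describes reduces to a few lines;
this toy version, with invented names, shows identical strings collapsing
to a single id.)

    import java.util.HashMap;
    import java.util.Map;

    public class StringTableDemo {
      private final Map<String, Integer> table = new HashMap<>();
      private int latestId = 1;

      int register(String str) {
        return table.computeIfAbsent(str, k -> latestId++);
      }

      public static void main(String[] args) {
        StringTableDemo t = new StringTableDemo();
        System.out.println(t.register("hdfs"));       // 1
        System.out.println(t.register("hdfs"));       // 1 again: deduplicated
        System.out.println(t.register("supergroup")); // 2
      }
    }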
+
+  /**
+   * Record the length of a section of the FSImage in our FileSummary object.
+   * The FileSummary appears at the end of the FSImage and acts as a table of
+   * contents for the file.
+   *
+   * @param sectionNamePb  The name of the section as it should appear in
+   *                       the fsimage.  (This is different than the XML
+   *                       name.)
+   * @throws IOException
+   */
+  void recordSectionLength(String sectionNamePb) throws IOException {
+    long curSectionStartOffset = sectionStartOffset;
+    long curPos = out.getCount();
+    fileSummaryBld.addSections(FileSummary.Section.newBuilder().
+        setName(sectionNamePb).
+        setLength(curPos - curSectionStartOffset).
+        setOffset(curSectionStartOffset));
+    sectionStartOffset = curPos;
+  }
+
+  /**
+   * Read the version tag which starts the XML file.
+   */
+  private void readVersion() throws IOException {
+    try {
+      expectTag("version", false);
+    } catch (IOException e) {
+      // Handle the case where <version> does not exist.
+      // Note: fsimage XML files which are missing <version> are also missing
+      // many other fields that oiv needs to accurately reconstruct the
+      // fsimage.
+      throw new IOException("No <version> section found at the top of " +
+          "the fsimage XML.  This XML file is too old to be processed " +
+          "by ovi.", e);
+    }
+    Node version = new Node();
+    loadNodeChildren(version, "version fields");
+    Integer onDiskVersion = version.removeChildInt("onDiskVersion");
+    if (onDiskVersion == null) {
+      throw new IOException("The <version> section doesn't contain " +
+          "the onDiskVersion.");
+    }
+    Integer layoutVersion = version.removeChildInt("layoutVersion");
+    if (layoutVersion == null) {
+      throw new IOException("The <version> section doesn't contain " +
+          "the layoutVersion.");
+    }
+    fileSummaryBld.setOndiskVersion(onDiskVersion);
+    fileSummaryBld.setLayoutVersion(layoutVersion);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Loaded <version> with onDiskVersion=" + onDiskVersion +
+          ", layoutVersion=" + layoutVersion + ".");
+    }
+  }
+
+  /**
+   * Write the string table to the fsimage.
+   * @throws IOException
+   */
+  private void writeStringTableSection() throws IOException {
+    FsImageProto.StringTableSection sectionHeader =
+        FsImageProto.StringTableSection.newBuilder().
+        setNumEntry(stringTable.size()).build();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(SectionName.STRING_TABLE.name() + " writing header: {" +
+            TextFormat.printToString(sectionHeader) + "}");
+    }
+    sectionHeader.writeDelimitedTo(out);
+
+    // The entries don't have to be in any particular order, so iterating
+    // over the hash table is fine.
+    for (Map.Entry<String, Integer> entry : stringTable.entrySet()) {
+      FsImageProto.StringTableSection.Entry stEntry =
+          FsImageProto.StringTableSection.Entry.newBuilder().
+          setStr(entry.getKey()).
+          setId(entry.getValue()).
+          build();
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Writing string table entry: {" +
+            TextFormat.printToString(stEntry) + "}");
+      }
+      stEntry.writeDelimitedTo(out);
+    }
+    recordSectionLength(SectionName.STRING_TABLE.name());
+  }
+
+  /**
+   * Processes the XML file back into an fsimage.
+   */
+  private void processXml() throws Exception {
+    LOG.debug("Loading <fsimage>.");
+    expectTag("fsimage", false);
+    // Read the <version> tag.
+    readVersion();
+    // Write the HDFSIMG1 magic number which begins the fsimage file.
+    out.write(FSImageUtil.MAGIC_HEADER);
+    // Write a series of fsimage sections.
+    sectionStartOffset = FSImageUtil.MAGIC_HEADER.length;
+    final HashSet<String> unprocessedSections =
+        new HashSet<>(sections.keySet());
+    while (!unprocessedSections.isEmpty()) {
+      XMLEvent ev = expectTag("[section header]", true);
+      if (ev.getEventType() == XMLStreamConstants.END_ELEMENT) {
+        if (ev.asEndElement().getName().getLocalPart().equals("fsimage")) {
+          throw new IOException("FSImage XML ended prematurely, without " +
+              "including section(s) " + StringUtils.join(", ",
+              unprocessedSections));
+        }
+        throw new IOException("Got unexpected tag end event for " +
+            ev.asEndElement().getName().getLocalPart() + " while looking " +
+            "for section header tag.");
+      } else if (ev.getEventType() != XMLStreamConstants.START_ELEMENT) {
+        throw new IOException("Expected section header START_ELEMENT; " +
+            "got event of type " + ev.getEventType());
+      }
+      String sectionName = ev.asStartElement().getName().getLocalPart();
+      if (!unprocessedSections.contains(sectionName)) {
+        throw new IOException("Unknown or duplicate section found for " +
+            sectionName);
+      }
+      SectionProcessor sectionProcessor = sections.get(sectionName);
+      if (sectionProcessor == null) {
+        throw new IOException("Unknown FSImage section " + sectionName +
+            ".  Valid section names are [" +
+            StringUtils.join(", ", sections.keySet()) + "]");
+      }
+      unprocessedSections.remove(sectionName);
+      sectionProcessor.process();
+    }
+
+    // Write the StringTable section to disk.
+    // This has to be done after the other sections, since some of them
+    // add entries to the string table.
+    writeStringTableSection();
+
+    // Write the FileSummary section to disk.
+    // This section is always last.
+    long prevOffset = out.getCount();
+    FileSummary fileSummary = fileSummaryBld.build();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Writing FileSummary: {" +
+          TextFormat.printToString(fileSummary) + "}");
+    }
+    // Even though the last 4 bytes of the file gives the FileSummary length,
+    // we still write a varint first that also contains the length.
+    fileSummary.writeDelimitedTo(out);
+
+    // Write the length of the FileSummary section as a fixed-size big
+    // endian 4-byte quantity.
+    int summaryLen = Ints.checkedCast(out.getCount() - prevOffset);
+    byte[] summaryLenBytes = new byte[4];
+    ByteBuffer.wrap(summaryLenBytes).asIntBuffer().put(summaryLen);
+    out.write(summaryLenBytes);
+  }
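
(To make the trailer layout concrete: a reader recovers the FileSummary by
reading the last 4 bytes as a big-endian length and seeking back over the
summary itself. A minimal sketch; the file name comes from the caller and
decoding of the summary bytes is omitted.)

    import java.io.IOException;
    import java.io.RandomAccessFile;

    public class FileSummaryTrailerDemo {
      public static void main(String[] args) throws IOException {
        try (RandomAccessFile raf = new RandomAccessFile(args[0], "r")) {
          raf.seek(raf.length() - 4);
          int summaryLen = raf.readInt();  // DataInput reads big-endian
          raf.seek(raf.length() - 4 - summaryLen);
          byte[] summaryBytes = new byte[summaryLen];
          raf.readFully(summaryBytes);
          System.out.println("FileSummary occupies " + summaryLen + " bytes");
        }
      }
    }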
+
+  /**
+   * Run the OfflineImageReconstructor.
+   *
+   * @param inputPath         The input path to use.
+   * @param outputPath        The output path to use.
+   *
+   * @throws Exception        On error.
+   */
+  public static void run(String inputPath, String outputPath)
+      throws Exception {
+    MessageDigest digester = MD5Hash.getDigester();
+    FileOutputStream fout = null;
+    File foutHash = new File(outputPath + ".md5");
+    Files.deleteIfExists(foutHash.toPath()); // delete any .md5 file that exists
+    CountingOutputStream out = null;
+    FileInputStream fis = null;
+    InputStreamReader reader = null;
+    try {
+      Files.deleteIfExists(Paths.get(outputPath));
+      fout = new FileOutputStream(outputPath);
+      fis = new FileInputStream(inputPath);
+      reader = new InputStreamReader(fis, Charset.forName("UTF-8"));
+      out = new CountingOutputStream(
+          new DigestOutputStream(
+              new BufferedOutputStream(fout), digester));
+      OfflineImageReconstructor oir =
+          new OfflineImageReconstructor(out, reader);
+      oir.processXml();
+    } finally {
+      IOUtils.cleanup(LOG, reader, fis, out, fout);
+    }
+    // Write the md5 file
+    MD5FileUtils.saveMD5File(new File(outputPath),
+        new MD5Hash(digester.digest()));
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/700b0e40/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
index 4afbdb8..e184804 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
@@ -60,6 +60,8 @@ public class OfflineImageViewerPB {
       + "  * XML: This processor creates an XML document with all elements of\n"
       + "    the fsimage enumerated, suitable for further analysis by XML\n"
       + "    tools.\n"
+      + "  * reverseXML: This processor takes an XML file and creates a\n"
+      + "    binary fsimage containing the same elements.\n"
       + "  * FileDistribution: This processor analyzes the file size\n"
       + "    distribution in the image.\n"
       + "    -maxSize specifies the range [0, maxSize] of file sizes to be\n"
@@ -73,15 +75,18 @@ public class OfflineImageViewerPB {
       + "    changed via the -delimiter argument.\n"
       + "\n"
       + "Required command line arguments:\n"
-      + "-i,--inputFile <arg>   FSImage file to process.\n"
+      + "-i,--inputFile <arg>   FSImage or XML file to process.\n"
       + "\n"
       + "Optional command line arguments:\n"
       + "-o,--outputFile <arg>  Name of output file. If the specified\n"
       + "                       file exists, it will be overwritten.\n"
       + "                       (output to stdout by default)\n"
+      + "                       If the input file was an XML file, we\n"
+      + "                       will also create an <outputFile>.md5 file.\n"
       + "-p,--processor <arg>   Select which type of processor to apply\n"
-      + "                       against image file. (XML|FileDistribution|Web|Delimited)\n"
-      + "                       (Web by default)\n"
+      + "                       against image file. (XML|FileDistribution|\n"
+      + "                       ReverseXML|Web|Delimited)\n"
+      + "                       The default is Web.\n"
       + "-delimiter <arg>       Delimiting string to use with Delimited processor.  \n"
       + "-t,--temp <arg>        Use temporary dir to cache intermediate result to generate\n"
       + "                       Delimited outputs. If not set, Delimited processor constructs\n"
@@ -177,6 +182,16 @@ public class OfflineImageViewerPB {
           new PBImageXmlWriter(conf, out).visit(
               new RandomAccessFile(inputFile, "r"));
           break;
+        case "ReverseXML":
+          try {
+            OfflineImageReconstructor.run(inputFile, outputFile);
+          } catch (Exception e) {
+            System.err.println("OfflineImageReconstructor failed: " +
+                e.getMessage());
+            e.printStackTrace(System.err);
+            System.exit(1);
+          }
+          break;
         case "Web":
           String addr = cmd.getOptionValue("addr", "localhost:5978");
           try (WebImageViewer viewer = new WebImageViewer(
@@ -200,6 +215,7 @@ public class OfflineImageViewerPB {
       System.err.println("Input file ended unexpectedly. Exiting");
     } catch (IOException e) {
       System.err.println("Encountered exception.  Exiting: " + e.getMessage());
+      e.printStackTrace(System.err);
     }
     return -1;
   }
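
(Putting the new processor together with the existing XML one, a plausible
round trip looks like this -- the image names are hypothetical; the second
command also leaves an fsimage_rebuilt.md5 beside its output:)

    hdfs oiv -p XML -i fsimage_0000000000000000042 -o fsimage.xml
    hdfs oiv -p ReverseXML -i fsimage.xml -o fsimage_rebuilt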