Posted to common-commits@hadoop.apache.org by vi...@apache.org on 2017/05/17 20:15:51 UTC

[28/29] hadoop git commit: HDFS-10706. [READ] Add tool generating FSImage from external store

HDFS-10706. [READ] Add tool generating FSImage from external store


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/616765e9
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/616765e9
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/616765e9

Branch: refs/heads/HDFS-9806
Commit: 616765e9683f122b3eb7c37e73239c7b8014ff1a
Parents: 2630e4f
Author: Virajith Jalaparti <vi...@apache.org>
Authored: Sat Apr 15 12:15:08 2017 -0700
Committer: Virajith Jalaparti <vi...@apache.org>
Committed: Wed May 17 12:41:52 2017 -0700

----------------------------------------------------------------------
 hadoop-tools/hadoop-fs2img/pom.xml              |  87 +++
 .../hdfs/server/namenode/BlockResolver.java     |  95 +++
 .../hadoop/hdfs/server/namenode/FSTreeWalk.java | 105 ++++
 .../hdfs/server/namenode/FileSystemImage.java   | 139 +++++
 .../FixedBlockMultiReplicaResolver.java         |  44 ++
 .../server/namenode/FixedBlockResolver.java     |  93 +++
 .../hdfs/server/namenode/FsUGIResolver.java     |  58 ++
 .../hdfs/server/namenode/ImageWriter.java       | 600 +++++++++++++++++++
 .../hdfs/server/namenode/NullBlockFormat.java   |  87 +++
 .../hdfs/server/namenode/SingleUGIResolver.java |  90 +++
 .../hadoop/hdfs/server/namenode/TreePath.java   | 167 ++++++
 .../hadoop/hdfs/server/namenode/TreeWalk.java   | 103 ++++
 .../hdfs/server/namenode/UGIResolver.java       | 131 ++++
 .../hdfs/server/namenode/package-info.java      |  23 +
 .../hdfs/server/namenode/RandomTreeWalk.java    | 186 ++++++
 .../server/namenode/TestFixedBlockResolver.java | 121 ++++
 .../server/namenode/TestRandomTreeWalk.java     | 130 ++++
 .../server/namenode/TestSingleUGIResolver.java  | 148 +++++
 .../src/test/resources/log4j.properties         |  24 +
 hadoop-tools/hadoop-tools-dist/pom.xml          |   6 +
 hadoop-tools/pom.xml                            |   1 +
 21 files changed, 2438 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/pom.xml b/hadoop-tools/hadoop-fs2img/pom.xml
new file mode 100644
index 0000000..36096b7
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/pom.xml
@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<project>
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hadoop</groupId>
+    <artifactId>hadoop-project</artifactId>
+    <version>3.0.0-alpha3-SNAPSHOT</version>
+    <relativePath>../../hadoop-project</relativePath>
+  </parent>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-fs2img</artifactId>
+  <version>3.0.0-alpha3-SNAPSHOT</version>
+  <description>fs2img</description>
+  <name>fs2img</name>
+  <packaging>jar</packaging>
+
+  <properties>
+    <hadoop.log.dir>${project.build.directory}/log</hadoop.log.dir>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-minicluster</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <archive>
+            <manifest>
+              <mainClass>org.apache.hadoop.hdfs.server.namenode.FileSystemImage</mainClass>
+            </manifest>
+          </archive>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+
+</project>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java
new file mode 100644
index 0000000..94b92b8
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/BlockResolver.java
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+
+/**
+ * Given an external reference, create a sequence of blocks and associated
+ * metadata.
+ */
+public abstract class BlockResolver {
+
+  protected BlockProto buildBlock(long blockId, long bytes) {
+    return buildBlock(blockId, bytes, 1001);
+  }
+
+  protected BlockProto buildBlock(long blockId, long bytes, long genstamp) {
+    BlockProto.Builder b = BlockProto.newBuilder()
+        .setBlockId(blockId)
+        .setNumBytes(bytes)
+        .setGenStamp(genstamp);
+    return b.build();
+  }
+
+  /**
+   * @param s the external reference.
+   * @return sequence of blocks that make up the reference.
+   */
+  public Iterable<BlockProto> resolve(FileStatus s) {
+    List<Long> lengths = blockLengths(s);
+    ArrayList<BlockProto> ret = new ArrayList<>(lengths.size());
+    long tot = 0;
+    for (long l : lengths) {
+      tot += l;
+      ret.add(buildBlock(nextId(), l));
+    }
+    if (tot != s.getLen()) {
+      // log a warning?
+      throw new IllegalStateException(
+          "Expected " + s.getLen() + " found " + tot);
+    }
+    return ret;
+  }
+
+  /**
+   * @return the next block id.
+   */
+  public abstract long nextId();
+
+  /**
+   * @return the maximum sequentially allocated block ID for this filesystem.
+   */
+  protected abstract long lastId();
+
+  /**
+   * @param status the external reference.
+   * @return the lengths of the resultant blocks.
+   */
+  protected abstract List<Long> blockLengths(FileStatus status);
+
+
+  /**
+   * @param status the external reference.
+   * @return the block size to assign to this external reference.
+   */
+  public long preferredBlockSize(FileStatus status) {
+    return status.getBlockSize();
+  }
+
+  /**
+   * @param status the external reference.
+   * @return the replication to assign to this external reference.
+   */
+  public abstract int getReplication(FileStatus status);
+
+}

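For context (not part of this patch), a minimal sketch of the contract above: a hypothetical resolver that maps every file to a single block with replication 1. The class name and starting block id are illustrative only.

    import java.util.Collections;
    import java.util.List;
    import java.util.concurrent.atomic.AtomicLong;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.hdfs.server.namenode.BlockResolver;

    /** Hypothetical resolver: one block per file, replication 1. */
    public class WholeFileBlockResolver extends BlockResolver {

      private final AtomicLong ids = new AtomicLong(1L << 30);

      @Override
      public long nextId() {
        return ids.incrementAndGet();
      }

      @Override
      protected long lastId() {
        return ids.get();
      }

      @Override
      protected List<Long> blockLengths(FileStatus status) {
        // one block covering the whole file; an empty file gets one 0-length block
        return Collections.singletonList(status.getLen());
      }

      @Override
      public int getReplication(FileStatus status) {
        return 1;
      }
    }
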
http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java
new file mode 100644
index 0000000..f736112
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSTreeWalk.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.ConcurrentModificationException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Traversal of an external FileSystem.
+ */
+public class FSTreeWalk extends TreeWalk {
+
+  private final Path root;
+  private final FileSystem fs;
+
+  public FSTreeWalk(Path root, Configuration conf) throws IOException {
+    this.root = root;
+    fs = root.getFileSystem(conf);
+  }
+
+  @Override
+  protected Iterable<TreePath> getChildren(TreePath path, long id,
+      TreeIterator i) {
+    // TODO symlinks
+    if (!path.getFileStatus().isDirectory()) {
+      return Collections.emptyList();
+    }
+    try {
+      ArrayList<TreePath> ret = new ArrayList<>();
+      for (FileStatus s : fs.listStatus(path.getFileStatus().getPath())) {
+        ret.add(new TreePath(s, id, i));
+      }
+      return ret;
+    } catch (FileNotFoundException e) {
+      throw new ConcurrentModificationException("FS modified");
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  class FSTreeIterator extends TreeIterator {
+
+    private FSTreeIterator() {
+    }
+
+    FSTreeIterator(TreePath p) {
+      getPendingQueue().addFirst(
+          new TreePath(p.getFileStatus(), p.getParentId(), this));
+    }
+
+    FSTreeIterator(Path p) throws IOException {
+      try {
+        FileStatus s = fs.getFileStatus(p);
+        getPendingQueue().addFirst(new TreePath(s, -1L, this));
+      } catch (FileNotFoundException e) {
+        if (p.equals(root)) {
+          throw e;
+        }
+        throw new ConcurrentModificationException("FS modified");
+      }
+    }
+
+    @Override
+    public TreeIterator fork() {
+      if (getPendingQueue().isEmpty()) {
+        return new FSTreeIterator();
+      }
+      return new FSTreeIterator(getPendingQueue().removeFirst());
+    }
+
+  }
+
+  @Override
+  public TreeIterator iterator() {
+    try {
+      return new FSTreeIterator(root);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java
new file mode 100644
index 0000000..e1e85c1
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileSystemImage.java
@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.File;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.common.BlockFormat;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * Create FSImage from an external namespace.
+ */
+public class FileSystemImage implements Tool {
+
+  private Configuration conf;
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    // require absolute URI to write anywhere but local
+    FileSystem.setDefaultUri(conf, new File(".").toURI().toString());
+  }
+
+  protected void printUsage() {
+    HelpFormatter formatter = new HelpFormatter();
+    formatter.printHelp("fs2img [OPTIONS] URI", new Options());
+    formatter.setSyntaxPrefix("");
+    formatter.printHelp("Options", options());
+    ToolRunner.printGenericCommandUsage(System.out);
+  }
+
+  static Options options() {
+    Options options = new Options();
+    options.addOption("o", "outdir", true, "Output directory");
+    options.addOption("u", "ugiclass", true, "UGI resolver class");
+    options.addOption("b", "blockclass", true, "Block output class");
+    options.addOption("i", "blockidclass", true, "Block resolver class");
+    options.addOption("c", "cachedirs", true, "Max active dirents");
+    options.addOption("h", "help", false, "Print usage");
+    return options;
+  }
+
+  @Override
+  public int run(String[] argv) throws Exception {
+    Options options = options();
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd;
+    try {
+      cmd = parser.parse(options, argv);
+    } catch (ParseException e) {
+      System.out.println(
+          "Error parsing command-line options: " + e.getMessage());
+      printUsage();
+      return -1;
+    }
+
+    if (cmd.hasOption("h")) {
+      printUsage();
+      return -1;
+    }
+
+    ImageWriter.Options opts =
+        ReflectionUtils.newInstance(ImageWriter.Options.class, getConf());
+    for (Option o : cmd.getOptions()) {
+      switch (o.getOpt()) {
+      case "o":
+        opts.output(o.getValue());
+        break;
+      case "u":
+        opts.ugi(Class.forName(o.getValue()).asSubclass(UGIResolver.class));
+        break;
+      case "b":
+        opts.blocks(
+            Class.forName(o.getValue()).asSubclass(BlockFormat.class));
+        break;
+      case "i":
+        opts.blockIds(
+            Class.forName(o.getValue()).asSubclass(BlockResolver.class));
+        break;
+      case "c":
+        opts.cache(Integer.parseInt(o.getValue()));
+        break;
+      default:
+        throw new UnsupportedOperationException("Internal error");
+      }
+    }
+
+    String[] rem = cmd.getArgs();
+    if (rem.length != 1) {
+      printUsage();
+      return -1;
+    }
+
+    try (ImageWriter w = new ImageWriter(opts)) {
+      for (TreePath e : new FSTreeWalk(new Path(rem[0]), getConf())) {
+        w.accept(e); // add and continue
+      }
+    }
+    return 0;
+  }
+
+  public static void main(String[] argv) throws Exception {
+    int ret = ToolRunner.run(new FileSystemImage(), argv);
+    System.exit(ret);
+  }
+
+}

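For context (not part of this patch), a sketch of driving the tool programmatically, mirroring main() above. The output directory and namespace URI are placeholders, and the resolver classes shown are the defaults.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.server.namenode.FileSystemImage;
    import org.apache.hadoop.util.ToolRunner;

    public class Fs2ImgExample {
      public static void main(String[] args) throws Exception {
        String[] argv = {
            "-o", "/tmp/fsimage-out",                       // output directory (placeholder)
            "-u", "org.apache.hadoop.hdfs.server.namenode.SingleUGIResolver",
            "-i", "org.apache.hadoop.hdfs.server.namenode.FixedBlockResolver",
            "hdfs://external-namespace/"                    // external namespace URI (placeholder)
        };
        int ret = ToolRunner.run(new Configuration(), new FileSystemImage(), argv);
        System.exit(ret);
      }
    }
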
http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java
new file mode 100644
index 0000000..0c8ce6e
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockMultiReplicaResolver.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+
+/**
+ * Resolver mapping all files to a configurable, uniform blocksize
+ * and replication.
+ */
+public class FixedBlockMultiReplicaResolver extends FixedBlockResolver {
+
+  public static final String REPLICATION =
+      "hdfs.image.writer.resolver.fixed.block.replication";
+
+  private int replication;
+
+  @Override
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+    replication = conf.getInt(REPLICATION, 1);
+  }
+
+  public int getReplication(FileStatus s) {
+    return replication;
+  }
+
+}

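For context (not part of this patch), a sketch of configuring the replication this resolver applies; the value is illustrative.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.hdfs.server.namenode.FixedBlockMultiReplicaResolver;
    import org.apache.hadoop.util.ReflectionUtils;

    public class MultiReplicaExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt(FixedBlockMultiReplicaResolver.REPLICATION, 3); // example value
        FixedBlockMultiReplicaResolver r =
            ReflectionUtils.newInstance(FixedBlockMultiReplicaResolver.class, conf);
        System.out.println(r.getReplication(new FileStatus()));     // prints 3
      }
    }
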
http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java
new file mode 100644
index 0000000..8ff9695
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FixedBlockResolver.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+
+/**
+ * Resolver mapping all files to a configurable, uniform blocksize.
+ */
+public class FixedBlockResolver extends BlockResolver implements Configurable {
+
+  public static final String BLOCKSIZE =
+      "hdfs.image.writer.resolver.fixed.block.size";
+  public static final String START_BLOCK =
+      "hdfs.image.writer.resolver.fixed.block.start";
+
+  private Configuration conf;
+  private long blocksize = 256 * (1L << 20);
+  private final AtomicLong blockIds = new AtomicLong(0);
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    blocksize = conf.getLong(BLOCKSIZE, 256 * (1L << 20));
+    blockIds.set(conf.getLong(START_BLOCK, (1L << 30)));
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  protected List<Long> blockLengths(FileStatus s) {
+    ArrayList<Long> ret = new ArrayList<>();
+    if (!s.isFile()) {
+      return ret;
+    }
+    if (0 == s.getLen()) {
+      // the file has length 0; so we will have one block of size 0
+      ret.add(0L);
+      return ret;
+    }
+    int nblocks = (int)((s.getLen() - 1) / blocksize) + 1;
+    for (int i = 0; i < nblocks - 1; ++i) {
+      ret.add(blocksize);
+    }
+    long rem = s.getLen() % blocksize;
+    ret.add(0 == (rem % blocksize) ? blocksize : rem);
+    return ret;
+  }
+
+  @Override
+  public long nextId() {
+    return blockIds.incrementAndGet();
+  }
+
+  @Override
+  public long lastId() {
+    return blockIds.get();
+  }
+
+  @Override
+  public long preferredBlockSize(FileStatus s) {
+    return blocksize;
+  }
+
+  @Override
+  public int getReplication(FileStatus s) {
+    return 1;
+  }
+}

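For context (not part of this patch), a sketch of configuring the resolver and resolving a 300 MB file into 128 MB blocks; the path and sizes are illustrative.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
    import org.apache.hadoop.hdfs.server.namenode.FixedBlockResolver;
    import org.apache.hadoop.util.ReflectionUtils;

    public class FixedBlockResolverExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setLong(FixedBlockResolver.BLOCKSIZE, 128L << 20);   // 128 MB blocks
        conf.setLong(FixedBlockResolver.START_BLOCK, 1L << 30);   // first block id
        FixedBlockResolver resolver =
            ReflectionUtils.newInstance(FixedBlockResolver.class, conf);

        // A 300 MB file resolves to 128 MB + 128 MB + 44 MB blocks.
        FileStatus stat = new FileStatus(300L << 20, false, 1, 128L << 20,
            0L, new Path("/data/example.bin"));                   // placeholder path
        for (BlockProto b : resolver.resolve(stat)) {
          System.out.println(b.getBlockId() + " " + b.getNumBytes());
        }
      }
    }
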
http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java
new file mode 100644
index 0000000..ca16d96
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsUGIResolver.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Dynamically assign ids to users/groups as they appear in the external
+ * filesystem.
+ */
+public class FsUGIResolver extends UGIResolver {
+
+  private int id;
+  private final Set<String> usernames;
+  private final Set<String> groupnames;
+
+  FsUGIResolver() {
+    super();
+    id = 0;
+    usernames = new HashSet<String>();
+    groupnames = new HashSet<String>();
+  }
+
+  @Override
+  public synchronized void addUser(String name) {
+    if (!usernames.contains(name)) {
+      addUser(name, id);
+      id++;
+      usernames.add(name);
+    }
+  }
+
+  @Override
+  public synchronized void addGroup(String name) {
+    if (!groupnames.contains(name)) {
+      addGroup(name, id);
+      id++;
+      groupnames.add(name);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java
new file mode 100644
index 0000000..a3603a1
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/ImageWriter.java
@@ -0,0 +1,600 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.BufferedOutputStream;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FilterOutputStream;
+import java.io.OutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.security.DigestOutputStream;
+import java.security.MessageDigest;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.atomic.AtomicLong;
+
+import com.google.common.base.Charsets;
+import com.google.protobuf.CodedOutputStream;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.server.common.BlockFormat;
+import org.apache.hadoop.hdfs.server.common.FileRegion;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection.DirEntry;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection;
+import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressorStream;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.hadoop.util.StringUtils;
+
+import static org.apache.hadoop.hdfs.server.namenode.FSImageUtil.MAGIC_HEADER;
+
+/**
+ * Utility crawling an existing hierarchical FileSystem and emitting
+ * a valid FSImage/NN storage.
+ */
+// TODO: generalize to types beyond FileRegion
+public class ImageWriter implements Closeable {
+
+  private static final int ONDISK_VERSION = 1;
+  private static final int LAYOUT_VERSION = -64; // see NameNodeLayoutVersion
+
+  private final Path outdir;
+  private final FileSystem outfs;
+  private final File dirsTmp;
+  private final OutputStream dirs;
+  private final File inodesTmp;
+  private final OutputStream inodes;
+  private final MessageDigest digest;
+  private final FSImageCompression compress;
+  private final long startBlock;
+  private final long startInode;
+  private final UGIResolver ugis;
+  private final BlockFormat.Writer<FileRegion> blocks;
+  private final BlockResolver blockIds;
+  private final Map<Long, DirEntry.Builder> dircache;
+  private final TrackedOutputStream<DigestOutputStream> raw;
+
+  private boolean closed = false;
+  private long curSec;
+  private long curBlock;
+  private final AtomicLong curInode;
+  private final FileSummary.Builder summary = FileSummary.newBuilder()
+      .setOndiskVersion(ONDISK_VERSION)
+      .setLayoutVersion(LAYOUT_VERSION);
+
+  private final String blockPoolID;
+
+  public static Options defaults() {
+    return new Options();
+  }
+
+  @SuppressWarnings("unchecked")
+  public ImageWriter(Options opts) throws IOException {
+    final OutputStream out;
+    if (null == opts.outStream) {
+      FileSystem fs = opts.outdir.getFileSystem(opts.getConf());
+      outfs = (fs instanceof LocalFileSystem)
+          ? ((LocalFileSystem)fs).getRaw()
+          : fs;
+      Path tmp = opts.outdir;
+      if (!outfs.mkdirs(tmp)) {
+        throw new IOException("Failed to create output dir: " + tmp);
+      }
+      try (NNStorage stor = new NNStorage(opts.getConf(),
+          Arrays.asList(tmp.toUri()), Arrays.asList(tmp.toUri()))) {
+        NamespaceInfo info = NNStorage.newNamespaceInfo();
+        if (info.getLayoutVersion() != LAYOUT_VERSION) {
+          throw new IllegalStateException("Incompatible layout " +
+              info.getLayoutVersion() + " (expected " + LAYOUT_VERSION + ")");
+        }
+        stor.format(info);
+        blockPoolID = info.getBlockPoolID();
+      }
+      outdir = new Path(tmp, "current");
+      out = outfs.create(new Path(outdir, "fsimage_0000000000000000000"));
+    } else {
+      // XXX necessary? writing a NNStorage now...
+      outdir = null;
+      outfs = null;
+      out = opts.outStream;
+      blockPoolID = "";
+    }
+    digest = MD5Hash.getDigester();
+    raw = new TrackedOutputStream<>(new DigestOutputStream(
+            new BufferedOutputStream(out), digest));
+    compress = opts.compress;
+    CompressionCodec codec = compress.getImageCodec();
+    if (codec != null) {
+      summary.setCodec(codec.getClass().getCanonicalName());
+    }
+    startBlock = opts.startBlock;
+    curBlock = startBlock;
+    startInode = opts.startInode;
+    curInode = new AtomicLong(startInode);
+    dircache = Collections.synchronizedMap(new DirEntryCache(opts.maxdircache));
+
+    ugis = null == opts.ugis
+        ? ReflectionUtils.newInstance(opts.ugisClass, opts.getConf())
+        : opts.ugis;
+    BlockFormat<FileRegion> fmt = null == opts.blocks
+        ? ReflectionUtils.newInstance(opts.blockFormatClass, opts.getConf())
+        : opts.blocks;
+    blocks = fmt.getWriter(null);
+    blockIds = null == opts.blockIds
+        ? ReflectionUtils.newInstance(opts.blockIdsClass, opts.getConf())
+        : opts.blockIds;
+
+    // create directory and inode sections as side-files.
+    // The details are written to files to avoid keeping them in memory.
+    dirsTmp = File.createTempFile("fsimg_dir", null);
+    dirsTmp.deleteOnExit();
+    dirs = beginSection(new FileOutputStream(dirsTmp));
+    try {
+      inodesTmp = File.createTempFile("fsimg_inode", null);
+      inodesTmp.deleteOnExit();
+      inodes = new FileOutputStream(inodesTmp);
+    } catch (IOException e) {
+      // appropriate to close raw?
+      IOUtils.cleanup(null, raw, dirs);
+      throw e;
+    }
+
+    raw.write(MAGIC_HEADER);
+    curSec = raw.pos;
+    assert raw.pos == MAGIC_HEADER.length;
+  }
+
+  public void accept(TreePath e) throws IOException {
+    assert e.getParentId() < curInode.get();
+    // allocate ID
+    long id = curInode.getAndIncrement();
+    e.accept(id);
+    assert e.getId() < curInode.get();
+    INode n = e.toINode(ugis, blockIds, blocks, blockPoolID);
+    writeInode(n);
+
+    if (e.getParentId() > 0) {
+      // add DirEntry to map, which may page out entries
+      DirEntry.Builder de = DirEntry.newBuilder()
+          .setParent(e.getParentId())
+          .addChildren(e.getId());
+      dircache.put(e.getParentId(), de);
+    }
+  }
+
+  @SuppressWarnings("serial")
+  class DirEntryCache extends LinkedHashMap<Long, DirEntry.Builder> {
+
+    // entries on the path to the root should stay cached rather than be evicted by LRU
+    private final int nEntries;
+
+    DirEntryCache(int nEntries) {
+      this.nEntries = nEntries;
+    }
+
+    @Override
+    public DirEntry.Builder put(Long p, DirEntry.Builder b) {
+      DirEntry.Builder e = get(p);
+      if (null == e) {
+        return super.put(p, b);
+      }
+      //merge
+      e.addAllChildren(b.getChildrenList());
+      // not strictly conforming
+      return e;
+    }
+
+    @Override
+    protected boolean removeEldestEntry(Entry<Long, DirEntry.Builder> be) {
+      if (size() > nEntries) {
+        DirEntry d = be.getValue().build();
+        try {
+          writeDirEntry(d);
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+        return true;
+      }
+      return false;
+    }
+  }
+
+  synchronized void writeInode(INode n) throws IOException {
+    n.writeDelimitedTo(inodes);
+  }
+
+  synchronized void writeDirEntry(DirEntry e) throws IOException {
+    e.writeDelimitedTo(dirs);
+  }
+
+  // from FSImageFormatProtobuf... why not just read position from the stream?
+  private static int getOndiskSize(com.google.protobuf.GeneratedMessage s) {
+    return CodedOutputStream.computeRawVarint32Size(s.getSerializedSize())
+        + s.getSerializedSize();
+  }
+
+  @Override
+  public synchronized void close() throws IOException {
+    if (closed) {
+      return;
+    }
+    for (DirEntry.Builder b : dircache.values()) {
+      DirEntry e = b.build();
+      writeDirEntry(e);
+    }
+    dircache.clear();
+
+    // close side files
+    IOUtils.cleanup(null, dirs, inodes, blocks);
+    if (null == dirs || null == inodes) {
+      // init failed
+      if (raw != null) {
+        raw.close();
+      }
+      return;
+    }
+    try {
+      writeNameSystemSection();
+      writeINodeSection();
+      writeDirSection();
+      writeStringTableSection();
+
+      // write summary directly to raw
+      FileSummary s = summary.build();
+      s.writeDelimitedTo(raw);
+      int length = getOndiskSize(s);
+      byte[] lengthBytes = new byte[4];
+      ByteBuffer.wrap(lengthBytes).asIntBuffer().put(length);
+      raw.write(lengthBytes);
+    } finally {
+      raw.close();
+    }
+    writeMD5("fsimage_0000000000000000000");
+    closed = true;
+  }
+
+  /**
+   * Write checksum for image file. Pulled from MD5Utils/internals. Awkward to
+   * reuse existing tools/utils.
+   */
+  void writeMD5(String imagename) throws IOException {
+    if (null == outdir) {
+      //LOG.warn("Not writing MD5");
+      return;
+    }
+    MD5Hash md5 = new MD5Hash(digest.digest());
+    String digestString = StringUtils.byteToHexString(md5.getDigest());
+    Path chk = new Path(outdir, imagename + ".md5");
+    try (OutputStream out = outfs.create(chk)) {
+      String md5Line = digestString + " *" + imagename + "\n";
+      out.write(md5Line.getBytes(Charsets.UTF_8));
+    }
+  }
+
+  OutputStream beginSection(OutputStream out) throws IOException {
+    CompressionCodec codec = compress.getImageCodec();
+    if (null == codec) {
+      return out;
+    }
+    return codec.createOutputStream(out);
+  }
+
+  void endSection(OutputStream out, SectionName name) throws IOException {
+    CompressionCodec codec = compress.getImageCodec();
+    if (codec != null) {
+      ((CompressorStream)out).finish();
+    }
+    out.flush();
+    long length = raw.pos - curSec;
+    summary.addSections(FileSummary.Section.newBuilder()
+        .setName(name.toString()) // not strictly correct, but name not visible
+        .setOffset(curSec).setLength(length));
+    curSec += length;
+  }
+
+  void writeNameSystemSection() throws IOException {
+    NameSystemSection.Builder b = NameSystemSection.newBuilder()
+        .setGenstampV1(1000)
+        .setGenstampV1Limit(0)
+        .setGenstampV2(1001)
+        .setLastAllocatedBlockId(blockIds.lastId())
+        .setTransactionId(0);
+    NameSystemSection s = b.build();
+
+    OutputStream sec = beginSection(raw);
+    s.writeDelimitedTo(sec);
+    endSection(sec, SectionName.NS_INFO);
+  }
+
+  void writeINodeSection() throws IOException {
+    // could reset dict to avoid compression cost in close
+    INodeSection.Builder b = INodeSection.newBuilder()
+        .setNumInodes(curInode.get() - startInode)
+        .setLastInodeId(curInode.get());
+    INodeSection s = b.build();
+
+    OutputStream sec = beginSection(raw);
+    s.writeDelimitedTo(sec);
+    // copy inodes
+    try (FileInputStream in = new FileInputStream(inodesTmp)) {
+      IOUtils.copyBytes(in, sec, 4096, false);
+    }
+    endSection(sec, SectionName.INODE);
+  }
+
+  void writeDirSection() throws IOException {
+    // No header, so dirs can be written/compressed independently
+    //INodeDirectorySection.Builder b = INodeDirectorySection.newBuilder();
+    OutputStream sec = raw;
+    // copy dirs
+    try (FileInputStream in = new FileInputStream(dirsTmp)) {
+      IOUtils.copyBytes(in, sec, 4096, false);
+    }
+    endSection(sec, SectionName.INODE_DIR);
+  }
+
+  void writeFilesUCSection() throws IOException {
+    FilesUnderConstructionSection.Builder b =
+        FilesUnderConstructionSection.newBuilder();
+    FilesUnderConstructionSection s = b.build();
+
+    OutputStream sec = beginSection(raw);
+    s.writeDelimitedTo(sec);
+    endSection(sec, SectionName.FILES_UNDERCONSTRUCTION);
+  }
+
+  void writeSnapshotDiffSection() throws IOException {
+    SnapshotDiffSection.Builder b = SnapshotDiffSection.newBuilder();
+    SnapshotDiffSection s = b.build();
+
+    OutputStream sec = beginSection(raw);
+    s.writeDelimitedTo(sec);
+    endSection(sec, SectionName.SNAPSHOT_DIFF);
+  }
+
+  void writeSecretManagerSection() throws IOException {
+    SecretManagerSection.Builder b = SecretManagerSection.newBuilder()
+        .setCurrentId(0)
+        .setTokenSequenceNumber(0);
+    SecretManagerSection s = b.build();
+
+    OutputStream sec = beginSection(raw);
+    s.writeDelimitedTo(sec);
+    endSection(sec, SectionName.SECRET_MANAGER);
+  }
+
+  void writeCacheManagerSection() throws IOException {
+    CacheManagerSection.Builder b = CacheManagerSection.newBuilder()
+        .setNumPools(0)
+        .setNumDirectives(0)
+        .setNextDirectiveId(1);
+    CacheManagerSection s = b.build();
+
+    OutputStream sec = beginSection(raw);
+    s.writeDelimitedTo(sec);
+    endSection(sec, SectionName.CACHE_MANAGER);
+  }
+
+  void writeStringTableSection() throws IOException {
+    StringTableSection.Builder b = StringTableSection.newBuilder();
+    Map<Integer, String> u = ugis.ugiMap();
+    b.setNumEntry(u.size());
+    StringTableSection s = b.build();
+
+    OutputStream sec = beginSection(raw);
+    s.writeDelimitedTo(sec);
+    for (Map.Entry<Integer, String> e : u.entrySet()) {
+      StringTableSection.Entry.Builder x =
+          StringTableSection.Entry.newBuilder()
+              .setId(e.getKey())
+              .setStr(e.getValue());
+      x.build().writeDelimitedTo(sec);
+    }
+    endSection(sec, SectionName.STRING_TABLE);
+  }
+
+  @Override
+  public synchronized String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("{ codec=\"").append(compress.getImageCodec());
+    sb.append("\", startBlock=").append(startBlock);
+    sb.append(", curBlock=").append(curBlock);
+    sb.append(", startInode=").append(startInode);
+    sb.append(", curInode=").append(curInode);
+    sb.append(", ugi=").append(ugis);
+    sb.append(", blockIds=").append(blockIds);
+    sb.append(", offset=").append(raw.pos);
+    sb.append(" }");
+    return sb.toString();
+  }
+
+  static class TrackedOutputStream<T extends OutputStream>
+      extends FilterOutputStream {
+
+    private long pos = 0L;
+
+    TrackedOutputStream(T out) {
+      super(out);
+    }
+
+    @SuppressWarnings("unchecked")
+    public T getInner() {
+      return (T) out;
+    }
+
+    @Override
+    public void write(int b) throws IOException {
+      out.write(b);
+      ++pos;
+    }
+
+    @Override
+    public void write(byte[] b) throws IOException {
+      write(b, 0, b.length);
+    }
+
+    @Override
+    public void write(byte[] b, int off, int len) throws IOException {
+      out.write(b, off, len);
+      pos += len;
+    }
+
+    @Override
+    public void flush() throws IOException {
+      super.flush();
+    }
+
+    @Override
+    public void close() throws IOException {
+      super.close();
+    }
+
+  }
+
+  /**
+   * Configurable options for image generation mapping pluggable components.
+   */
+  public static class Options implements Configurable {
+
+    public static final String START_INODE = "hdfs.image.writer.start.inode";
+    public static final String CACHE_ENTRY = "hdfs.image.writer.cache.entries";
+    public static final String UGI_CLASS   = "hdfs.image.writer.ugi.class";
+    public static final String BLOCK_RESOLVER_CLASS =
+        "hdfs.image.writer.blockresolver.class";
+
+    private Path outdir;
+    private Configuration conf;
+    private OutputStream outStream;
+    private int maxdircache;
+    private long startBlock;
+    private long startInode;
+    private UGIResolver ugis;
+    private Class<? extends UGIResolver> ugisClass;
+    private BlockFormat<FileRegion> blocks;
+
+    @SuppressWarnings("rawtypes")
+    private Class<? extends BlockFormat> blockFormatClass;
+    private BlockResolver blockIds;
+    private Class<? extends BlockResolver> blockIdsClass;
+    private FSImageCompression compress =
+        FSImageCompression.createNoopCompression();
+
+    protected Options() {
+    }
+
+    @Override
+    public void setConf(Configuration conf) {
+      this.conf = conf;
+      //long lastTxn = conf.getLong(LAST_TXN, 0L);
+      String def = new File("hdfs/name").toURI().toString();
+      outdir = new Path(conf.get(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, def));
+      startBlock = conf.getLong(FixedBlockResolver.START_BLOCK, (1L << 30) + 1);
+      startInode = conf.getLong(START_INODE, (1L << 14) + 1);
+      maxdircache = conf.getInt(CACHE_ENTRY, 100);
+      ugisClass = conf.getClass(UGI_CLASS,
+          SingleUGIResolver.class, UGIResolver.class);
+      blockFormatClass = conf.getClass(
+          DFSConfigKeys.DFS_PROVIDER_BLK_FORMAT_CLASS,
+          NullBlockFormat.class, BlockFormat.class);
+      blockIdsClass = conf.getClass(BLOCK_RESOLVER_CLASS,
+          FixedBlockResolver.class, BlockResolver.class);
+    }
+
+    @Override
+    public Configuration getConf() {
+      return conf;
+    }
+
+    public Options output(String out) {
+      this.outdir = new Path(out);
+      return this;
+    }
+
+    public Options outStream(OutputStream outStream) {
+      this.outStream = outStream;
+      return this;
+    }
+
+    public Options codec(String codec) throws IOException {
+      this.compress = FSImageCompression.createCompression(getConf(), codec);
+      return this;
+    }
+
+    public Options cache(int nDirEntries) {
+      this.maxdircache = nDirEntries;
+      return this;
+    }
+
+    public Options ugi(UGIResolver ugis) {
+      this.ugis = ugis;
+      return this;
+    }
+
+    public Options ugi(Class<? extends UGIResolver> ugisClass) {
+      this.ugisClass = ugisClass;
+      return this;
+    }
+
+    public Options blockIds(BlockResolver blockIds) {
+      this.blockIds = blockIds;
+      return this;
+    }
+
+    public Options blockIds(Class<? extends BlockResolver> blockIdsClass) {
+      this.blockIdsClass = blockIdsClass;
+      return this;
+    }
+
+    public Options blocks(BlockFormat<FileRegion> blocks) {
+      this.blocks = blocks;
+      return this;
+    }
+
+    @SuppressWarnings("rawtypes")
+    public Options blocks(Class<? extends BlockFormat> blocksClass) {
+      this.blockFormatClass = blocksClass;
+      return this;
+    }
+
+  }
+
+}

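For context (not part of this patch), a condensed sketch of how ImageWriter is driven, essentially what FileSystemImage.run does above; the output directory and source URI are placeholders and the default resolvers are used.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.server.namenode.FSTreeWalk;
    import org.apache.hadoop.hdfs.server.namenode.ImageWriter;
    import org.apache.hadoop.hdfs.server.namenode.TreePath;
    import org.apache.hadoop.util.ReflectionUtils;

    public class ImageWriterExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        ImageWriter.Options opts =
            ReflectionUtils.newInstance(ImageWriter.Options.class, conf);
        opts.output("/tmp/fsimage-out");                       // placeholder output dir

        // Walk the external namespace and emit one inode per TreePath.
        try (ImageWriter w = new ImageWriter(opts)) {
          for (TreePath e : new FSTreeWalk(new Path("hdfs://external-namespace/"), conf)) {
            w.accept(e);
          }
        }
      }
    }
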
http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java
new file mode 100644
index 0000000..aabdf74
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/NullBlockFormat.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.server.common.BlockFormat;
+import org.apache.hadoop.hdfs.server.common.BlockFormat.Reader.Options;
+import org.apache.hadoop.hdfs.server.common.FileRegion;
+
+/**
+ * Null sink for region information emitted from FSImage.
+ */
+public class NullBlockFormat extends BlockFormat<FileRegion> {
+
+  @Override
+  public Reader<FileRegion> getReader(Options opts) throws IOException {
+    return new Reader<FileRegion>() {
+      @Override
+      public Iterator<FileRegion> iterator() {
+        return new Iterator<FileRegion>() {
+          @Override
+          public boolean hasNext() {
+            return false;
+          }
+          @Override
+          public FileRegion next() {
+            throw new NoSuchElementException();
+          }
+          @Override
+          public void remove() {
+            throw new UnsupportedOperationException();
+          }
+        };
+      }
+
+      @Override
+      public void close() throws IOException {
+        // do nothing
+      }
+
+      @Override
+      public FileRegion resolve(Block ident) throws IOException {
+        throw new UnsupportedOperationException();
+      }
+    };
+  }
+
+  @Override
+  public Writer<FileRegion> getWriter(Writer.Options opts) throws IOException {
+    return new Writer<FileRegion>() {
+      @Override
+      public void store(FileRegion token) throws IOException {
+        // do nothing
+      }
+
+      @Override
+      public void close() throws IOException {
+        // do nothing
+      }
+    };
+  }
+
+  @Override
+  public void refresh() throws IOException {
+    // do nothing
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java
new file mode 100644
index 0000000..0fd3f2b
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/SingleUGIResolver.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.security.UserGroupInformation;
+
+/**
+ * Map all owners/groups in external system to a single user in FSImage.
+ */
+public class SingleUGIResolver extends UGIResolver implements Configurable {
+
+  public static final String UID   = "hdfs.image.writer.ugi.single.uid";
+  public static final String USER  = "hdfs.image.writer.ugi.single.user";
+  public static final String GID   = "hdfs.image.writer.ugi.single.gid";
+  public static final String GROUP = "hdfs.image.writer.ugi.single.group";
+
+  private int uid;
+  private int gid;
+  private String user;
+  private String group;
+  private Configuration conf;
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+    uid = conf.getInt(UID, 0);
+    user = conf.get(USER);
+    if (null == user) {
+      try {
+        user = UserGroupInformation.getCurrentUser().getShortUserName();
+      } catch (IOException e) {
+        user = "hadoop";
+      }
+    }
+    gid = conf.getInt(GID, 1);
+    group = conf.get(GROUP);
+    if (null == group) {
+      group = user;
+    }
+
+    resetUGInfo();
+    addUser(user, uid);
+    addGroup(group, gid);
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  public String user(FileStatus s) {
+    return user;
+  }
+
+  @Override
+  public String group(FileStatus s) {
+    return group;
+  }
+
+  @Override
+  public void addUser(String name) {
+    //do nothing
+  }
+
+  @Override
+  public void addGroup(String name) {
+    //do nothing
+  }
+}

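For context (not part of this patch), a sketch of mapping the entire external namespace to a single user/group via the keys above; the uid/gid and names are illustrative. The resolver is then selected with the tool's -u option or through ImageWriter.Options.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.server.namenode.SingleUGIResolver;
    import org.apache.hadoop.util.ReflectionUtils;

    public class SingleUGIExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInt(SingleUGIResolver.UID, 10001);   // example uid
        conf.set(SingleUGIResolver.USER, "hdfs");    // example user
        conf.setInt(SingleUGIResolver.GID, 10001);   // example gid
        conf.set(SingleUGIResolver.GROUP, "hadoop"); // example group
        SingleUGIResolver ugis =
            ReflectionUtils.newInstance(SingleUGIResolver.class, conf);
        // every owner/group maps to the configured pair
        System.out.println(ugis.user(null) + ":" + ugis.group(null));
      }
    }
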
http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java
new file mode 100644
index 0000000..14e6bed
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreePath.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+
+import com.google.protobuf.ByteString;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+import org.apache.hadoop.hdfs.server.common.BlockFormat;
+import org.apache.hadoop.hdfs.server.common.FileRegion;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile;
+import static org.apache.hadoop.hdfs.DFSUtil.string2Bytes;
+import static org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature.DEFAULT_NAMESPACE_QUOTA;
+import static org.apache.hadoop.hdfs.server.namenode.DirectoryWithQuotaFeature.DEFAULT_STORAGE_SPACE_QUOTA;
+
+/**
+ * Traversal cursor in external filesystem.
+ * TODO: generalize, move FS/FileRegion to FSTreePath
+ */
+public class TreePath {
+  private long id = -1;
+  private final long parentId;
+  private final FileStatus stat;
+  private final TreeWalk.TreeIterator i;
+
+  protected TreePath(FileStatus stat, long parentId, TreeWalk.TreeIterator i) {
+    this.i = i;
+    this.stat = stat;
+    this.parentId = parentId;
+  }
+
+  public FileStatus getFileStatus() {
+    return stat;
+  }
+
+  public long getParentId() {
+    return parentId;
+  }
+
+  public long getId() {
+    if (id < 0) {
+      throw new IllegalStateException();
+    }
+    return id;
+  }
+
+  void accept(long id) {
+    this.id = id;
+    i.onAccept(this, id);
+  }
+
+  public INode toINode(UGIResolver ugi, BlockResolver blk,
+      BlockFormat.Writer<FileRegion> out, String blockPoolID)
+          throws IOException {
+    if (stat.isFile()) {
+      return toFile(ugi, blk, out, blockPoolID);
+    } else if (stat.isDirectory()) {
+      return toDirectory(ugi);
+    } else if (stat.isSymlink()) {
+      throw new UnsupportedOperationException("symlinks not supported");
+    } else {
+      throw new UnsupportedOperationException("Unknown type: " + stat);
+    }
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (!(other instanceof TreePath)) {
+      return false;
+    }
+    TreePath o = (TreePath) other;
+    return getParentId() == o.getParentId()
+      && getFileStatus().equals(o.getFileStatus());
+  }
+
+  @Override
+  public int hashCode() {
+    long pId = getParentId() * getFileStatus().hashCode();
+    return (int)(pId ^ (pId >>> 32));
+  }
+
+  void writeBlock(long blockId, long offset, long length,
+      long genStamp, String blockPoolID,
+      BlockFormat.Writer<FileRegion> out) throws IOException {
+    FileStatus s = getFileStatus();
+    out.store(new FileRegion(blockId, s.getPath(), offset, length,
+        blockPoolID, genStamp));
+  }
+
+  INode toFile(UGIResolver ugi, BlockResolver blk,
+      BlockFormat.Writer<FileRegion> out, String blockPoolID)
+          throws IOException {
+    final FileStatus s = getFileStatus();
+    // TODO should this store resolver's user/group?
+    ugi.addUser(s.getOwner());
+    ugi.addGroup(s.getGroup());
+    INodeFile.Builder b = INodeFile.newBuilder()
+        .setReplication(blk.getReplication(s))
+        .setModificationTime(s.getModificationTime())
+        .setAccessTime(s.getAccessTime())
+        .setPreferredBlockSize(blk.preferredBlockSize(s))
+        .setPermission(ugi.resolve(s))
+        .setStoragePolicyID(HdfsConstants.PROVIDED_STORAGE_POLICY_ID);
+    //TODO: storage policy should be configurable per path; use BlockResolver
+    long off = 0L;
+    for (BlockProto block : blk.resolve(s)) {
+      b.addBlocks(block);
+      writeBlock(block.getBlockId(), off, block.getNumBytes(),
+          block.getGenStamp(), blockPoolID, out);
+      off += block.getNumBytes();
+    }
+    INode.Builder ib = INode.newBuilder()
+        .setType(INode.Type.FILE)
+        .setId(id)
+        .setName(ByteString.copyFrom(string2Bytes(s.getPath().getName())))
+        .setFile(b);
+    return ib.build();
+  }
+
+  INode toDirectory(UGIResolver ugi) {
+    final FileStatus s = getFileStatus();
+    ugi.addUser(s.getOwner());
+    ugi.addGroup(s.getGroup());
+    INodeDirectory.Builder b = INodeDirectory.newBuilder()
+        .setModificationTime(s.getModificationTime())
+        .setNsQuota(DEFAULT_NAMESPACE_QUOTA)
+        .setDsQuota(DEFAULT_STORAGE_SPACE_QUOTA)
+        .setPermission(ugi.resolve(s));
+    INode.Builder ib = INode.newBuilder()
+        .setType(INode.Type.DIRECTORY)
+        .setId(id)
+        .setName(ByteString.copyFrom(string2Bytes(s.getPath().getName())))
+        .setDirectory(b);
+    return ib.build();
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("{ stat=\"").append(getFileStatus()).append("\"");
+    sb.append(", id=").append(getId());
+    sb.append(", parentId=").append(getParentId());
+    sb.append(", iterObjId=").append(System.identityHashCode(i));
+    sb.append(" }");
+    return sb.toString();
+  }
+}
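Note on usage: the conversion above is driven from outside TreePath. A traversal
assigns an inode id to each path it accepts and then asks the path for the
corresponding INode proto. The loop below is a hypothetical sketch only (not part
of this patch); it assumes same-package access, since accept() and the TreePath
constructor are package-private, and it assumes "walk", "ugi", "blocks", "out"
and "bpid" are concrete TreeWalk, UGIResolver, BlockResolver,
BlockFormat.Writer<FileRegion> and block pool id values already in scope.

  // Hypothetical sketch: assign ids in traversal order and emit INode protos.
  // Assumed to run inside a method declared to throw IOException.
  long nextId = 1L;                   // arbitrary starting inode id for the sketch
  for (TreePath p : walk) {
    p.accept(nextId++);               // record the id; this enqueues p's children
    INode inode = p.toINode(ugi, blocks, out, bpid);
    // ... hand the INode to whatever is assembling the image ...
  }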

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java
new file mode 100644
index 0000000..7fd26f9
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/TreeWalk.java
@@ -0,0 +1,103 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.Iterator;
+
+/**
+ * Traversal yielding a hierarchical sequence of paths.
+ */
+public abstract class TreeWalk implements Iterable<TreePath> {
+
+  /**
+   * @param path path to the node being explored.
+   * @param id the id of the node.
+   * @param iterator the {@link TreeIterator} to use.
+   * @return paths representing the children of the current node.
+   */
+  protected abstract Iterable<TreePath> getChildren(
+      TreePath path, long id, TreeWalk.TreeIterator iterator);
+
+  public abstract TreeIterator iterator();
+
+  /**
+   * Enumerator class for hierarchies. Implementations SHOULD support a fork()
+   * operation yielding a subtree of the current cursor.
+   */
+  public abstract class TreeIterator implements Iterator<TreePath> {
+
+    private final Deque<TreePath> pending;
+
+    TreeIterator() {
+      this(new ArrayDeque<TreePath>());
+    }
+
+    protected TreeIterator(Deque<TreePath> pending) {
+      this.pending = pending;
+    }
+
+    public abstract TreeIterator fork();
+
+    @Override
+    public boolean hasNext() {
+      return !pending.isEmpty();
+    }
+
+    @Override
+    public TreePath next() {
+      return pending.removeFirst();
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+
+    protected void onAccept(TreePath p, long id) {
+      for (TreePath k : getChildren(p, id, this)) {
+        pending.addFirst(k);
+      }
+    }
+
+    /**
+     * @return the Deque containing the pending paths.
+     */
+    protected Deque<TreePath> getPendingQueue() {
+      return pending;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder sb = new StringBuilder();
+      sb.append("{ Treewalk=\"").append(TreeWalk.this.toString());
+      sb.append(", pending=[");
+      Iterator<TreePath> i = pending.iterator();
+      if (i.hasNext()) {
+        sb.append("\"").append(i.next()).append("\"");
+      }
+      while (i.hasNext()) {
+        sb.append(", \"").append(i.next()).append("\"");
+      }
+      sb.append("]");
+      sb.append(" }");
+      return sb.toString();
+    }
+  }
+}
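A detail worth calling out: next() only pops a path from the pending deque;
children are enqueued when the caller invokes TreePath#accept, which calls
onAccept above. Children are pushed onto the front of the deque, so the walk
descends depth-first, and a path that is never accepted has its whole subtree
pruned. The fragment below is a hypothetical sketch (not part of this patch,
same-package access assumed, "walk" is some TreeWalk instance) showing that
behaviour:

  // Hypothetical sketch: subtrees rooted at filtered-out paths are skipped,
  // because accept() is never called and their children are never enqueued.
  long id = 0L;
  for (TreePath p : walk) {
    if (p.getFileStatus().getPath().getName().startsWith(".")) {
      continue;               // no accept(): nothing under p is traversed
    }
    p.accept(id++);           // onAccept() pushes p's children onto the deque
  }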

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java
new file mode 100644
index 0000000..2d50668
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/UGIResolver.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.permission.FsPermission;
+
+/**
+ * Pluggable class for mapping ownership and permissions from an external
+ * store to an FSImage.
+ */
+public abstract class UGIResolver {
+
+  static final int USER_STRID_OFFSET = 40;
+  static final int GROUP_STRID_OFFSET = 16;
+  static final long USER_GROUP_STRID_MASK = (1 << 24) - 1;
+
+  /**
+   * Permission is serialized as a 64-bit long: [0:24):[24:48):[48:64) (in big
+   * endian). The first and second parts are the string ids of the user and
+   * group names, and the last 16 bits are the permission bits.
+   * @param owner name of owner
+   * @param group name of group
+   * @param permission permission bits
+   * @return FSImage encoding of permissions
+   */
+  protected final long buildPermissionStatus(
+      String owner, String group, short permission) {
+
+    long userId = users.get(owner);
+    if (0L != ((~USER_GROUP_STRID_MASK) & userId)) {
+      throw new IllegalArgumentException("UID must fit in 24 bits");
+    }
+
+    long groupId = groups.get(group);
+    if (0L != ((~USER_GROUP_STRID_MASK) & groupId)) {
+      throw new IllegalArgumentException("GID must fit in 24 bits");
+    }
+    return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET)
+        | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET)
+        | permission;
+  }
+
+  private final Map<String, Integer> users;
+  private final Map<String, Integer> groups;
+
+  public UGIResolver() {
+    this(new HashMap<String, Integer>(), new HashMap<String, Integer>());
+  }
+
+  UGIResolver(Map<String, Integer> users, Map<String, Integer> groups) {
+    this.users = users;
+    this.groups = groups;
+  }
+
+  public Map<Integer, String> ugiMap() {
+    Map<Integer, String> ret = new HashMap<>();
+    for (Map<String, Integer> m : Arrays.asList(users, groups)) {
+      for (Map.Entry<String, Integer> e : m.entrySet()) {
+        String s = ret.put(e.getValue(), e.getKey());
+        if (s != null) {
+          throw new IllegalStateException("Duplicate mapping: " +
+              e.getValue() + " " + s + " " + e.getKey());
+        }
+      }
+    }
+    return ret;
+  }
+
+  public abstract void addUser(String name);
+
+  protected void addUser(String name, int id) {
+    Integer uid = users.put(name, id);
+    if (uid != null) {
+      throw new IllegalArgumentException("Duplicate mapping: " + name +
+          " " + uid + " " + id);
+    }
+  }
+
+  public abstract void addGroup(String name);
+
+  protected void addGroup(String name, int id) {
+    Integer gid = groups.put(name, id);
+    if (gid != null) {
+      throw new IllegalArgumentException("Duplicate mapping: " + name +
+          " " + gid + " " + id);
+    }
+  }
+
+  protected void resetUGInfo() {
+    users.clear();
+    groups.clear();
+  }
+
+  public long resolve(FileStatus s) {
+    return buildPermissionStatus(user(s), group(s), permission(s).toShort());
+  }
+
+  public String user(FileStatus s) {
+    return s.getOwner();
+  }
+
+  public String group(FileStatus s) {
+    return s.getGroup();
+  }
+
+  public FsPermission permission(FileStatus s) {
+    return s.getPermission();
+  }
+
+}
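To make the bit layout concrete, the fragment below is an illustrative sketch
(not part of this patch) that packs a hypothetical user string-id of 1, group
string-id of 2 and permission 0644 using the same offsets and mask as
buildPermissionStatus above:

  // Mirrors buildPermissionStatus(): user id lands in bits [40,64), group id
  // in bits [16,40), and the permission bits in [0,16) of the resulting long.
  long userId = 1L;                    // string id assigned to the owner
  long groupId = 2L;                   // string id assigned to the group
  short permission = 0644;             // octal rw-r--r--, 420 decimal
  long packed = ((userId & ((1 << 24) - 1)) << 40)
      | ((groupId & ((1 << 24) - 1)) << 16)
      | permission;
  // packed == 0x00000100000201A4L

The 24-bit mask is also why buildPermissionStatus rejects any user or group id
that does not fit in 24 bits.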

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java
new file mode 100644
index 0000000..956292e
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/main/java/org/apache/hadoop/hdfs/server/namenode/package-info.java
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+package org.apache.hadoop.hdfs.server.namenode;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java
new file mode 100644
index 0000000..c82c489
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/RandomTreeWalk.java
@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * Random, repeatable hierarchy generator.
+ */
+public class RandomTreeWalk extends TreeWalk {
+
+  private final Path root;
+  private final long seed;
+  private final float depth;
+  private final int children;
+  private final Map<Long, Long> mSeed;
+  //private final AtomicLong blockIds = new AtomicLong(1L << 30);
+
+  RandomTreeWalk(long seed) {
+    this(seed, 10);
+  }
+
+  RandomTreeWalk(long seed, int children) {
+    this(seed, children, 0.15f);
+  }
+
+  RandomTreeWalk(long seed, int children, float depth) {
+    this(randomRoot(seed), seed, children, depth);
+  }
+
+  RandomTreeWalk(Path root, long seed, int children, float depth) {
+    this.seed = seed;
+    this.depth = depth;
+    this.children = children;
+    mSeed = Collections.synchronizedMap(new HashMap<Long, Long>());
+    mSeed.put(-1L, seed);
+    this.root = root;
+  }
+
+  static Path randomRoot(long seed) {
+    Random r = new Random(seed);
+    String scheme;
+    do {
+      scheme = genName(r, 3, 5).toLowerCase();
+    } while (Character.isDigit(scheme.charAt(0)));
+    String authority = genName(r, 3, 15).toLowerCase();
+    int port = r.nextInt(1 << 13) + 1000;
+    return new Path(scheme, authority + ":" + port, "/");
+  }
+
+  @Override
+  public TreeIterator iterator() {
+    return new RandomTreeIterator(seed);
+  }
+
+  @Override
+  protected Iterable<TreePath> getChildren(TreePath p, long id,
+      TreeIterator walk) {
+    final FileStatus pFs = p.getFileStatus();
+    if (pFs.isFile()) {
+      return Collections.emptyList();
+    }
+    // seed is f(parent seed, attrib)
+    long cseed = mSeed.get(p.getParentId()) * p.getFileStatus().hashCode();
+    mSeed.put(p.getId(), cseed);
+    Random r = new Random(cseed);
+
+    int nChildren = r.nextInt(children);
+    ArrayList<TreePath> ret = new ArrayList<TreePath>();
+    for (int i = 0; i < nChildren; ++i) {
+      ret.add(new TreePath(genFileStatus(p, r), p.getId(), walk));
+    }
+    return ret;
+  }
+
+  FileStatus genFileStatus(TreePath parent, Random r) {
+    final int blocksize = 128 * (1 << 20);
+    final Path name;
+    final boolean isDir;
+    if (null == parent) {
+      name = root;
+      isDir = true;
+    } else {
+      Path p = parent.getFileStatus().getPath();
+      name = new Path(p, genName(r, 3, 10));
+      isDir = r.nextFloat() < depth;
+    }
+    final long len = isDir ? 0 : r.nextInt(Integer.MAX_VALUE);
+    final int nblocks = 0 == len ? 0 : (((int)((len - 1) / blocksize)) + 1);
+    BlockLocation[] blocks = genBlocks(r, nblocks, blocksize, len);
+    try {
+      return new LocatedFileStatus(new FileStatus(
+          len,              /* long length,             */
+          isDir,            /* boolean isdir,           */
+          1,                /* int block_replication,   */
+          blocksize,        /* long blocksize,          */
+          0L,               /* long modification_time,  */
+          0L,               /* long access_time,        */
+          null,             /* FsPermission permission, */
+          "hadoop",         /* String owner,            */
+          "hadoop",         /* String group,            */
+          name),            /* Path path                */
+          blocks);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  BlockLocation[] genBlocks(Random r, int nblocks, int blocksize, long len) {
+    BlockLocation[] blocks = new BlockLocation[nblocks];
+    if (0 == nblocks) {
+      return blocks;
+    }
+    for (int i = 0; i < nblocks - 1; ++i) {
+      blocks[i] = new BlockLocation(null, null, i * blocksize, blocksize);
+    }
+    blocks[nblocks - 1] = new BlockLocation(null, null,
+        (nblocks - 1) * blocksize,
+        0 == (len % blocksize) ? blocksize : len % blocksize);
+    return blocks;
+  }
+
+  static String genName(Random r, int min, int max) {
+    int len = r.nextInt(max - min + 1) + min;
+    char[] ret = new char[len];
+    while (len > 0) {
+      int c = r.nextInt() & 0x7F; // restrict to ASCII
+      if (Character.isLetterOrDigit(c)) {
+        ret[--len] = (char) c;
+      }
+    }
+    return new String(ret);
+  }
+
+  class RandomTreeIterator extends TreeIterator {
+
+    RandomTreeIterator() {
+    }
+
+    RandomTreeIterator(long seed) {
+      Random r = new Random(seed);
+      FileStatus iroot = genFileStatus(null, r);
+      getPendingQueue().addFirst(new TreePath(iroot, -1, this));
+    }
+
+    RandomTreeIterator(TreePath p) {
+      getPendingQueue().addFirst(
+          new TreePath(p.getFileStatus(), p.getParentId(), this));
+    }
+
+    @Override
+    public TreeIterator fork() {
+      if (getPendingQueue().isEmpty()) {
+        return new RandomTreeIterator();
+      }
+      return new RandomTreeIterator(getPendingQueue().removeFirst());
+    }
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java
new file mode 100644
index 0000000..8b52ffd
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFixedBlockResolver.java
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import static org.junit.Assert.*;
+
+/**
+ * Validate fixed-size block partitioning.
+ */
+public class TestFixedBlockResolver {
+
+  @Rule public TestName name = new TestName();
+
+  private final FixedBlockResolver blockId = new FixedBlockResolver();
+
+  @Before
+  public void setup() {
+    Configuration conf = new Configuration(false);
+    conf.setLong(FixedBlockResolver.BLOCKSIZE, 512L * (1L << 20));
+    conf.setLong(FixedBlockResolver.START_BLOCK, 512L * (1L << 20));
+    blockId.setConf(conf);
+    System.out.println(name.getMethodName());
+  }
+
+  @Test
+  public void testExactBlock() throws Exception {
+    FileStatus f = file(512, 256);
+    int nblocks = 0;
+    for (BlockProto b : blockId.resolve(f)) {
+      ++nblocks;
+      assertEquals(512L * (1L << 20), b.getNumBytes());
+    }
+    assertEquals(1, nblocks);
+
+    FileStatus g = file(1024, 256);
+    nblocks = 0;
+    for (BlockProto b : blockId.resolve(g)) {
+      ++nblocks;
+      assertEquals(512L * (1L << 20), b.getNumBytes());
+    }
+    assertEquals(2, nblocks);
+
+    FileStatus h = file(5120, 256);
+    nblocks = 0;
+    for (BlockProto b : blockId.resolve(h)) {
+      ++nblocks;
+      assertEquals(512L * (1L << 20), b.getNumBytes());
+    }
+    assertEquals(10, nblocks);
+  }
+
+  @Test
+  public void testEmpty() throws Exception {
+    FileStatus f = file(0, 100);
+    Iterator<BlockProto> b = blockId.resolve(f).iterator();
+    assertTrue(b.hasNext());
+    assertEquals(0, b.next().getNumBytes());
+    assertFalse(b.hasNext());
+  }
+
+  @Test
+  public void testRandomFile() throws Exception {
+    Random r = new Random();
+    long seed = r.nextLong();
+    System.out.println("seed: " + seed);
+    r.setSeed(seed);
+
+    int len = r.nextInt(4096) + 512;
+    int blk = r.nextInt(len - 128) + 128;
+    FileStatus s = file(len, blk);
+    long nbytes = 0;
+    for (BlockProto b : blockId.resolve(s)) {
+      nbytes += b.getNumBytes();
+      assertTrue(512L * (1L << 20) >= b.getNumBytes());
+    }
+    assertEquals(s.getLen(), nbytes);
+  }
+
+  FileStatus file(long lenMB, long blocksizeMB) {
+    Path p = new Path("foo://bar:4344/baz/dingo");
+    return new FileStatus(
+          lenMB * (1 << 20),       /* long length,             */
+          false,                   /* boolean isdir,           */
+          1,                       /* int block_replication,   */
+          blocksizeMB * (1 << 20), /* long blocksize,          */
+          0L,                      /* long modification_time,  */
+          0L,                      /* long access_time,        */
+          null,                    /* FsPermission permission, */
+          "hadoop",                /* String owner,            */
+          "hadoop",                /* String group,            */
+          p);                      /* Path path                */
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/616765e9/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java
new file mode 100644
index 0000000..b8e6ac9
--- /dev/null
+++ b/hadoop-tools/hadoop-fs2img/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestRandomTreeWalk.java
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+import static org.junit.Assert.*;
+
+/**
+ * Validate randomly generated hierarchies, including fork() support in
+ * the base class.
+ */
+public class TestRandomTreeWalk {
+
+  @Rule public TestName name = new TestName();
+
+  private Random r = new Random();
+
+  @Before
+  public void setSeed() {
+    long seed = r.nextLong();
+    r.setSeed(seed);
+    System.out.println(name.getMethodName() + " seed: " + seed);
+  }
+
+  @Test
+  public void testRandomTreeWalkRepeat() throws Exception {
+    Set<TreePath> ns = new HashSet<>();
+    final long seed = r.nextLong();
+    RandomTreeWalk t1 = new RandomTreeWalk(seed, 10, .1f);
+    int i = 0;
+    for (TreePath p : t1) {
+      p.accept(i++);
+      assertTrue(ns.add(p));
+    }
+
+    RandomTreeWalk t2 = new RandomTreeWalk(seed, 10, .1f);
+    int j = 0;
+    for (TreePath p : t2) {
+      p.accept(j++);
+      assertTrue(ns.remove(p));
+    }
+    assertTrue(ns.isEmpty());
+  }
+
+  @Test
+  public void testRandomTreeWalkFork() throws Exception {
+    Set<FileStatus> ns = new HashSet<>();
+
+    final long seed = r.nextLong();
+    RandomTreeWalk t1 = new RandomTreeWalk(seed, 10, .15f);
+    int i = 0;
+    for (TreePath p : t1) {
+      p.accept(i++);
+      assertTrue(ns.add(p.getFileStatus()));
+    }
+
+    RandomTreeWalk t2 = new RandomTreeWalk(seed, 10, .15f);
+    int j = 0;
+    ArrayList<TreeWalk.TreeIterator> iters = new ArrayList<>();
+    iters.add(t2.iterator());
+    while (!iters.isEmpty()) {
+      for (TreeWalk.TreeIterator sub = iters.remove(iters.size() - 1);
+           sub.hasNext();) {
+        TreePath p = sub.next();
+        if (0 == (r.nextInt() % 4)) {
+          iters.add(sub.fork());
+          Collections.shuffle(iters, r);
+        }
+        p.accept(j++);
+        assertTrue(ns.remove(p.getFileStatus()));
+      }
+    }
+    assertTrue(ns.isEmpty());
+  }
+
+  @Test
+  public void testRandomRootWalk() throws Exception {
+    Set<FileStatus> ns = new HashSet<>();
+    final long seed = r.nextLong();
+    Path root = new Path("foo://bar:4344/dingos");
+    String sroot = root.toString();
+    int nroot = sroot.length();
+    RandomTreeWalk t1 = new RandomTreeWalk(root, seed, 10, .1f);
+    int i = 0;
+    for (TreePath p : t1) {
+      p.accept(i++);
+      FileStatus stat = p.getFileStatus();
+      assertTrue(ns.add(stat));
+      assertEquals(sroot, stat.getPath().toString().substring(0, nroot));
+    }
+
+    RandomTreeWalk t2 = new RandomTreeWalk(root, seed, 10, .1f);
+    int j = 0;
+    for (TreePath p : t2) {
+      p.accept(j++);
+      FileStatus stat = p.getFileStatus();
+      assertTrue(ns.remove(stat));
+      assertEquals(sroot, stat.getPath().toString().substring(0, nroot));
+    }
+    assertTrue(ns.isEmpty());
+  }
+
+}

