Posted to mapreduce-commits@hadoop.apache.org by sz...@apache.org on 2012/10/19 04:28:42 UTC

svn commit: r1399950 [9/11] - in /hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project: ./ bin/ conf/ dev-support/ hadoop-mapreduce-client/ hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/ hadoop-mapreduc...

Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java Fri Oct 19 02:25:55 2012
@@ -1,58 +1,58 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-
-/**
- * Choose a shard for each insert in a round-robin fashion. Choose all the
- * shards for each delete because we don't know where the document is stored.
- */
-public class RoundRobinDistributionPolicy implements IDistributionPolicy {
-
-  private int numShards;
-  private int rr; // round-robin implementation
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
-   */
-  public void init(Shard[] shards) {
-    numShards = shards.length;
-    rr = 0;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
-   */
-  public int chooseShardForInsert(DocumentID key) {
-    int chosen = rr;
-    rr = (rr + 1) % numShards;
-    return chosen;
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
-   */
-  public int chooseShardForDelete(DocumentID key) {
-    // -1 represents all the shards
-    return -1;
-  }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+
+/**
+ * Choose a shard for each insert in a round-robin fashion. Choose all the
+ * shards for each delete because we don't know where the document is stored.
+ */
+public class RoundRobinDistributionPolicy implements IDistributionPolicy {
+
+  private int numShards;
+  private int rr; // round-robin implementation
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
+   */
+  public void init(Shard[] shards) {
+    numShards = shards.length;
+    rr = 0;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
+   */
+  public int chooseShardForInsert(DocumentID key) {
+    int chosen = rr;
+    rr = (rr + 1) % numShards;
+    return chosen;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
+   */
+  public int chooseShardForDelete(DocumentID key) {
+    // -1 represents all the shards
+    return -1;
+  }
+}
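
A minimal standalone sketch (not part of the patch) of the round-robin selection implemented above, assuming three shards; it mirrors chooseShardForInsert()'s wrap-around cursor and chooseShardForDelete()'s broadcast value of -1:

public class RoundRobinSketch {
  public static void main(String[] args) {
    int numShards = 3;              // assumed shard count for illustration
    int rr = 0;                     // round-robin cursor, as in the policy above
    for (int doc = 0; doc < 7; doc++) {
      int chosen = rr;              // shard receiving this insert
      rr = (rr + 1) % numShards;    // advance and wrap at numShards
      System.out.println("insert " + doc + " -> shard " + chosen);
    }
    System.out.println("delete -> shard -1 (all shards)");
  }
}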

Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java Fri Oct 19 02:25:55 2012
@@ -1,55 +1,55 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.lucene.index.IndexFileNameFilter;
-
-/**
- * A wrapper class to convert an IndexFileNameFilter which implements
- * java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
- */
-class LuceneIndexFileNameFilter implements PathFilter {
-
-  private static final LuceneIndexFileNameFilter singleton =
-      new LuceneIndexFileNameFilter();
-
-  /**
-   * Get a static instance.
-   * @return the static instance
-   */
-  public static LuceneIndexFileNameFilter getFilter() {
-    return singleton;
-  }
-
-  private final IndexFileNameFilter luceneFilter;
-
-  private LuceneIndexFileNameFilter() {
-    luceneFilter = IndexFileNameFilter.getFilter();
-  }
-
-  /* (non-Javadoc)
-   * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path)
-   */
-  public boolean accept(Path path) {
-    return luceneFilter.accept(null, path.getName());
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.lucene.index.IndexFileNameFilter;
+
+/**
+ * A wrapper class to convert an IndexFileNameFilter which implements
+ * java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
+ */
+class LuceneIndexFileNameFilter implements PathFilter {
+
+  private static final LuceneIndexFileNameFilter singleton =
+      new LuceneIndexFileNameFilter();
+
+  /**
+   * Get a static instance.
+   * @return the static instance
+   */
+  public static LuceneIndexFileNameFilter getFilter() {
+    return singleton;
+  }
+
+  private final IndexFileNameFilter luceneFilter;
+
+  private LuceneIndexFileNameFilter() {
+    luceneFilter = IndexFileNameFilter.getFilter();
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path)
+   */
+  public boolean accept(Path path) {
+    return luceneFilter.accept(null, path.getName());
+  }
+
+}
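
A hedged usage sketch, not part of the patch: because the class and its getFilter() factory are package-private, a caller would sit in org.apache.hadoop.contrib.index.lucene, and the "/tmp/index" path is a placeholder. ShardWriter below uses the same pattern via localFs.listStatus(temp, LuceneIndexFileNameFilter.getFilter()).

package org.apache.hadoop.contrib.index.lucene;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListIndexFilesSketch {
  public static void main(String[] args) throws Exception {
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    // keep only Lucene index files when listing a directory
    FileStatus[] indexFiles = localFs.listStatus(
        new Path("/tmp/index"), LuceneIndexFileNameFilter.getFilter());
    for (FileStatus status : indexFiles) {
      System.out.println(status.getPath().getName());
    }
  }
}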

Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java Fri Oct 19 02:25:55 2012
@@ -1,112 +1,112 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.store.Directory;
-
-/**
- * This class copies some methods from Lucene's SegmentInfos since that class
- * is not public.
- */
-public final class LuceneUtil {
-
-  static final class IndexFileNames {
-    /** Name of the index segment file */
-    static final String SEGMENTS = "segments";
-
-    /** Name of the generation reference file */
-    static final String SEGMENTS_GEN = "segments.gen";
-  }
-
-  /**
-   * Check if the file is a segments_N file
-   * @param name
-   * @return true if the file is a segments_N file
-   */
-  public static boolean isSegmentsFile(String name) {
-    return name.startsWith(IndexFileNames.SEGMENTS)
-        && !name.equals(IndexFileNames.SEGMENTS_GEN);
-  }
-
-  /**
-   * Check if the file is the segments.gen file
-   * @param name
-   * @return true if the file is the segments.gen file
-   */
-  public static boolean isSegmentsGenFile(String name) {
-    return name.equals(IndexFileNames.SEGMENTS_GEN);
-  }
-
-  /**
-   * Get the generation (N) of the current segments_N file in the directory.
-   * 
-   * @param directory -- directory to search for the latest segments_N file
-   */
-  public static long getCurrentSegmentGeneration(Directory directory)
-      throws IOException {
-    String[] files = directory.list();
-    if (files == null)
-      throw new IOException("cannot read directory " + directory
-          + ": list() returned null");
-    return getCurrentSegmentGeneration(files);
-  }
-
-  /**
-   * Get the generation (N) of the current segments_N file from a list of
-   * files.
-   * 
-   * @param files -- array of file names to check
-   */
-  public static long getCurrentSegmentGeneration(String[] files) {
-    if (files == null) {
-      return -1;
-    }
-    long max = -1;
-    for (int i = 0; i < files.length; i++) {
-      String file = files[i];
-      if (file.startsWith(IndexFileNames.SEGMENTS)
-          && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
-        long gen = generationFromSegmentsFileName(file);
-        if (gen > max) {
-          max = gen;
-        }
-      }
-    }
-    return max;
-  }
-
-  /**
-   * Parse the generation off the segments file name and return it.
-   */
-  public static long generationFromSegmentsFileName(String fileName) {
-    if (fileName.equals(IndexFileNames.SEGMENTS)) {
-      return 0;
-    } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
-      return Long.parseLong(
-          fileName.substring(1 + IndexFileNames.SEGMENTS.length()),
-          Character.MAX_RADIX);
-    } else {
-      throw new IllegalArgumentException("fileName \"" + fileName
-          + "\" is not a segments file");
-    }
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+
+/**
+ * This class copies some methods from Lucene's SegmentInfos since that class
+ * is not public.
+ */
+public final class LuceneUtil {
+
+  static final class IndexFileNames {
+    /** Name of the index segment file */
+    static final String SEGMENTS = "segments";
+
+    /** Name of the generation reference file */
+    static final String SEGMENTS_GEN = "segments.gen";
+  }
+
+  /**
+   * Check if the file is a segments_N file
+   * @param name
+   * @return true if the file is a segments_N file
+   */
+  public static boolean isSegmentsFile(String name) {
+    return name.startsWith(IndexFileNames.SEGMENTS)
+        && !name.equals(IndexFileNames.SEGMENTS_GEN);
+  }
+
+  /**
+   * Check if the file is the segments.gen file
+   * @param name
+   * @return true if the file is the segments.gen file
+   */
+  public static boolean isSegmentsGenFile(String name) {
+    return name.equals(IndexFileNames.SEGMENTS_GEN);
+  }
+
+  /**
+   * Get the generation (N) of the current segments_N file in the directory.
+   * 
+   * @param directory -- directory to search for the latest segments_N file
+   */
+  public static long getCurrentSegmentGeneration(Directory directory)
+      throws IOException {
+    String[] files = directory.list();
+    if (files == null)
+      throw new IOException("cannot read directory " + directory
+          + ": list() returned null");
+    return getCurrentSegmentGeneration(files);
+  }
+
+  /**
+   * Get the generation (N) of the current segments_N file from a list of
+   * files.
+   * 
+   * @param files -- array of file names to check
+   */
+  public static long getCurrentSegmentGeneration(String[] files) {
+    if (files == null) {
+      return -1;
+    }
+    long max = -1;
+    for (int i = 0; i < files.length; i++) {
+      String file = files[i];
+      if (file.startsWith(IndexFileNames.SEGMENTS)
+          && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
+        long gen = generationFromSegmentsFileName(file);
+        if (gen > max) {
+          max = gen;
+        }
+      }
+    }
+    return max;
+  }
+
+  /**
+   * Parse the generation off the segments file name and return it.
+   */
+  public static long generationFromSegmentsFileName(String fileName) {
+    if (fileName.equals(IndexFileNames.SEGMENTS)) {
+      return 0;
+    } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
+      return Long.parseLong(
+          fileName.substring(1 + IndexFileNames.SEGMENTS.length()),
+          Character.MAX_RADIX);
+    } else {
+      throw new IllegalArgumentException("fileName \"" + fileName
+          + "\" is not a segments file");
+    }
+  }
+
+}
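
A small sketch (not part of the patch) exercising the generation helpers above; the file names are made up. "segments" maps to generation 0, and the suffix after "segments_" is parsed in radix 36, so "segments_a" parses as 10.

import org.apache.hadoop.contrib.index.lucene.LuceneUtil;

public class LuceneUtilSketch {
  public static void main(String[] args) {
    System.out.println(LuceneUtil.generationFromSegmentsFileName("segments"));   // 0
    System.out.println(LuceneUtil.generationFromSegmentsFileName("segments_a")); // 10 (radix 36)

    // segments.gen is skipped; the newest segments_N determines the generation
    String[] files = { "segments_1", "segments_3", "segments.gen", "_0.cfs" };
    System.out.println(LuceneUtil.getCurrentSegmentGeneration(files));           // 3
  }
}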

Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java Fri Oct 19 02:25:55 2012
@@ -1,49 +1,49 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.lucene.index.IndexCommitPoint;
-import org.apache.lucene.index.IndexDeletionPolicy;
-
-/**
- * For mixed directory. Use KeepAllDeletionPolicy for the read-only directory
- * (keep all from init) and use KeepOnlyLastCommitDeletionPolicy for the
- * writable directory (initially empty, keep latest after init).
- */
-class MixedDeletionPolicy implements IndexDeletionPolicy {
-
-  private int keepAllFromInit = 0;
-
-  public void onInit(List commits) throws IOException {
-    keepAllFromInit = commits.size();
-  }
-
-  public void onCommit(List commits) throws IOException {
-    int size = commits.size();
-    assert (size > keepAllFromInit);
-    // keep all from init and the latest, delete the rest
-    for (int i = keepAllFromInit; i < size - 1; i++) {
-      ((IndexCommitPoint) commits.get(i)).delete();
-    }
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.index.IndexCommitPoint;
+import org.apache.lucene.index.IndexDeletionPolicy;
+
+/**
+ * For mixed directory. Use KeepAllDeletionPolicy for the read-only directory
+ * (keep all from init) and use KeepOnlyLastCommitDeletionPolicy for the
+ * writable directory (initially empty, keep latest after init).
+ */
+class MixedDeletionPolicy implements IndexDeletionPolicy {
+
+  private int keepAllFromInit = 0;
+
+  public void onInit(List commits) throws IOException {
+    keepAllFromInit = commits.size();
+  }
+
+  public void onCommit(List commits) throws IOException {
+    int size = commits.size();
+    assert (size > keepAllFromInit);
+    // keep all from init and the latest, delete the rest
+    for (int i = keepAllFromInit; i < size - 1; i++) {
+      ((IndexCommitPoint) commits.get(i)).delete();
+    }
+  }
+
+}
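
A standalone sketch (hypothetical numbers, no Lucene types) of which commit points onCommit() above deletes: with two commits present at init and five at commit time, indices 2 and 3 are removed, keeping the initial commits 0 and 1 plus the latest, 4.

public class DeletionPolicySketch {
  public static void main(String[] args) {
    int keepAllFromInit = 2;  // commits present when onInit() ran
    int size = 5;             // commits present at the current onCommit() call
    for (int i = keepAllFromInit; i < size - 1; i++) {
      System.out.println("delete commit point " + i);
    }
  }
}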

Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java Fri Oct 19 02:25:55 2012
@@ -1,185 +1,185 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.NoLockFactory;
-
-/**
- * The initial version of an index is stored in a read-only FileSystem dir
- * (FileSystemDirectory). Index files created by newer versions are written to
- * a writable local FS dir (Lucene's FSDirectory). We should use the general
- * FileSystemDirectory for the writable dir as well, but we have to use Lucene's
- * FSDirectory because Lucene currently does random writes and
- * FileSystemDirectory only supports sequential writes.
- * 
- * Note: We may delete files from the read-only FileSystem dir because there
- * can be some segment files from an uncommitted checkpoint. For the same
- * reason, we may create files in the writable dir which already exist in the
- * read-only dir and logically they overwrite the ones in the read-only dir.
- */
-class MixedDirectory extends Directory {
-
-  private final Directory readDir; // FileSystemDirectory
-  private final Directory writeDir; // Lucene's FSDirectory
-
-  // take advantage of the fact that Lucene's FSDirectory.fileExists is faster
-
-  public MixedDirectory(FileSystem readFs, Path readPath, FileSystem writeFs,
-      Path writePath, Configuration conf) throws IOException {
-
-    try {
-      readDir = new FileSystemDirectory(readFs, readPath, false, conf);
-      // check writeFS is a local FS?
-      writeDir = FSDirectory.getDirectory(writePath.toString());
-
-    } catch (IOException e) {
-      try {
-        close();
-      } catch (IOException e1) {
-        // ignore this one, throw the original one
-      }
-      throw e;
-    }
-
-    lockFactory = new NoLockFactory();
-  }
-
-  // for debugging
-  MixedDirectory(Directory readDir, Directory writeDir) throws IOException {
-    this.readDir = readDir;
-    this.writeDir = writeDir;
-
-    lockFactory = new NoLockFactory();
-  }
-
-  @Override
-  public String[] list() throws IOException {
-    String[] readFiles = readDir.list();
-    String[] writeFiles = writeDir.list();
-
-    if (readFiles == null || readFiles.length == 0) {
-      return writeFiles;
-    } else if (writeFiles == null || writeFiles.length == 0) {
-      return readFiles;
-    } else {
-      String[] result = new String[readFiles.length + writeFiles.length];
-      System.arraycopy(readFiles, 0, result, 0, readFiles.length);
-      System.arraycopy(writeFiles, 0, result, readFiles.length,
-          writeFiles.length);
-      return result;
-    }
-  }
-
-  @Override
-  public void deleteFile(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      writeDir.deleteFile(name);
-    }
-    if (readDir.fileExists(name)) {
-      readDir.deleteFile(name);
-    }
-  }
-
-  @Override
-  public boolean fileExists(String name) throws IOException {
-    return writeDir.fileExists(name) || readDir.fileExists(name);
-  }
-
-  @Override
-  public long fileLength(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      return writeDir.fileLength(name);
-    } else {
-      return readDir.fileLength(name);
-    }
-  }
-
-  @Override
-  public long fileModified(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      return writeDir.fileModified(name);
-    } else {
-      return readDir.fileModified(name);
-    }
-  }
-
-  @Override
-  public void renameFile(String from, String to) throws IOException {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public void touchFile(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      writeDir.touchFile(name);
-    } else {
-      readDir.touchFile(name);
-    }
-  }
-
-  @Override
-  public IndexOutput createOutput(String name) throws IOException {
-    return writeDir.createOutput(name);
-  }
-
-  @Override
-  public IndexInput openInput(String name) throws IOException {
-    if (writeDir.fileExists(name)) {
-      return writeDir.openInput(name);
-    } else {
-      return readDir.openInput(name);
-    }
-  }
-
-  @Override
-  public IndexInput openInput(String name, int bufferSize) throws IOException {
-    if (writeDir.fileExists(name)) {
-      return writeDir.openInput(name, bufferSize);
-    } else {
-      return readDir.openInput(name, bufferSize);
-    }
-  }
-
-  @Override
-  public void close() throws IOException {
-    try {
-      if (readDir != null) {
-        readDir.close();
-      }
-    } finally {
-      if (writeDir != null) {
-        writeDir.close();
-      }
-    }
-  }
-
-  public String toString() {
-    return this.getClass().getName() + "@" + readDir + "&" + writeDir;
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.NoLockFactory;
+
+/**
+ * The initial version of an index is stored in a read-only FileSystem dir
+ * (FileSystemDirectory). Index files created by newer versions are written to
+ * a writable local FS dir (Lucene's FSDirectory). We should use the general
+ * FileSystemDirectory for the writable dir as well, but we have to use Lucene's
+ * FSDirectory because Lucene currently does random writes and
+ * FileSystemDirectory only supports sequential writes.
+ * 
+ * Note: We may delete files from the read-only FileSystem dir because there
+ * can be some segment files from an uncommitted checkpoint. For the same
+ * reason, we may create files in the writable dir which already exist in the
+ * read-only dir and logically they overwrite the ones in the read-only dir.
+ */
+class MixedDirectory extends Directory {
+
+  private final Directory readDir; // FileSystemDirectory
+  private final Directory writeDir; // Lucene's FSDirectory
+
+  // take advantage of the fact that Lucene's FSDirectory.fileExists is faster
+
+  public MixedDirectory(FileSystem readFs, Path readPath, FileSystem writeFs,
+      Path writePath, Configuration conf) throws IOException {
+
+    try {
+      readDir = new FileSystemDirectory(readFs, readPath, false, conf);
+      // check writeFS is a local FS?
+      writeDir = FSDirectory.getDirectory(writePath.toString());
+
+    } catch (IOException e) {
+      try {
+        close();
+      } catch (IOException e1) {
+        // ignore this one, throw the original one
+      }
+      throw e;
+    }
+
+    lockFactory = new NoLockFactory();
+  }
+
+  // for debugging
+  MixedDirectory(Directory readDir, Directory writeDir) throws IOException {
+    this.readDir = readDir;
+    this.writeDir = writeDir;
+
+    lockFactory = new NoLockFactory();
+  }
+
+  @Override
+  public String[] list() throws IOException {
+    String[] readFiles = readDir.list();
+    String[] writeFiles = writeDir.list();
+
+    if (readFiles == null || readFiles.length == 0) {
+      return writeFiles;
+    } else if (writeFiles == null || writeFiles.length == 0) {
+      return readFiles;
+    } else {
+      String[] result = new String[readFiles.length + writeFiles.length];
+      System.arraycopy(readFiles, 0, result, 0, readFiles.length);
+      System.arraycopy(writeFiles, 0, result, readFiles.length,
+          writeFiles.length);
+      return result;
+    }
+  }
+
+  @Override
+  public void deleteFile(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      writeDir.deleteFile(name);
+    }
+    if (readDir.fileExists(name)) {
+      readDir.deleteFile(name);
+    }
+  }
+
+  @Override
+  public boolean fileExists(String name) throws IOException {
+    return writeDir.fileExists(name) || readDir.fileExists(name);
+  }
+
+  @Override
+  public long fileLength(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      return writeDir.fileLength(name);
+    } else {
+      return readDir.fileLength(name);
+    }
+  }
+
+  @Override
+  public long fileModified(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      return writeDir.fileModified(name);
+    } else {
+      return readDir.fileModified(name);
+    }
+  }
+
+  @Override
+  public void renameFile(String from, String to) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void touchFile(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      writeDir.touchFile(name);
+    } else {
+      readDir.touchFile(name);
+    }
+  }
+
+  @Override
+  public IndexOutput createOutput(String name) throws IOException {
+    return writeDir.createOutput(name);
+  }
+
+  @Override
+  public IndexInput openInput(String name) throws IOException {
+    if (writeDir.fileExists(name)) {
+      return writeDir.openInput(name);
+    } else {
+      return readDir.openInput(name);
+    }
+  }
+
+  @Override
+  public IndexInput openInput(String name, int bufferSize) throws IOException {
+    if (writeDir.fileExists(name)) {
+      return writeDir.openInput(name, bufferSize);
+    } else {
+      return readDir.openInput(name, bufferSize);
+    }
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      if (readDir != null) {
+        readDir.close();
+      }
+    } finally {
+      if (writeDir != null) {
+        writeDir.close();
+      }
+    }
+  }
+
+  public String toString() {
+    return this.getClass().getName() + "@" + readDir + "&" + writeDir;
+  }
+
+}
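
A hedged sketch, not part of the patch, using the package-visible debugging constructor with two RAMDirectory instances standing in for the read-only and writable dirs; the caller must live in org.apache.hadoop.contrib.index.lucene, and the file names are placeholders.

package org.apache.hadoop.contrib.index.lucene;

import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class MixedDirectorySketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory readDir = new RAMDirectory();
    RAMDirectory writeDir = new RAMDirectory();

    IndexOutput out = readDir.createOutput("old.cfs");   // exists only in the read dir
    out.close();
    out = writeDir.createOutput("new.cfs");              // exists only in the write dir
    out.close();

    MixedDirectory mixed = new MixedDirectory(readDir, writeDir);
    for (String name : mixed.list()) {                   // read files first, then write files
      System.out.println(name);
    }
    System.out.println(mixed.fileExists("new.cfs"));     // true, found in the write dir
    mixed.close();
  }
}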

Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java Fri Oct 19 02:25:55 2012
@@ -1,119 +1,119 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Text;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMDirectory;
-
-/**
- * A utility class which writes an index in a ram dir to a DataOutput and
- * reads an index from a DataInput into a ram dir.
- */
-public class RAMDirectoryUtil {
-  private static final int BUFFER_SIZE = 1024; // RAMOutputStream.BUFFER_SIZE;
-
-  /**
-   * Write a number of files from a ram directory to a data output.
-   * @param out  the data output
-   * @param dir  the ram directory
-   * @param names  the names of the files to write
-   * @throws IOException
-   */
-  public static void writeRAMFiles(DataOutput out, RAMDirectory dir,
-      String[] names) throws IOException {
-    out.writeInt(names.length);
-
-    for (int i = 0; i < names.length; i++) {
-      Text.writeString(out, names[i]);
-      long length = dir.fileLength(names[i]);
-      out.writeLong(length);
-
-      if (length > 0) {
-        // can we avoid the extra copy?
-        IndexInput input = null;
-        try {
-          input = dir.openInput(names[i], BUFFER_SIZE);
-
-          int position = 0;
-          byte[] buffer = new byte[BUFFER_SIZE];
-
-          while (position < length) {
-            int len =
-                position + BUFFER_SIZE <= length ? BUFFER_SIZE
-                    : (int) (length - position);
-            input.readBytes(buffer, 0, len);
-            out.write(buffer, 0, len);
-            position += len;
-          }
-        } finally {
-          if (input != null) {
-            input.close();
-          }
-        }
-      }
-    }
-  }
-
-  /**
-   * Read a number of files from a data input to a ram directory.
-   * @param in  the data input
-   * @param dir  the ram directory
-   * @throws IOException
-   */
-  public static void readRAMFiles(DataInput in, RAMDirectory dir)
-      throws IOException {
-    int numFiles = in.readInt();
-
-    for (int i = 0; i < numFiles; i++) {
-      String name = Text.readString(in);
-      long length = in.readLong();
-
-      if (length > 0) {
-        // can we avoid the extra copy?
-        IndexOutput output = null;
-        try {
-          output = dir.createOutput(name);
-
-          int position = 0;
-          byte[] buffer = new byte[BUFFER_SIZE];
-
-          while (position < length) {
-            int len =
-                position + BUFFER_SIZE <= length ? BUFFER_SIZE
-                    : (int) (length - position);
-            in.readFully(buffer, 0, len);
-            output.writeBytes(buffer, 0, len);
-            position += len;
-          }
-        } finally {
-          if (output != null) {
-            output.close();
-          }
-        }
-      }
-    }
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * A utility class which writes an index in a ram dir to a DataOutput and
+ * reads an index from a DataInput into a ram dir.
+ */
+public class RAMDirectoryUtil {
+  private static final int BUFFER_SIZE = 1024; // RAMOutputStream.BUFFER_SIZE;
+
+  /**
+   * Write a number of files from a ram directory to a data output.
+   * @param out  the data output
+   * @param dir  the ram directory
+   * @param names  the names of the files to write
+   * @throws IOException
+   */
+  public static void writeRAMFiles(DataOutput out, RAMDirectory dir,
+      String[] names) throws IOException {
+    out.writeInt(names.length);
+
+    for (int i = 0; i < names.length; i++) {
+      Text.writeString(out, names[i]);
+      long length = dir.fileLength(names[i]);
+      out.writeLong(length);
+
+      if (length > 0) {
+        // can we avoid the extra copy?
+        IndexInput input = null;
+        try {
+          input = dir.openInput(names[i], BUFFER_SIZE);
+
+          int position = 0;
+          byte[] buffer = new byte[BUFFER_SIZE];
+
+          while (position < length) {
+            int len =
+                position + BUFFER_SIZE <= length ? BUFFER_SIZE
+                    : (int) (length - position);
+            input.readBytes(buffer, 0, len);
+            out.write(buffer, 0, len);
+            position += len;
+          }
+        } finally {
+          if (input != null) {
+            input.close();
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Read a number of files from a data input to a ram directory.
+   * @param in  the data input
+   * @param dir  the ram directory
+   * @throws IOException
+   */
+  public static void readRAMFiles(DataInput in, RAMDirectory dir)
+      throws IOException {
+    int numFiles = in.readInt();
+
+    for (int i = 0; i < numFiles; i++) {
+      String name = Text.readString(in);
+      long length = in.readLong();
+
+      if (length > 0) {
+        // can we avoid the extra copy?
+        IndexOutput output = null;
+        try {
+          output = dir.createOutput(name);
+
+          int position = 0;
+          byte[] buffer = new byte[BUFFER_SIZE];
+
+          while (position < length) {
+            int len =
+                position + BUFFER_SIZE <= length ? BUFFER_SIZE
+                    : (int) (length - position);
+            in.readFully(buffer, 0, len);
+            output.writeBytes(buffer, 0, len);
+            position += len;
+          }
+        } finally {
+          if (output != null) {
+            output.close();
+          }
+        }
+      }
+    }
+  }
+
+}
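
A round-trip sketch (not part of the patch): serialize one file from a RAMDirectory into a byte array with writeRAMFiles() and rebuild it in a second RAMDirectory with readRAMFiles(); "doc.txt" and its five-byte payload are placeholders.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

import org.apache.hadoop.contrib.index.lucene.RAMDirectoryUtil;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class RAMDirectoryUtilSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory source = new RAMDirectory();
    IndexOutput out = source.createOutput("doc.txt");
    out.writeBytes("hello".getBytes("UTF-8"), 5);        // five bytes of payload
    out.close();

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    RAMDirectoryUtil.writeRAMFiles(new DataOutputStream(bytes), source,
        new String[] { "doc.txt" });

    RAMDirectory copy = new RAMDirectory();
    RAMDirectoryUtil.readRAMFiles(
        new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())), copy);

    System.out.println(copy.fileLength("doc.txt"));      // 5
  }
}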

Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/ShardWriter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/ShardWriter.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/ShardWriter.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/ShardWriter.java Fri Oct 19 02:25:55 2012
@@ -1,233 +1,233 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.contrib.index.mapred.IndexUpdateConfiguration;
-import org.apache.hadoop.contrib.index.mapred.IntermediateForm;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.store.Directory;
-
-/**
- * The initial version of an index is stored in the perm dir. Index files
- * created by newer versions are written to a temp dir on the local FS. After
- * successfully creating the new version in the temp dir, the shard writer
- * moves the new files to the perm dir and deletes the temp dir in close().
- */
-public class ShardWriter {
-  static final Log LOG = LogFactory.getLog(ShardWriter.class);
-
-  private final FileSystem fs;
-  private final FileSystem localFs;
-  private final Path perm;
-  private final Path temp;
-  private final Directory dir;
-  private final IndexWriter writer;
-  private int maxNumSegments;
-  private long numForms = 0;
-
-  /**
-   * Constructor
-   * @param fs
-   * @param shard
-   * @param tempDir
-   * @param iconf
-   * @throws IOException
-   */
-  public ShardWriter(FileSystem fs, Shard shard, String tempDir,
-      IndexUpdateConfiguration iconf) throws IOException {
-    LOG.info("Construct a shard writer");
-
-    this.fs = fs;
-    localFs = FileSystem.getLocal(iconf.getConfiguration());
-    perm = new Path(shard.getDirectory());
-    temp = new Path(tempDir);
-
-    long initGeneration = shard.getGeneration();
-    if (!fs.exists(perm)) {
-      assert (initGeneration < 0);
-      fs.mkdirs(perm);
-    } else {
-      restoreGeneration(fs, perm, initGeneration);
-    }
-    dir =
-        new MixedDirectory(fs, perm, localFs, fs.startLocalOutput(perm, temp),
-            iconf.getConfiguration());
-
-    // analyzer is null because we only use addIndexes, not addDocument
-    writer =
-        new IndexWriter(dir, false, null,
-            initGeneration < 0 ? new KeepOnlyLastCommitDeletionPolicy()
-                : new MixedDeletionPolicy());
-    setParameters(iconf);
-  }
-
-  /**
-   * Process an intermediate form by carrying out, on the Lucene instance of
-   * the shard, the deletes and the inserts (a ram index) in the form. 
-   * @param form  the intermediate form containing deletes and a ram index
-   * @throws IOException
-   */
-  public void process(IntermediateForm form) throws IOException {
-    // first delete
-    Iterator<Term> iter = form.deleteTermIterator();
-    while (iter.hasNext()) {
-      writer.deleteDocuments(iter.next());
-    }
-    // then insert
-    writer.addIndexesNoOptimize(new Directory[] { form.getDirectory() });
-    numForms++;
-  }
-
-  /**
-   * Close the shard writer. Optimize the Lucene instance of the shard before
-   * closing if necessary, and copy the files created in the temp directory
-   * to the permanent directory after closing.
-   * @throws IOException
-   */
-  public void close() throws IOException {
-    LOG.info("Closing the shard writer, processed " + numForms + " forms");
-    try {
-      try {
-        if (maxNumSegments > 0) {
-          writer.optimize(maxNumSegments);
-          LOG.info("Optimized the shard into at most " + maxNumSegments
-              + " segments");
-        }
-      } finally {
-        writer.close();
-        LOG.info("Closed Lucene index writer");
-      }
-
-      moveFromTempToPerm();
-      LOG.info("Moved new index files to " + perm);
-
-    } finally {
-      dir.close();
-      LOG.info("Closed the shard writer");
-    }
-  }
-
-  /* (non-Javadoc)
-   * @see java.lang.Object#toString()
-   */
-  public String toString() {
-    return this.getClass().getName() + "@" + perm + "&" + temp;
-  }
-
-  private void setParameters(IndexUpdateConfiguration iconf) {
-    int maxFieldLength = iconf.getIndexMaxFieldLength();
-    if (maxFieldLength > 0) {
-      writer.setMaxFieldLength(maxFieldLength);
-    }
-    writer.setUseCompoundFile(iconf.getIndexUseCompoundFile());
-    maxNumSegments = iconf.getIndexMaxNumSegments();
-
-    if (maxFieldLength > 0) {
-      LOG.info("sea.max.field.length = " + writer.getMaxFieldLength());
-    }
-    LOG.info("sea.use.compound.file = " + writer.getUseCompoundFile());
-    LOG.info("sea.max.num.segments = " + maxNumSegments);
-  }
-
-  // in case a previous reduce task fails, restore the generation to
-  // the original starting point by deleting the segments.gen file
-  // and the segments_N files whose generations are greater than the
-  // starting generation; rest of the unwanted files will be deleted
-  // once the unwanted segments_N files are deleted
-  private void restoreGeneration(FileSystem fs, Path perm, long startGen)
-      throws IOException {
-
-    FileStatus[] fileStatus = fs.listStatus(perm, new PathFilter() {
-      public boolean accept(Path path) {
-        return LuceneUtil.isSegmentsFile(path.getName());
-      }
-    });
-
-    // remove the segments_N files whose generations are greater than
-    // the starting generation
-    for (int i = 0; i < fileStatus.length; i++) {
-      Path path = fileStatus[i].getPath();
-      if (startGen < LuceneUtil.generationFromSegmentsFileName(path.getName())) {
-        fs.delete(path, true);
-      }
-    }
-
-    // always remove segments.gen in case last failed try removed segments_N
-    // but not segments.gen, and segments.gen will be overwritten anyway.
-    Path segmentsGenFile = new Path(LuceneUtil.IndexFileNames.SEGMENTS_GEN);
-    if (fs.exists(segmentsGenFile)) {
-      fs.delete(segmentsGenFile, true);
-    }
-  }
-
-  // move the files created in the temp dir into the perm dir
-  // and then delete the temp dir from the local FS
-  private void moveFromTempToPerm() throws IOException {
-    try {
-      FileStatus[] fileStatus =
-          localFs.listStatus(temp, LuceneIndexFileNameFilter.getFilter());
-      Path segmentsPath = null;
-      Path segmentsGenPath = null;
-
-      // move the files created in temp dir except segments_N and segments.gen
-      for (int i = 0; i < fileStatus.length; i++) {
-        Path path = fileStatus[i].getPath();
-        String name = path.getName();
-
-        if (LuceneUtil.isSegmentsGenFile(name)) {
-          assert (segmentsGenPath == null);
-          segmentsGenPath = path;
-        } else if (LuceneUtil.isSegmentsFile(name)) {
-          assert (segmentsPath == null);
-          segmentsPath = path;
-        } else {
-          fs.completeLocalOutput(new Path(perm, name), path);
-        }
-      }
-
-      // move the segments_N file
-      if (segmentsPath != null) {
-        fs.completeLocalOutput(new Path(perm, segmentsPath.getName()),
-            segmentsPath);
-      }
-
-      // move the segments.gen file
-      if (segmentsGenPath != null) {
-        fs.completeLocalOutput(new Path(perm, segmentsGenPath.getName()),
-            segmentsGenPath);
-      }
-    } finally {
-      // finally delete the temp dir (files should have been deleted)
-      localFs.delete(temp, true);
-    }
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.contrib.index.mapred.IndexUpdateConfiguration;
+import org.apache.hadoop.contrib.index.mapred.IntermediateForm;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+
+/**
+ * The initial version of an index is stored in the perm dir. Index files
+ * created by newer versions are written to a temp dir on the local FS. After
+ * successfully creating the new version in the temp dir, the shard writer
+ * moves the new files to the perm dir and deletes the temp dir in close().
+ */
+public class ShardWriter {
+  static final Log LOG = LogFactory.getLog(ShardWriter.class);
+
+  private final FileSystem fs;
+  private final FileSystem localFs;
+  private final Path perm;
+  private final Path temp;
+  private final Directory dir;
+  private final IndexWriter writer;
+  private int maxNumSegments;
+  private long numForms = 0;
+
+  /**
+   * Constructor
+   * @param fs
+   * @param shard
+   * @param tempDir
+   * @param iconf
+   * @throws IOException
+   */
+  public ShardWriter(FileSystem fs, Shard shard, String tempDir,
+      IndexUpdateConfiguration iconf) throws IOException {
+    LOG.info("Construct a shard writer");
+
+    this.fs = fs;
+    localFs = FileSystem.getLocal(iconf.getConfiguration());
+    perm = new Path(shard.getDirectory());
+    temp = new Path(tempDir);
+
+    long initGeneration = shard.getGeneration();
+    if (!fs.exists(perm)) {
+      assert (initGeneration < 0);
+      fs.mkdirs(perm);
+    } else {
+      restoreGeneration(fs, perm, initGeneration);
+    }
+    dir =
+        new MixedDirectory(fs, perm, localFs, fs.startLocalOutput(perm, temp),
+            iconf.getConfiguration());
+
+    // analyzer is null because we only use addIndexes, not addDocument
+    writer =
+        new IndexWriter(dir, false, null,
+            initGeneration < 0 ? new KeepOnlyLastCommitDeletionPolicy()
+                : new MixedDeletionPolicy());
+    setParameters(iconf);
+  }
+
+  /**
+   * Process an intermediate form by carrying out, on the Lucene instance of
+   * the shard, the deletes and the inserts (a ram index) in the form. 
+   * @param form  the intermediate form containing deletes and a ram index
+   * @throws IOException
+   */
+  public void process(IntermediateForm form) throws IOException {
+    // first delete
+    Iterator<Term> iter = form.deleteTermIterator();
+    while (iter.hasNext()) {
+      writer.deleteDocuments(iter.next());
+    }
+    // then insert
+    writer.addIndexesNoOptimize(new Directory[] { form.getDirectory() });
+    numForms++;
+  }
+
+  /**
+   * Close the shard writer. Optimize the Lucene instance of the shard before
+   * closing if necessary, and copy the files created in the temp directory
+   * to the permanent directory after closing.
+   * @throws IOException
+   */
+  public void close() throws IOException {
+    LOG.info("Closing the shard writer, processed " + numForms + " forms");
+    try {
+      try {
+        if (maxNumSegments > 0) {
+          writer.optimize(maxNumSegments);
+          LOG.info("Optimized the shard into at most " + maxNumSegments
+              + " segments");
+        }
+      } finally {
+        writer.close();
+        LOG.info("Closed Lucene index writer");
+      }
+
+      moveFromTempToPerm();
+      LOG.info("Moved new index files to " + perm);
+
+    } finally {
+      dir.close();
+      LOG.info("Closed the shard writer");
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see java.lang.Object#toString()
+   */
+  public String toString() {
+    return this.getClass().getName() + "@" + perm + "&" + temp;
+  }
+
+  private void setParameters(IndexUpdateConfiguration iconf) {
+    int maxFieldLength = iconf.getIndexMaxFieldLength();
+    if (maxFieldLength > 0) {
+      writer.setMaxFieldLength(maxFieldLength);
+    }
+    writer.setUseCompoundFile(iconf.getIndexUseCompoundFile());
+    maxNumSegments = iconf.getIndexMaxNumSegments();
+
+    if (maxFieldLength > 0) {
+      LOG.info("sea.max.field.length = " + writer.getMaxFieldLength());
+    }
+    LOG.info("sea.use.compound.file = " + writer.getUseCompoundFile());
+    LOG.info("sea.max.num.segments = " + maxNumSegments);
+  }
+
+  // in case a previous reduce task fails, restore the generation to
+  // the original starting point by deleting the segments.gen file
+  // and the segments_N files whose generations are greater than the
+  // starting generation; the rest of the unwanted files will be deleted
+  // once the unwanted segments_N files are deleted
+  private void restoreGeneration(FileSystem fs, Path perm, long startGen)
+      throws IOException {
+
+    FileStatus[] fileStatus = fs.listStatus(perm, new PathFilter() {
+      public boolean accept(Path path) {
+        return LuceneUtil.isSegmentsFile(path.getName());
+      }
+    });
+
+    // remove the segments_N files whose generations are greater than
+    // the starting generation
+    for (int i = 0; i < fileStatus.length; i++) {
+      Path path = fileStatus[i].getPath();
+      if (startGen < LuceneUtil.generationFromSegmentsFileName(path.getName())) {
+        fs.delete(path, true);
+      }
+    }
+
+    // always remove segments.gen in case the last failed try removed segments_N
+    // but not segments.gen; segments.gen will be overwritten anyway.
+    Path segmentsGenFile = new Path(LuceneUtil.IndexFileNames.SEGMENTS_GEN);
+    if (fs.exists(segmentsGenFile)) {
+      fs.delete(segmentsGenFile, true);
+    }
+  }
+
+  // move the files created in the temp dir into the perm dir
+  // and then delete the temp dir from the local FS
+  private void moveFromTempToPerm() throws IOException {
+    try {
+      FileStatus[] fileStatus =
+          localFs.listStatus(temp, LuceneIndexFileNameFilter.getFilter());
+      Path segmentsPath = null;
+      Path segmentsGenPath = null;
+
+      // move the files created in temp dir except segments_N and segments.gen
+      for (int i = 0; i < fileStatus.length; i++) {
+        Path path = fileStatus[i].getPath();
+        String name = path.getName();
+
+        if (LuceneUtil.isSegmentsGenFile(name)) {
+          assert (segmentsGenPath == null);
+          segmentsGenPath = path;
+        } else if (LuceneUtil.isSegmentsFile(name)) {
+          assert (segmentsPath == null);
+          segmentsPath = path;
+        } else {
+          fs.completeLocalOutput(new Path(perm, name), path);
+        }
+      }
+
+      // move the segments_N file
+      if (segmentsPath != null) {
+        fs.completeLocalOutput(new Path(perm, segmentsPath.getName()),
+            segmentsPath);
+      }
+
+      // move the segments.gen file
+      if (segmentsGenPath != null) {
+        fs.completeLocalOutput(new Path(perm, segmentsGenPath.getName()),
+            segmentsGenPath);
+      }
+    } finally {
+      // finally delete the temp dir (files should have been deleted)
+      localFs.delete(temp, true);
+    }
+  }
+
+}
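
A minimal sketch of how a caller (for example, the reduce side of the index update job) might drive the ShardWriter above. It assumes an IndexUpdateConfiguration can be wrapped around a plain Configuration, as UpdateIndex does with its JobConf, and it is placed in ShardWriter's own package (org.apache.hadoop.contrib.index.lucene in this source layout) so the class resolves without an extra import; the driver class, method, and parameter names are illustrative and not part of this patch.

    package org.apache.hadoop.contrib.index.lucene;

    import java.io.IOException;
    import java.util.Iterator;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.contrib.index.mapred.IndexUpdateConfiguration;
    import org.apache.hadoop.contrib.index.mapred.IntermediateForm;
    import org.apache.hadoop.contrib.index.mapred.Shard;
    import org.apache.hadoop.fs.FileSystem;

    public class ShardWriterDriverSketch {

      /** Apply a stream of intermediate forms to a single shard. */
      public static void updateShard(Configuration conf, Shard shard,
          String localTempDir, Iterator<IntermediateForm> forms)
          throws IOException {
        IndexUpdateConfiguration iconf = new IndexUpdateConfiguration(conf);
        FileSystem fs = FileSystem.get(conf);

        ShardWriter writer = new ShardWriter(fs, shard, localTempDir, iconf);
        try {
          while (forms.hasNext()) {
            // applies the deletes in the form, then adds its RAM index
            writer.process(forms.next());
          }
        } finally {
          // optimizes (if sea.max.num.segments > 0), closes the Lucene
          // writer, and moves the new files from the temp dir to the perm dir
          writer.close();
        }
      }
    }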

Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/main/UpdateIndex.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/main/UpdateIndex.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/main/UpdateIndex.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/main/UpdateIndex.java Fri Oct 19 02:25:55 2012
@@ -1,276 +1,276 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.main;
-
-import java.io.IOException;
-import java.text.NumberFormat;
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.contrib.index.mapred.IndexUpdateConfiguration;
-import org.apache.hadoop.contrib.index.mapred.IIndexUpdater;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.main;
+
+import java.io.IOException;
+import java.text.NumberFormat;
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.contrib.index.mapred.IndexUpdateConfiguration;
+import org.apache.hadoop.contrib.index.mapred.IIndexUpdater;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.util.ReflectionUtils;
-
-/**
- * A distributed "index" is partitioned into "shards". Each shard corresponds
- * to a Lucene instance. This class contains the main() method which uses a
- * Map/Reduce job to analyze documents and update Lucene instances in parallel.
- * 
- * The main() method in UpdateIndex requires the following information for
- * updating the shards:
- *   - Input formatter. This specifies how to format the input documents.
- *   - Analysis. This defines the analyzer to use on the input. The analyzer
- *     determines whether a document is being inserted, updated, or deleted.
- *     For inserts or updates, the analyzer also converts each input document
- *     into a Lucene document.
- *   - Input paths. This provides the location(s) of updated documents,
- *     e.g., HDFS files or directories, or HBase tables.
- *   - Shard paths, or index path with the number of shards. Either specify
- *     the path for each shard, or specify an index path and the shards are
- *     the sub-directories of the index directory.
- *   - Output path. When the update to a shard is done, a message is put here.
- *   - Number of map tasks.
- *
- * All of the information can be specified in a configuration file. All but
- * the first two can also be specified as command line options. Check out
- * conf/index-config.xml.template for other configurable parameters.
- *
- * Note: Because of the parallel nature of Map/Reduce, the behaviour of
- * multiple inserts, deletes or updates to the same document is undefined.
- */
-public class UpdateIndex {
-  public static final Log LOG = LogFactory.getLog(UpdateIndex.class);
-
-  private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
-  static {
-    NUMBER_FORMAT.setMinimumIntegerDigits(5);
-    NUMBER_FORMAT.setGroupingUsed(false);
-  }
-
-  private static long now() {
-    return System.currentTimeMillis();
-  }
-
-  private static void printUsage(String cmd) {
-    System.err.println("Usage: java " + UpdateIndex.class.getName() + "\n"
-        + "                        -inputPaths <inputPath,inputPath>\n"
-        + "                        -outputPath <outputPath>\n"
-        + "                        -shards     <shardDir,shardDir>\n"
-        + "                        -indexPath  <indexPath>\n"
-        + "                        -numShards  <num>\n"
-        + "                        -numMapTasks <num>\n"
-        + "                        -conf       <confPath>\n"
-        + "Note: Do not use both -shards option and -indexPath option.");
-  }
-
-  private static String getIndexPath(Configuration conf) {
-    return conf.get("sea.index.path");
-  }
-
-  private static int getNumShards(Configuration conf) {
-    return conf.getInt("sea.num.shards", 1);
-  }
-
-  private static Shard[] createShards(String indexPath, int numShards,
-      Configuration conf) throws IOException {
-
-    String parent = Shard.normalizePath(indexPath) + Path.SEPARATOR;
-    long versionNumber = -1;
-    long generation = -1;
-
-    FileSystem fs = FileSystem.get(conf);
-    Path path = new Path(indexPath);
-
-    if (fs.exists(path)) {
-      FileStatus[] fileStatus = fs.listStatus(path);
-      String[] shardNames = new String[fileStatus.length];
-      int count = 0;
-      for (int i = 0; i < fileStatus.length; i++) {
-        if (fileStatus[i].isDirectory()) {
-          shardNames[count] = fileStatus[i].getPath().getName();
-          count++;
-        }
-      }
-      Arrays.sort(shardNames, 0, count);
-
-      Shard[] shards = new Shard[count >= numShards ? count : numShards];
-      for (int i = 0; i < count; i++) {
-        shards[i] =
-            new Shard(versionNumber, parent + shardNames[i], generation);
-      }
-
-      int number = count;
-      for (int i = count; i < numShards; i++) {
-        String shardPath;
-        while (true) {
-          shardPath = parent + NUMBER_FORMAT.format(number++);
-          if (!fs.exists(new Path(shardPath))) {
-            break;
-          }
-        }
-        shards[i] = new Shard(versionNumber, shardPath, generation);
-      }
-      return shards;
-    } else {
-      Shard[] shards = new Shard[numShards];
-      for (int i = 0; i < shards.length; i++) {
-        shards[i] =
-            new Shard(versionNumber, parent + NUMBER_FORMAT.format(i),
-                generation);
-      }
-      return shards;
-    }
-  }
-
-  /**
-   * The main() method
-   * @param argv
-   */
-  public static void main(String[] argv) {
-    if (argv.length == 0) {
-      printUsage("");
-      System.exit(-1);
-    }
-
-    String inputPathsString = null;
-    Path outputPath = null;
-    String shardsString = null;
-    String indexPath = null;
-    int numShards = -1;
-    int numMapTasks = -1;
-    Configuration conf = new Configuration();
-    String confPath = null;
-
-    // parse the command line
-    for (int i = 0; i < argv.length; i++) { // parse command line
-      if (argv[i].equals("-inputPaths")) {
-        inputPathsString = argv[++i];
-      } else if (argv[i].equals("-outputPath")) {
-        outputPath = new Path(argv[++i]);
-      } else if (argv[i].equals("-shards")) {
-        shardsString = argv[++i];
-      } else if (argv[i].equals("-indexPath")) {
-        indexPath = argv[++i];
-      } else if (argv[i].equals("-numShards")) {
-        numShards = Integer.parseInt(argv[++i]);
-      } else if (argv[i].equals("-numMapTasks")) {
-        numMapTasks = Integer.parseInt(argv[++i]);
-      } else if (argv[i].equals("-conf")) {
-        // add as a local FS resource
-        confPath = argv[++i];
-        conf.addResource(new Path(confPath));
-      } else {
-        System.out.println("Unknown option " + argv[i] + " w/ value "
-            + argv[++i]);
-      }
-    }
-    LOG.info("inputPaths = " + inputPathsString);
-    LOG.info("outputPath = " + outputPath);
-    LOG.info("shards     = " + shardsString);
-    LOG.info("indexPath  = " + indexPath);
-    LOG.info("numShards  = " + numShards);
-    LOG.info("numMapTasks= " + numMapTasks);
-    LOG.info("confPath   = " + confPath);
-
-    Path[] inputPaths = null;
-    Shard[] shards = null;
-
-    JobConf jobConf = new JobConf(conf);
-    IndexUpdateConfiguration iconf = new IndexUpdateConfiguration(jobConf);
-
-    if (inputPathsString != null) {
-      jobConf.set(org.apache.hadoop.mapreduce.lib.input.
-        FileInputFormat.INPUT_DIR, inputPathsString);
-    }
-    inputPaths = FileInputFormat.getInputPaths(jobConf);
-    if (inputPaths.length == 0) {
-      inputPaths = null;
-    }
-
-    if (outputPath == null) {
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * A distributed "index" is partitioned into "shards". Each shard corresponds
+ * to a Lucene instance. This class contains the main() method which uses a
+ * Map/Reduce job to analyze documents and update Lucene instances in parallel.
+ * 
+ * The main() method in UpdateIndex requires the following information for
+ * updating the shards:
+ *   - Input formatter. This specifies how to format the input documents.
+ *   - Analysis. This defines the analyzer to use on the input. The analyzer
+ *     determines whether a document is being inserted, updated, or deleted.
+ *     For inserts or updates, the analyzer also converts each input document
+ *     into a Lucene document.
+ *   - Input paths. This provides the location(s) of updated documents,
+ *     e.g., HDFS files or directories, or HBase tables.
+ *   - Shard paths, or index path with the number of shards. Either specify
+ *     the path for each shard, or specify an index path and the shards are
+ *     the sub-directories of the index directory.
+ *   - Output path. When the update to a shard is done, a message is put here.
+ *   - Number of map tasks.
+ *
+ * All of the information can be specified in a configuration file. All but
+ * the first two can also be specified as command line options. Check out
+ * conf/index-config.xml.template for other configurable parameters.
+ *
+ * Note: Because of the parallel nature of Map/Reduce, the behaviour of
+ * multiple inserts, deletes or updates to the same document is undefined.
+ */
+public class UpdateIndex {
+  public static final Log LOG = LogFactory.getLog(UpdateIndex.class);
+
+  private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
+  static {
+    NUMBER_FORMAT.setMinimumIntegerDigits(5);
+    NUMBER_FORMAT.setGroupingUsed(false);
+  }
+
+  private static long now() {
+    return System.currentTimeMillis();
+  }
+
+  private static void printUsage(String cmd) {
+    System.err.println("Usage: java " + UpdateIndex.class.getName() + "\n"
+        + "                        -inputPaths <inputPath,inputPath>\n"
+        + "                        -outputPath <outputPath>\n"
+        + "                        -shards     <shardDir,shardDir>\n"
+        + "                        -indexPath  <indexPath>\n"
+        + "                        -numShards  <num>\n"
+        + "                        -numMapTasks <num>\n"
+        + "                        -conf       <confPath>\n"
+        + "Note: Do not use both -shards option and -indexPath option.");
+  }
+
+  private static String getIndexPath(Configuration conf) {
+    return conf.get("sea.index.path");
+  }
+
+  private static int getNumShards(Configuration conf) {
+    return conf.getInt("sea.num.shards", 1);
+  }
+
+  private static Shard[] createShards(String indexPath, int numShards,
+      Configuration conf) throws IOException {
+
+    String parent = Shard.normalizePath(indexPath) + Path.SEPARATOR;
+    long versionNumber = -1;
+    long generation = -1;
+
+    FileSystem fs = FileSystem.get(conf);
+    Path path = new Path(indexPath);
+
+    if (fs.exists(path)) {
+      FileStatus[] fileStatus = fs.listStatus(path);
+      String[] shardNames = new String[fileStatus.length];
+      int count = 0;
+      for (int i = 0; i < fileStatus.length; i++) {
+        if (fileStatus[i].isDirectory()) {
+          shardNames[count] = fileStatus[i].getPath().getName();
+          count++;
+        }
+      }
+      Arrays.sort(shardNames, 0, count);
+
+      Shard[] shards = new Shard[count >= numShards ? count : numShards];
+      for (int i = 0; i < count; i++) {
+        shards[i] =
+            new Shard(versionNumber, parent + shardNames[i], generation);
+      }
+
+      int number = count;
+      for (int i = count; i < numShards; i++) {
+        String shardPath;
+        while (true) {
+          shardPath = parent + NUMBER_FORMAT.format(number++);
+          if (!fs.exists(new Path(shardPath))) {
+            break;
+          }
+        }
+        shards[i] = new Shard(versionNumber, shardPath, generation);
+      }
+      return shards;
+    } else {
+      Shard[] shards = new Shard[numShards];
+      for (int i = 0; i < shards.length; i++) {
+        shards[i] =
+            new Shard(versionNumber, parent + NUMBER_FORMAT.format(i),
+                generation);
+      }
+      return shards;
+    }
+  }
+
+  /**
+   * The main() method
+   * @param argv
+   */
+  public static void main(String[] argv) {
+    if (argv.length == 0) {
+      printUsage("");
+      System.exit(-1);
+    }
+
+    String inputPathsString = null;
+    Path outputPath = null;
+    String shardsString = null;
+    String indexPath = null;
+    int numShards = -1;
+    int numMapTasks = -1;
+    Configuration conf = new Configuration();
+    String confPath = null;
+
+    // parse the command line
+    for (int i = 0; i < argv.length; i++) { // parse command line
+      if (argv[i].equals("-inputPaths")) {
+        inputPathsString = argv[++i];
+      } else if (argv[i].equals("-outputPath")) {
+        outputPath = new Path(argv[++i]);
+      } else if (argv[i].equals("-shards")) {
+        shardsString = argv[++i];
+      } else if (argv[i].equals("-indexPath")) {
+        indexPath = argv[++i];
+      } else if (argv[i].equals("-numShards")) {
+        numShards = Integer.parseInt(argv[++i]);
+      } else if (argv[i].equals("-numMapTasks")) {
+        numMapTasks = Integer.parseInt(argv[++i]);
+      } else if (argv[i].equals("-conf")) {
+        // add as a local FS resource
+        confPath = argv[++i];
+        conf.addResource(new Path(confPath));
+      } else {
+        System.out.println("Unknown option " + argv[i] + " w/ value "
+            + argv[++i]);
+      }
+    }
+    LOG.info("inputPaths = " + inputPathsString);
+    LOG.info("outputPath = " + outputPath);
+    LOG.info("shards     = " + shardsString);
+    LOG.info("indexPath  = " + indexPath);
+    LOG.info("numShards  = " + numShards);
+    LOG.info("numMapTasks= " + numMapTasks);
+    LOG.info("confPath   = " + confPath);
+
+    Path[] inputPaths = null;
+    Shard[] shards = null;
+
+    JobConf jobConf = new JobConf(conf);
+    IndexUpdateConfiguration iconf = new IndexUpdateConfiguration(jobConf);
+
+    if (inputPathsString != null) {
+      jobConf.set(org.apache.hadoop.mapreduce.lib.input.
+        FileInputFormat.INPUT_DIR, inputPathsString);
+    }
+    inputPaths = FileInputFormat.getInputPaths(jobConf);
+    if (inputPaths.length == 0) {
+      inputPaths = null;
+    }
+
+    if (outputPath == null) {
       outputPath = FileOutputFormat.getOutputPath(jobConf);
-    }
-
-    if (inputPaths == null || outputPath == null) {
-      System.err.println("InputPaths and outputPath must be specified.");
-      printUsage("");
-      System.exit(-1);
-    }
-
-    if (shardsString != null) {
-      iconf.setIndexShards(shardsString);
-    }
-    shards = Shard.getIndexShards(iconf);
-    if (shards != null && shards.length == 0) {
-      shards = null;
-    }
-
-    if (indexPath == null) {
-      indexPath = getIndexPath(conf);
-    }
-    if (numShards <= 0) {
-      numShards = getNumShards(conf);
-    }
-
-    if (shards == null && indexPath == null) {
-      System.err.println("Either shards or indexPath must be specified.");
-      printUsage("");
-      System.exit(-1);
-    }
-
-    if (numMapTasks <= 0) {
-      numMapTasks = jobConf.getNumMapTasks();
-    }
-
-    try {
-      // create shards and set their directories if necessary
-      if (shards == null) {
-        shards = createShards(indexPath, numShards, conf);
-      }
-
-      long startTime = now();
-      try {
-        IIndexUpdater updater =
-            (IIndexUpdater) ReflectionUtils.newInstance(
-                iconf.getIndexUpdaterClass(), conf);
-        LOG.info("sea.index.updater = "
-            + iconf.getIndexUpdaterClass().getName());
-
-        updater.run(conf, inputPaths, outputPath, numMapTasks, shards);
-        LOG.info("Index update job is done");
-
-      } finally {
-        long elapsedTime = now() - startTime;
-        LOG.info("Elapsed time is  " + (elapsedTime / 1000) + "s");
-        System.out.println("Elapsed time is " + (elapsedTime / 1000) + "s");
-      }
-    } catch (Exception e) {
-      e.printStackTrace(System.err);
-    }
-  }
-}
+    }
+
+    if (inputPaths == null || outputPath == null) {
+      System.err.println("InputPaths and outputPath must be specified.");
+      printUsage("");
+      System.exit(-1);
+    }
+
+    if (shardsString != null) {
+      iconf.setIndexShards(shardsString);
+    }
+    shards = Shard.getIndexShards(iconf);
+    if (shards != null && shards.length == 0) {
+      shards = null;
+    }
+
+    if (indexPath == null) {
+      indexPath = getIndexPath(conf);
+    }
+    if (numShards <= 0) {
+      numShards = getNumShards(conf);
+    }
+
+    if (shards == null && indexPath == null) {
+      System.err.println("Either shards or indexPath must be specified.");
+      printUsage("");
+      System.exit(-1);
+    }
+
+    if (numMapTasks <= 0) {
+      numMapTasks = jobConf.getNumMapTasks();
+    }
+
+    try {
+      // create shards and set their directories if necessary
+      if (shards == null) {
+        shards = createShards(indexPath, numShards, conf);
+      }
+
+      long startTime = now();
+      try {
+        IIndexUpdater updater =
+            (IIndexUpdater) ReflectionUtils.newInstance(
+                iconf.getIndexUpdaterClass(), conf);
+        LOG.info("sea.index.updater = "
+            + iconf.getIndexUpdaterClass().getName());
+
+        updater.run(conf, inputPaths, outputPath, numMapTasks, shards);
+        LOG.info("Index update job is done");
+
+      } finally {
+        long elapsedTime = now() - startTime;
+        LOG.info("Elapsed time is  " + (elapsedTime / 1000) + "s");
+        System.out.println("Elapsed time is " + (elapsedTime / 1000) + "s");
+      }
+    } catch (Exception e) {
+      e.printStackTrace(System.err);
+    }
+  }
+}
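
Two notes to make the sharding and invocation conventions above concrete. First, when -indexPath is given instead of -shards, createShards() names any shard directories it has to create with a five-digit, ungrouped counter under the index path; a small standalone sketch of that naming, reusing the same NumberFormat settings as NUMBER_FORMAT above (the class name and the /index path are made up for illustration):

    import java.text.NumberFormat;

    public class ShardNamingSketch {
      public static void main(String[] args) {
        // same settings as UpdateIndex.NUMBER_FORMAT
        NumberFormat nf = NumberFormat.getInstance();
        nf.setMinimumIntegerDigits(5);
        nf.setGroupingUsed(false);
        // e.g. -indexPath /index -numShards 3 with no existing shard
        // directories yields /index/00000, /index/00001, /index/00002
        for (int i = 0; i < 3; i++) {
          System.out.println("/index/" + nf.format(i));
        }
      }
    }

Second, a typical invocation (with hypothetical paths) follows the usage printed by printUsage(), for example: java org.apache.hadoop.contrib.index.main.UpdateIndex -inputPaths /docs/batch1 -outputPath /out -indexPath /index -numShards 3 -numMapTasks 4 -conf conf/index-config.xml. As the usage message notes, -shards and -indexPath should not be used together.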