Posted to mapreduce-commits@hadoop.apache.org by sz...@apache.org on 2012/10/19 04:28:42 UTC
svn commit: r1399950 [9/11] - in
/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project: ./ bin/ conf/
dev-support/ hadoop-mapreduce-client/
hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/
hadoop-mapreduc...
Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java Fri Oct 19 02:25:55 2012
@@ -1,58 +1,58 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-
-/**
- * Choose a shard for each insert in a round-robin fashion. Choose all the
- * shards for each delete because we don't know which shard stores the document.
- */
-public class RoundRobinDistributionPolicy implements IDistributionPolicy {
-
- private int numShards;
- private int rr; // next shard index in the round-robin rotation
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
- */
- public void init(Shard[] shards) {
- numShards = shards.length;
- rr = 0;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForInsert(DocumentID key) {
- int chosen = rr;
- rr = (rr + 1) % numShards;
- return chosen;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForDelete(DocumentID key) {
- // -1 represents all the shards
- return -1;
- }
-}
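For reference, a minimal sketch of how this policy behaves once initialized. The three-shard setup, the Shard (version, directory, generation) constructor arguments, and the DocumentID no-arg constructor are assumptions for illustration; in the real job the shards come from the job configuration:

    // Hypothetical three-shard setup; ctor signatures are assumed.
    Shard[] shards = new Shard[] {
        new Shard(-1, "/index/shard0", -1),
        new Shard(-1, "/index/shard1", -1),
        new Shard(-1, "/index/shard2", -1)
    };
    IDistributionPolicy policy = new RoundRobinDistributionPolicy();
    policy.init(shards);

    DocumentID doc = new DocumentID();
    policy.chooseShardForInsert(doc);  // 0
    policy.chooseShardForInsert(doc);  // 1
    policy.chooseShardForInsert(doc);  // 2
    policy.chooseShardForInsert(doc);  // 0 again: (rr + 1) % numShards wraps
    policy.chooseShardForDelete(doc);  // -1, meaning broadcast to all shards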
Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java Fri Oct 19 02:25:55 2012
@@ -1,55 +1,55 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.lucene.index.IndexFileNameFilter;
-
-/**
- * A wrapper class to convert an IndexFileNameFilter which implements
- * java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
- */
-class LuceneIndexFileNameFilter implements PathFilter {
-
- private static final LuceneIndexFileNameFilter singleton =
- new LuceneIndexFileNameFilter();
-
- /**
- * Get a static instance.
- * @return the static instance
- */
- public static LuceneIndexFileNameFilter getFilter() {
- return singleton;
- }
-
- private final IndexFileNameFilter luceneFilter;
-
- private LuceneIndexFileNameFilter() {
- luceneFilter = IndexFileNameFilter.getFilter();
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path)
- */
- public boolean accept(Path path) {
- return luceneFilter.accept(null, path.getName());
- }
-
-}
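Since the class is package-private, only callers in this package use it; ShardWriter later in this commit passes it to FileSystem.listStatus to pick out Lucene index files and skip everything else. A one-line sketch (the shard path is made up):

    // fs: an org.apache.hadoop.fs.FileSystem obtained elsewhere.
    FileStatus[] indexFiles = fs.listStatus(
        new Path("/index/shard0"), LuceneIndexFileNameFilter.getFilter());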
Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java Fri Oct 19 02:25:55 2012
@@ -1,112 +1,112 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.store.Directory;
-
-/**
- * This class copies some methods from Lucene's SegmentInfos since that class
- * is not public.
- */
-public final class LuceneUtil {
-
- static final class IndexFileNames {
- /** Name of the index segment file */
- static final String SEGMENTS = "segments";
-
- /** Name of the generation reference file name */
- static final String SEGMENTS_GEN = "segments.gen";
- }
-
- /**
- * Check if the file is a segments_N file
- * @param name the file name to check
- * @return true if the file is a segments_N file
- */
- public static boolean isSegmentsFile(String name) {
- return name.startsWith(IndexFileNames.SEGMENTS)
- && !name.equals(IndexFileNames.SEGMENTS_GEN);
- }
-
- /**
- * Check if the file is the segments.gen file
- * @param name the file name to check
- * @return true if the file is the segments.gen file
- */
- public static boolean isSegmentsGenFile(String name) {
- return name.equals(IndexFileNames.SEGMENTS_GEN);
- }
-
- /**
- * Get the generation (N) of the current segments_N file in the directory.
- *
- * @param directory -- directory to search for the latest segments_N file
- */
- public static long getCurrentSegmentGeneration(Directory directory)
- throws IOException {
- String[] files = directory.list();
- if (files == null)
- throw new IOException("cannot read directory " + directory
- + ": list() returned null");
- return getCurrentSegmentGeneration(files);
- }
-
- /**
- * Get the generation (N) of the current segments_N file from a list of
- * files.
- *
- * @param files -- array of file names to check
- */
- public static long getCurrentSegmentGeneration(String[] files) {
- if (files == null) {
- return -1;
- }
- long max = -1;
- for (int i = 0; i < files.length; i++) {
- String file = files[i];
- if (file.startsWith(IndexFileNames.SEGMENTS)
- && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
- long gen = generationFromSegmentsFileName(file);
- if (gen > max) {
- max = gen;
- }
- }
- }
- return max;
- }
-
- /**
- * Parse the generation off the segments file name and return it.
- */
- public static long generationFromSegmentsFileName(String fileName) {
- if (fileName.equals(IndexFileNames.SEGMENTS)) {
- return 0;
- } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
- return Long.parseLong(
- fileName.substring(1 + IndexFileNames.SEGMENTS.length()),
- Character.MAX_RADIX);
- } else {
- throw new IllegalArgumentException("fileName \"" + fileName
- + "\" is not a segments file");
- }
- }
-
-}
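The generation is whatever follows "segments_", parsed in radix 36 (Character.MAX_RADIX), so a quick worked example of the methods above:

    LuceneUtil.generationFromSegmentsFileName("segments");    // 0 (legacy plain name)
    LuceneUtil.generationFromSegmentsFileName("segments_2");  // 2
    LuceneUtil.generationFromSegmentsFileName("segments_a");  // 10 (radix 36)
    // segments.gen and non-segments files are ignored; the max generation wins:
    LuceneUtil.getCurrentSegmentGeneration(
        new String[] { "segments_2", "segments_a", "segments.gen", "_0.cfs" });  // 10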
Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java Fri Oct 19 02:25:55 2012
@@ -1,49 +1,49 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.lucene.index.IndexCommitPoint;
-import org.apache.lucene.index.IndexDeletionPolicy;
-
-/**
- * For mixed directory. Use KeepAllDeletionPolicy for the read-only directory
- * (keep all from init) and use KeepOnlyLastCommitDeletionPolicy for the
- * writable directory (initially empty, keep latest after init).
- */
-class MixedDeletionPolicy implements IndexDeletionPolicy {
-
- private int keepAllFromInit = 0;
-
- public void onInit(List commits) throws IOException {
- keepAllFromInit = commits.size();
- }
-
- public void onCommit(List commits) throws IOException {
- int size = commits.size();
- assert (size > keepAllFromInit);
- // keep all from init and the latest, delete the rest
- for (int i = keepAllFromInit; i < size - 1; i++) {
- ((IndexCommitPoint) commits.get(i)).delete();
- }
- }
-
-}
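A short illustrative trace of the policy (the commit points c0..c4 are hypothetical): everything present at init is pinned, and of the commits made afterwards only the newest survives.

    // onInit([c0, c1])            -> keepAllFromInit = 2; nothing deleted
    // onCommit([c0, c1, c2])      -> loop range [2, 2) is empty; c2 is the latest, kept
    // onCommit([c0, c1, c2, c3])  -> deletes c2; keeps c0, c1 (from init) and c3 (latest)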
Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java Fri Oct 19 02:25:55 2012
@@ -1,185 +1,185 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.NoLockFactory;
-
-/**
- * The initial version of an index is stored in a read-only FileSystem dir
- * (FileSystemDirectory). Index files created by newer versions are written to
- * a writable local FS dir (Lucene's FSDirectory). We should use the general
- * FileSystemDirectory for the writable dir as well, but we have to use Lucene's
- * FSDirectory because Lucene currently does random writes and
- * FileSystemDirectory only supports sequential writes.
- *
- * Note: We may delete files from the read-only FileSystem dir because there
- * can be some segment files from an uncommitted checkpoint. For the same
- * reason, we may create files in the writable dir which already exist in the
- * read-only dir and logically they overwrite the ones in the read-only dir.
- */
-class MixedDirectory extends Directory {
-
- private final Directory readDir; // FileSystemDirectory
- private final Directory writeDir; // Lucene's FSDirectory
-
- // take advantage of the fact that Lucene's FSDirectory.fileExists is faster
-
- public MixedDirectory(FileSystem readFs, Path readPath, FileSystem writeFs,
- Path writePath, Configuration conf) throws IOException {
-
- try {
- readDir = new FileSystemDirectory(readFs, readPath, false, conf);
- // TODO: check that writeFs is a local FS
- writeDir = FSDirectory.getDirectory(writePath.toString());
-
- } catch (IOException e) {
- try {
- close();
- } catch (IOException e1) {
- // ignore this one, throw the original one
- }
- throw e;
- }
-
- lockFactory = new NoLockFactory();
- }
-
- // for debugging
- MixedDirectory(Directory readDir, Directory writeDir) throws IOException {
- this.readDir = readDir;
- this.writeDir = writeDir;
-
- lockFactory = new NoLockFactory();
- }
-
- @Override
- public String[] list() throws IOException {
- String[] readFiles = readDir.list();
- String[] writeFiles = writeDir.list();
-
- if (readFiles == null || readFiles.length == 0) {
- return writeFiles;
- } else if (writeFiles == null || writeFiles.length == 0) {
- return readFiles;
- } else {
- String[] result = new String[readFiles.length + writeFiles.length];
- System.arraycopy(readFiles, 0, result, 0, readFiles.length);
- System.arraycopy(writeFiles, 0, result, readFiles.length,
- writeFiles.length);
- return result;
- }
- }
-
- @Override
- public void deleteFile(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- writeDir.deleteFile(name);
- }
- if (readDir.fileExists(name)) {
- readDir.deleteFile(name);
- }
- }
-
- @Override
- public boolean fileExists(String name) throws IOException {
- return writeDir.fileExists(name) || readDir.fileExists(name);
- }
-
- @Override
- public long fileLength(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- return writeDir.fileLength(name);
- } else {
- return readDir.fileLength(name);
- }
- }
-
- @Override
- public long fileModified(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- return writeDir.fileModified(name);
- } else {
- return readDir.fileModified(name);
- }
- }
-
- @Override
- public void renameFile(String from, String to) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void touchFile(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- writeDir.touchFile(name);
- } else {
- readDir.touchFile(name);
- }
- }
-
- @Override
- public IndexOutput createOutput(String name) throws IOException {
- return writeDir.createOutput(name);
- }
-
- @Override
- public IndexInput openInput(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- return writeDir.openInput(name);
- } else {
- return readDir.openInput(name);
- }
- }
-
- @Override
- public IndexInput openInput(String name, int bufferSize) throws IOException {
- if (writeDir.fileExists(name)) {
- return writeDir.openInput(name, bufferSize);
- } else {
- return readDir.openInput(name, bufferSize);
- }
- }
-
- @Override
- public void close() throws IOException {
- try {
- if (readDir != null) {
- readDir.close();
- }
- } finally {
- if (writeDir != null) {
- writeDir.close();
- }
- }
- }
-
- public String toString() {
- return this.getClass().getName() + "@" + readDir + "&" + writeDir;
- }
-
-}
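To see the resolution rules in isolation, a sketch using the package-private debug constructor with two in-memory directories standing in for the FileSystemDirectory/FSDirectory pair (both RAMDirectory stand-ins are for illustration only):

    Directory readOnly = new RAMDirectory();  // plays the initial, read-only index
    Directory writable = new RAMDirectory();  // plays the local writable dir
    MixedDirectory mixed = new MixedDirectory(readOnly, writable);

    IndexOutput out = mixed.createOutput("_1.cfs");  // new files always go to writable
    out.writeByte((byte) 0);
    out.close();

    mixed.fileExists("_1.cfs");  // true, found in writable
    mixed.openInput("_1.cfs");   // served from writable; read-only dir is the fallback
    mixed.list();                // concatenation of both dirs' listings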
Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java Fri Oct 19 02:25:55 2012
@@ -1,119 +1,119 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Text;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMDirectory;
-
-/**
- * A utility class that writes an index in a RAM directory to a DataOutput
- * and reads an index from a DataInput into a RAM directory.
- */
-public class RAMDirectoryUtil {
- private static final int BUFFER_SIZE = 1024; // RAMOutputStream.BUFFER_SIZE;
-
- /**
- * Write a number of files from a ram directory to a data output.
- * @param out the data output
- * @param dir the ram directory
- * @param names the names of the files to write
- * @throws IOException
- */
- public static void writeRAMFiles(DataOutput out, RAMDirectory dir,
- String[] names) throws IOException {
- out.writeInt(names.length);
-
- for (int i = 0; i < names.length; i++) {
- Text.writeString(out, names[i]);
- long length = dir.fileLength(names[i]);
- out.writeLong(length);
-
- if (length > 0) {
- // can we avoid the extra copy?
- IndexInput input = null;
- try {
- input = dir.openInput(names[i], BUFFER_SIZE);
-
- int position = 0;
- byte[] buffer = new byte[BUFFER_SIZE];
-
- while (position < length) {
- int len =
- position + BUFFER_SIZE <= length ? BUFFER_SIZE
- : (int) (length - position);
- input.readBytes(buffer, 0, len);
- out.write(buffer, 0, len);
- position += len;
- }
- } finally {
- if (input != null) {
- input.close();
- }
- }
- }
- }
- }
-
- /**
- * Read a number of files from a data input to a ram directory.
- * @param in the data input
- * @param dir the ram directory
- * @throws IOException
- */
- public static void readRAMFiles(DataInput in, RAMDirectory dir)
- throws IOException {
- int numFiles = in.readInt();
-
- for (int i = 0; i < numFiles; i++) {
- String name = Text.readString(in);
- long length = in.readLong();
-
- if (length > 0) {
- // can we avoid the extra copy?
- IndexOutput output = null;
- try {
- output = dir.createOutput(name);
-
- int position = 0;
- byte[] buffer = new byte[BUFFER_SIZE];
-
- while (position < length) {
- int len =
- position + BUFFER_SIZE <= length ? BUFFER_SIZE
- : (int) (length - position);
- in.readFully(buffer, 0, len);
- output.writeBytes(buffer, 0, len);
- position += len;
- }
- } finally {
- if (output != null) {
- output.close();
- }
- }
- }
- }
- }
-
-}
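A hedged round-trip sketch of the two methods, serializing a tiny RAM index into a byte array and reading it back into a fresh directory (the file name is illustrative; java.io stream classes are assumed imported):

    RAMDirectory src = new RAMDirectory();
    IndexOutput out = src.createOutput("_0.fdt");
    out.writeBytes(new byte[] { 1, 2, 3 }, 3);
    out.close();

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    RAMDirectoryUtil.writeRAMFiles(new DataOutputStream(buf), src,
        new String[] { "_0.fdt" });

    RAMDirectory dst = new RAMDirectory();
    RAMDirectoryUtil.readRAMFiles(
        new DataInputStream(new ByteArrayInputStream(buf.toByteArray())), dst);
    // dst.fileLength("_0.fdt") == 3: same bytes, new directory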
Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/ShardWriter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/ShardWriter.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/ShardWriter.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/ShardWriter.java Fri Oct 19 02:25:55 2012
@@ -1,233 +1,233 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.contrib.index.mapred.IndexUpdateConfiguration;
-import org.apache.hadoop.contrib.index.mapred.IntermediateForm;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.store.Directory;
-
-/**
- * The initial version of an index is stored in the perm dir. Index files
- * created by newer versions are written to a temp dir on the local FS. After
- * successfully creating the new version in the temp dir, the shard writer
- * moves the new files to the perm dir and deletes the temp dir in close().
- */
-public class ShardWriter {
- static final Log LOG = LogFactory.getLog(ShardWriter.class);
-
- private final FileSystem fs;
- private final FileSystem localFs;
- private final Path perm;
- private final Path temp;
- private final Directory dir;
- private final IndexWriter writer;
- private int maxNumSegments;
- private long numForms = 0;
-
- /**
- * Constructor
- * @param fs the file system holding the shard's permanent directory
- * @param shard the shard to write to
- * @param tempDir name of the temp directory on the local FS
- * @param iconf the index update configuration
- * @throws IOException
- */
- public ShardWriter(FileSystem fs, Shard shard, String tempDir,
- IndexUpdateConfiguration iconf) throws IOException {
- LOG.info("Construct a shard writer");
-
- this.fs = fs;
- localFs = FileSystem.getLocal(iconf.getConfiguration());
- perm = new Path(shard.getDirectory());
- temp = new Path(tempDir);
-
- long initGeneration = shard.getGeneration();
- if (!fs.exists(perm)) {
- assert (initGeneration < 0);
- fs.mkdirs(perm);
- } else {
- restoreGeneration(fs, perm, initGeneration);
- }
- dir =
- new MixedDirectory(fs, perm, localFs, fs.startLocalOutput(perm, temp),
- iconf.getConfiguration());
-
- // analyzer is null because we only use addIndexes, not addDocument
- writer =
- new IndexWriter(dir, false, null,
- initGeneration < 0 ? new KeepOnlyLastCommitDeletionPolicy()
- : new MixedDeletionPolicy());
- setParameters(iconf);
- }
-
- /**
- * Process an intermediate form by applying its deletes and its inserts
- * (a RAM index) to the shard's Lucene instance.
- * @param form the intermediate form containing deletes and a ram index
- * @throws IOException
- */
- public void process(IntermediateForm form) throws IOException {
- // first delete
- Iterator<Term> iter = form.deleteTermIterator();
- while (iter.hasNext()) {
- writer.deleteDocuments(iter.next());
- }
- // then insert
- writer.addIndexesNoOptimize(new Directory[] { form.getDirectory() });
- numForms++;
- }
-
- /**
- * Close the shard writer. Optimize the Lucene instance of the shard before
- * closing if necessary, and copy the files created in the temp directory
- * to the permanent directory after closing.
- * @throws IOException
- */
- public void close() throws IOException {
- LOG.info("Closing the shard writer, processed " + numForms + " forms");
- try {
- try {
- if (maxNumSegments > 0) {
- writer.optimize(maxNumSegments);
- LOG.info("Optimized the shard into at most " + maxNumSegments
- + " segments");
- }
- } finally {
- writer.close();
- LOG.info("Closed Lucene index writer");
- }
-
- moveFromTempToPerm();
- LOG.info("Moved new index files to " + perm);
-
- } finally {
- dir.close();
- LOG.info("Closed the shard writer");
- }
- }
-
- /* (non-Javadoc)
- * @see java.lang.Object#toString()
- */
- public String toString() {
- return this.getClass().getName() + "@" + perm + "&" + temp;
- }
-
- private void setParameters(IndexUpdateConfiguration iconf) {
- int maxFieldLength = iconf.getIndexMaxFieldLength();
- if (maxFieldLength > 0) {
- writer.setMaxFieldLength(maxFieldLength);
- }
- writer.setUseCompoundFile(iconf.getIndexUseCompoundFile());
- maxNumSegments = iconf.getIndexMaxNumSegments();
-
- if (maxFieldLength > 0) {
- LOG.info("sea.max.field.length = " + writer.getMaxFieldLength());
- }
- LOG.info("sea.use.compound.file = " + writer.getUseCompoundFile());
- LOG.info("sea.max.num.segments = " + maxNumSegments);
- }
-
- // in case a previous reduce task fails, restore the generation to
- // the original starting point by deleting the segments.gen file
- // and the segments_N files whose generations are greater than the
- // starting generation; the rest of the unwanted files will be deleted
- // once the unwanted segments_N files are deleted
- private void restoreGeneration(FileSystem fs, Path perm, long startGen)
- throws IOException {
-
- FileStatus[] fileStatus = fs.listStatus(perm, new PathFilter() {
- public boolean accept(Path path) {
- return LuceneUtil.isSegmentsFile(path.getName());
- }
- });
-
- // remove the segments_N files whose generations are greater than
- // the starting generation
- for (int i = 0; i < fileStatus.length; i++) {
- Path path = fileStatus[i].getPath();
- if (startGen < LuceneUtil.generationFromSegmentsFileName(path.getName())) {
- fs.delete(path, true);
- }
- }
-
- // always remove segments.gen in case the last failed attempt removed
- // segments_N but not segments.gen; segments.gen will be overwritten anyway.
- Path segmentsGenFile = new Path(LuceneUtil.IndexFileNames.SEGMENTS_GEN);
- if (fs.exists(segmentsGenFile)) {
- fs.delete(segmentsGenFile, true);
- }
- }
-
- // move the files created in the temp dir into the perm dir
- // and then delete the temp dir from the local FS
- private void moveFromTempToPerm() throws IOException {
- try {
- FileStatus[] fileStatus =
- localFs.listStatus(temp, LuceneIndexFileNameFilter.getFilter());
- Path segmentsPath = null;
- Path segmentsGenPath = null;
-
- // move the files created in temp dir except segments_N and segments.gen
- for (int i = 0; i < fileStatus.length; i++) {
- Path path = fileStatus[i].getPath();
- String name = path.getName();
-
- if (LuceneUtil.isSegmentsGenFile(name)) {
- assert (segmentsGenPath == null);
- segmentsGenPath = path;
- } else if (LuceneUtil.isSegmentsFile(name)) {
- assert (segmentsPath == null);
- segmentsPath = path;
- } else {
- fs.completeLocalOutput(new Path(perm, name), path);
- }
- }
-
- // move the segments_N file
- if (segmentsPath != null) {
- fs.completeLocalOutput(new Path(perm, segmentsPath.getName()),
- segmentsPath);
- }
-
- // move the segments.gen file
- if (segmentsGenPath != null) {
- fs.completeLocalOutput(new Path(perm, segmentsGenPath.getName()),
- segmentsGenPath);
- }
- } finally {
- // finally delete the temp dir (files should have been deleted)
- localFs.delete(temp, true);
- }
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.contrib.index.mapred.IndexUpdateConfiguration;
+import org.apache.hadoop.contrib.index.mapred.IntermediateForm;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+
+/**
+ * The initial version of an index is stored in the perm dir. Index files
+ * created by newer versions are written to a temp dir on the local FS. After
+ * successfully creating the new version in the temp dir, the shard writer
+ * moves the new files to the perm dir and deletes the temp dir in close().
+ */
+public class ShardWriter {
+ static final Log LOG = LogFactory.getLog(ShardWriter.class);
+
+ private final FileSystem fs;
+ private final FileSystem localFs;
+ private final Path perm;
+ private final Path temp;
+ private final Directory dir;
+ private final IndexWriter writer;
+ private int maxNumSegments;
+ private long numForms = 0;
+
+ /**
+ * Constructor
+ * @param fs
+ * @param shard
+ * @param tempDir
+ * @param iconf
+ * @throws IOException
+ */
+ public ShardWriter(FileSystem fs, Shard shard, String tempDir,
+ IndexUpdateConfiguration iconf) throws IOException {
+ LOG.info("Construct a shard writer");
+
+ this.fs = fs;
+ localFs = FileSystem.getLocal(iconf.getConfiguration());
+ perm = new Path(shard.getDirectory());
+ temp = new Path(tempDir);
+
+ long initGeneration = shard.getGeneration();
+ if (!fs.exists(perm)) {
+ assert (initGeneration < 0);
+ fs.mkdirs(perm);
+ } else {
+ restoreGeneration(fs, perm, initGeneration);
+ }
+ dir =
+ new MixedDirectory(fs, perm, localFs, fs.startLocalOutput(perm, temp),
+ iconf.getConfiguration());
+
+ // analyzer is null because we only use addIndexes, not addDocument
+ writer =
+ new IndexWriter(dir, false, null,
+ initGeneration < 0 ? new KeepOnlyLastCommitDeletionPolicy()
+ : new MixedDeletionPolicy());
+ setParameters(iconf);
+ }
+
+ /**
+ * Process an intermediate form by carrying out, on the Lucene instance of
+ * the shard, the deletes and the inserts (a ram index) in the form.
+ * @param form the intermediate form containing deletes and a ram index
+ * @throws IOException
+ */
+ public void process(IntermediateForm form) throws IOException {
+ // first delete
+ Iterator<Term> iter = form.deleteTermIterator();
+ while (iter.hasNext()) {
+ writer.deleteDocuments(iter.next());
+ }
+ // then insert
+ writer.addIndexesNoOptimize(new Directory[] { form.getDirectory() });
+ numForms++;
+ }
+
+ /**
+ * Close the shard writer. Optimize the Lucene instance of the shard before
+ * closing if necessary, and copy the files created in the temp directory
+ * to the permanent directory after closing.
+ * @throws IOException
+ */
+ public void close() throws IOException {
+ LOG.info("Closing the shard writer, processed " + numForms + " forms");
+ try {
+ try {
+ if (maxNumSegments > 0) {
+ writer.optimize(maxNumSegments);
+ LOG.info("Optimized the shard into at most " + maxNumSegments
+ + " segments");
+ }
+ } finally {
+ writer.close();
+ LOG.info("Closed Lucene index writer");
+ }
+
+ moveFromTempToPerm();
+ LOG.info("Moved new index files to " + perm);
+
+ } finally {
+ dir.close();
+ LOG.info("Closed the shard writer");
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ public String toString() {
+ return this.getClass().getName() + "@" + perm + "&" + temp;
+ }
+
+ private void setParameters(IndexUpdateConfiguration iconf) {
+ int maxFieldLength = iconf.getIndexMaxFieldLength();
+ if (maxFieldLength > 0) {
+ writer.setMaxFieldLength(maxFieldLength);
+ }
+ writer.setUseCompoundFile(iconf.getIndexUseCompoundFile());
+ maxNumSegments = iconf.getIndexMaxNumSegments();
+
+ if (maxFieldLength > 0) {
+ LOG.info("sea.max.field.length = " + writer.getMaxFieldLength());
+ }
+ LOG.info("sea.use.compound.file = " + writer.getUseCompoundFile());
+ LOG.info("sea.max.num.segments = " + maxNumSegments);
+ }
+
+ // in case a previous reduce task fails, restore the generation to
+ // the original starting point by deleting the segments.gen file
+ // and the segments_N files whose generations are greater than the
+ // starting generation; rest of the unwanted files will be deleted
+ // once the unwanted segments_N files are deleted
+ private void restoreGeneration(FileSystem fs, Path perm, long startGen)
+ throws IOException {
+
+ FileStatus[] fileStatus = fs.listStatus(perm, new PathFilter() {
+ public boolean accept(Path path) {
+ return LuceneUtil.isSegmentsFile(path.getName());
+ }
+ });
+
+ // remove the segments_N files whose generation are greater than
+ // the starting generation
+ for (int i = 0; i < fileStatus.length; i++) {
+ Path path = fileStatus[i].getPath();
+ if (startGen < LuceneUtil.generationFromSegmentsFileName(path.getName())) {
+ fs.delete(path, true);
+ }
+ }
+
+ // always remove segments.gen in case last failed try removed segments_N
+ // but not segments.gen, and segments.gen will be overwritten anyway.
+ Path segmentsGenFile = new Path(LuceneUtil.IndexFileNames.SEGMENTS_GEN);
+ if (fs.exists(segmentsGenFile)) {
+ fs.delete(segmentsGenFile, true);
+ }
+ }
+
+ // move the files created in the temp dir into the perm dir
+ // and then delete the temp dir from the local FS
+ private void moveFromTempToPerm() throws IOException {
+ try {
+ FileStatus[] fileStatus =
+ localFs.listStatus(temp, LuceneIndexFileNameFilter.getFilter());
+ Path segmentsPath = null;
+ Path segmentsGenPath = null;
+
+ // move the files created in temp dir except segments_N and segments.gen
+ for (int i = 0; i < fileStatus.length; i++) {
+ Path path = fileStatus[i].getPath();
+ String name = path.getName();
+
+ if (LuceneUtil.isSegmentsGenFile(name)) {
+ assert (segmentsGenPath == null);
+ segmentsGenPath = path;
+ } else if (LuceneUtil.isSegmentsFile(name)) {
+ assert (segmentsPath == null);
+ segmentsPath = path;
+ } else {
+ fs.completeLocalOutput(new Path(perm, name), path);
+ }
+ }
+
+ // move the segments_N file
+ if (segmentsPath != null) {
+ fs.completeLocalOutput(new Path(perm, segmentsPath.getName()),
+ segmentsPath);
+ }
+
+ // move the segments.gen file
+ if (segmentsGenPath != null) {
+ fs.completeLocalOutput(new Path(perm, segmentsGenPath.getName()),
+ segmentsGenPath);
+ }
+ } finally {
+ // finally delete the temp dir (files should have been deleted)
+ localFs.delete(temp, true);
+ }
+ }
+
+}
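
For reference, restoreGeneration() above depends on Lucene's segments-file
naming convention: a "segments.gen" pointer file plus a series of
"segments_N" commit files, where the suffix N is the commit generation.
A minimal sketch of that convention, assuming Lucene's usual base-36
encoding of the suffix (the contrib code delegates the real parsing to
LuceneUtil.generationFromSegmentsFileName):

    // Sketch only: mirrors the assumed naming convention, not the actual
    // LuceneUtil implementation.
    public class SegmentsNaming {
      public static long generationOf(String name) {
        if (name.equals("segments")) {
          return 0L;                       // the very first commit point
        }
        // "segments_1" -> 1, "segments_a" -> 10, "segments_10" -> 36
        return Long.parseLong(name.substring("segments_".length()),
                              Character.MAX_RADIX);
      }

      public static void main(String[] args) {
        System.out.println(generationOf("segments_a"));  // prints 10
      }
    }

Deleting every segments_N whose parsed generation exceeds the starting
generation thus rolls the shard back to the last committed state of the
previous successful attempt.
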
Modified: hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/main/UpdateIndex.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/main/UpdateIndex.java?rev=1399950&r1=1399949&r2=1399950&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/main/UpdateIndex.java (original)
+++ hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/main/UpdateIndex.java Fri Oct 19 02:25:55 2012
@@ -1,276 +1,276 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.main;
-
-import java.io.IOException;
-import java.text.NumberFormat;
-import java.util.Arrays;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.contrib.index.mapred.IndexUpdateConfiguration;
-import org.apache.hadoop.contrib.index.mapred.IIndexUpdater;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.main;
+
+import java.io.IOException;
+import java.text.NumberFormat;
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.contrib.index.mapred.IndexUpdateConfiguration;
+import org.apache.hadoop.contrib.index.mapred.IIndexUpdater;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.util.ReflectionUtils;
-
-/**
- * A distributed "index" is partitioned into "shards". Each shard corresponds
- * to a Lucene instance. This class contains the main() method which uses a
- * Map/Reduce job to analyze documents and update Lucene instances in parallel.
- *
- * The main() method in UpdateIndex requires the following information for
- * updating the shards:
- * - Input formatter. This specifies how to format the input documents.
- * - Analysis. This defines the analyzer to use on the input. The analyzer
- * determines whether a document is being inserted, updated, or deleted.
- * For inserts or updates, the analyzer also converts each input document
- * into a Lucene document.
- * - Input paths. This provides the location(s) of updated documents,
- * e.g., HDFS files or directories, or HBase tables.
- * - Shard paths, or index path with the number of shards. Either specify
- * the path for each shard, or specify an index path and the shards are
- * the sub-directories of the index directory.
- * - Output path. When the update to a shard is done, a message is put here.
- * - Number of map tasks.
- *
- * All of the information can be specified in a configuration file. All but
- * the first two can also be specified as command line options. Check out
- * conf/index-config.xml.template for other configurable parameters.
- *
- * Note: Because of the parallel nature of Map/Reduce, the behaviour of
- * multiple inserts, deletes or updates to the same document is undefined.
- */
-public class UpdateIndex {
- public static final Log LOG = LogFactory.getLog(UpdateIndex.class);
-
- private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
- static {
- NUMBER_FORMAT.setMinimumIntegerDigits(5);
- NUMBER_FORMAT.setGroupingUsed(false);
- }
-
- private static long now() {
- return System.currentTimeMillis();
- }
-
- private static void printUsage(String cmd) {
- System.err.println("Usage: java " + UpdateIndex.class.getName() + "\n"
- + " -inputPaths <inputPath,inputPath>\n"
- + " -outputPath <outputPath>\n"
- + " -shards <shardDir,shardDir>\n"
- + " -indexPath <indexPath>\n"
- + " -numShards <num>\n"
- + " -numMapTasks <num>\n"
- + " -conf <confPath>\n"
- + "Note: Do not use both -shards option and -indexPath option.");
- }
-
- private static String getIndexPath(Configuration conf) {
- return conf.get("sea.index.path");
- }
-
- private static int getNumShards(Configuration conf) {
- return conf.getInt("sea.num.shards", 1);
- }
-
- private static Shard[] createShards(String indexPath, int numShards,
- Configuration conf) throws IOException {
-
- String parent = Shard.normalizePath(indexPath) + Path.SEPARATOR;
- long versionNumber = -1;
- long generation = -1;
-
- FileSystem fs = FileSystem.get(conf);
- Path path = new Path(indexPath);
-
- if (fs.exists(path)) {
- FileStatus[] fileStatus = fs.listStatus(path);
- String[] shardNames = new String[fileStatus.length];
- int count = 0;
- for (int i = 0; i < fileStatus.length; i++) {
- if (fileStatus[i].isDirectory()) {
- shardNames[count] = fileStatus[i].getPath().getName();
- count++;
- }
- }
- Arrays.sort(shardNames, 0, count);
-
- Shard[] shards = new Shard[count >= numShards ? count : numShards];
- for (int i = 0; i < count; i++) {
- shards[i] =
- new Shard(versionNumber, parent + shardNames[i], generation);
- }
-
- int number = count;
- for (int i = count; i < numShards; i++) {
- String shardPath;
- while (true) {
- shardPath = parent + NUMBER_FORMAT.format(number++);
- if (!fs.exists(new Path(shardPath))) {
- break;
- }
- }
- shards[i] = new Shard(versionNumber, shardPath, generation);
- }
- return shards;
- } else {
- Shard[] shards = new Shard[numShards];
- for (int i = 0; i < shards.length; i++) {
- shards[i] =
- new Shard(versionNumber, parent + NUMBER_FORMAT.format(i),
- generation);
- }
- return shards;
- }
- }
-
- /**
- * The main() method
- * @param argv
- */
- public static void main(String[] argv) {
- if (argv.length == 0) {
- printUsage("");
- System.exit(-1);
- }
-
- String inputPathsString = null;
- Path outputPath = null;
- String shardsString = null;
- String indexPath = null;
- int numShards = -1;
- int numMapTasks = -1;
- Configuration conf = new Configuration();
- String confPath = null;
-
- // parse the command line
- for (int i = 0; i < argv.length; i++) { // parse command line
- if (argv[i].equals("-inputPaths")) {
- inputPathsString = argv[++i];
- } else if (argv[i].equals("-outputPath")) {
- outputPath = new Path(argv[++i]);
- } else if (argv[i].equals("-shards")) {
- shardsString = argv[++i];
- } else if (argv[i].equals("-indexPath")) {
- indexPath = argv[++i];
- } else if (argv[i].equals("-numShards")) {
- numShards = Integer.parseInt(argv[++i]);
- } else if (argv[i].equals("-numMapTasks")) {
- numMapTasks = Integer.parseInt(argv[++i]);
- } else if (argv[i].equals("-conf")) {
- // add as a local FS resource
- confPath = argv[++i];
- conf.addResource(new Path(confPath));
- } else {
- System.out.println("Unknown option " + argv[i] + " w/ value "
- + argv[++i]);
- }
- }
- LOG.info("inputPaths = " + inputPathsString);
- LOG.info("outputPath = " + outputPath);
- LOG.info("shards = " + shardsString);
- LOG.info("indexPath = " + indexPath);
- LOG.info("numShards = " + numShards);
- LOG.info("numMapTasks= " + numMapTasks);
- LOG.info("confPath = " + confPath);
-
- Path[] inputPaths = null;
- Shard[] shards = null;
-
- JobConf jobConf = new JobConf(conf);
- IndexUpdateConfiguration iconf = new IndexUpdateConfiguration(jobConf);
-
- if (inputPathsString != null) {
- jobConf.set(org.apache.hadoop.mapreduce.lib.input.
- FileInputFormat.INPUT_DIR, inputPathsString);
- }
- inputPaths = FileInputFormat.getInputPaths(jobConf);
- if (inputPaths.length == 0) {
- inputPaths = null;
- }
-
- if (outputPath == null) {
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * A distributed "index" is partitioned into "shards". Each shard corresponds
+ * to a Lucene instance. This class contains the main() method which uses a
+ * Map/Reduce job to analyze documents and update Lucene instances in parallel.
+ *
+ * The main() method in UpdateIndex requires the following information for
+ * updating the shards:
+ * - Input formatter. This specifies the format of the input documents.
+ * - Analysis. This defines the analyzer to use on the input. The analyzer
+ * determines whether a document is being inserted, updated, or deleted.
+ * For inserts or updates, the analyzer also converts each input document
+ * into a Lucene document.
+ * - Input paths. This provides the location(s) of updated documents,
+ * e.g., HDFS files or directories, or HBase tables.
+ * - Shard paths, or index path with the number of shards. Either specify
+ * the path for each shard, or specify an index path and the shards are
+ * the sub-directories of the index directory.
+ * - Output path. When the update to a shard is done, a message is put here.
+ * - Number of map tasks.
+ *
+ * All of the information can be specified in a configuration file. All but
+ * the first two can also be specified as command line options. Check out
+ * conf/index-config.xml.template for other configurable parameters.
+ *
+ * Note: Because of the parallel nature of Map/Reduce, the behaviour of
+ * multiple inserts, deletes or updates to the same document is undefined.
+ */
+public class UpdateIndex {
+ public static final Log LOG = LogFactory.getLog(UpdateIndex.class);
+
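+  // Used to generate five-digit, zero-padded shard directory names
+  // (00000, 00001, ...) in createShards() below.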
+ private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance();
+ static {
+ NUMBER_FORMAT.setMinimumIntegerDigits(5);
+ NUMBER_FORMAT.setGroupingUsed(false);
+ }
+
+ private static long now() {
+ return System.currentTimeMillis();
+ }
+
+ private static void printUsage(String cmd) {
+ System.err.println("Usage: java " + UpdateIndex.class.getName() + "\n"
+ + " -inputPaths <inputPath,inputPath>\n"
+ + " -outputPath <outputPath>\n"
+ + " -shards <shardDir,shardDir>\n"
+ + " -indexPath <indexPath>\n"
+ + " -numShards <num>\n"
+ + " -numMapTasks <num>\n"
+ + " -conf <confPath>\n"
+ + "Note: Do not use both -shards option and -indexPath option.");
+ }
+
+ private static String getIndexPath(Configuration conf) {
+ return conf.get("sea.index.path");
+ }
+
+ private static int getNumShards(Configuration conf) {
+ return conf.getInt("sea.num.shards", 1);
+ }
+
+ private static Shard[] createShards(String indexPath, int numShards,
+ Configuration conf) throws IOException {
+
+ String parent = Shard.normalizePath(indexPath) + Path.SEPARATOR;
+ long versionNumber = -1;
+ long generation = -1;
+
+ FileSystem fs = FileSystem.get(conf);
+ Path path = new Path(indexPath);
+
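+    // If the index directory already exists, reuse its sub-directories as
+    // the existing shards, creating extra shards only when numShards asks
+    // for more than are present.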
+ if (fs.exists(path)) {
+ FileStatus[] fileStatus = fs.listStatus(path);
+ String[] shardNames = new String[fileStatus.length];
+ int count = 0;
+ for (int i = 0; i < fileStatus.length; i++) {
+ if (fileStatus[i].isDirectory()) {
+ shardNames[count] = fileStatus[i].getPath().getName();
+ count++;
+ }
+ }
+ Arrays.sort(shardNames, 0, count);
+
+ Shard[] shards = new Shard[count >= numShards ? count : numShards];
+ for (int i = 0; i < count; i++) {
+ shards[i] =
+ new Shard(versionNumber, parent + shardNames[i], generation);
+ }
+
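+      // Name any additional shards with zero-padded numbers that do not
+      // collide with existing directories.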
+ int number = count;
+ for (int i = count; i < numShards; i++) {
+ String shardPath;
+ while (true) {
+ shardPath = parent + NUMBER_FORMAT.format(number++);
+ if (!fs.exists(new Path(shardPath))) {
+ break;
+ }
+ }
+ shards[i] = new Shard(versionNumber, shardPath, generation);
+ }
+ return shards;
+ } else {
+ Shard[] shards = new Shard[numShards];
+ for (int i = 0; i < shards.length; i++) {
+ shards[i] =
+ new Shard(versionNumber, parent + NUMBER_FORMAT.format(i),
+ generation);
+ }
+ return shards;
+ }
+ }
+
+ /**
+   * The main() method.
+   * @param argv the command line arguments
+ */
+ public static void main(String[] argv) {
+ if (argv.length == 0) {
+ printUsage("");
+ System.exit(-1);
+ }
+
+ String inputPathsString = null;
+ Path outputPath = null;
+ String shardsString = null;
+ String indexPath = null;
+ int numShards = -1;
+ int numMapTasks = -1;
+ Configuration conf = new Configuration();
+ String confPath = null;
+
+    // parse the command line
+    for (int i = 0; i < argv.length; i++) {
+ if (argv[i].equals("-inputPaths")) {
+ inputPathsString = argv[++i];
+ } else if (argv[i].equals("-outputPath")) {
+ outputPath = new Path(argv[++i]);
+ } else if (argv[i].equals("-shards")) {
+ shardsString = argv[++i];
+ } else if (argv[i].equals("-indexPath")) {
+ indexPath = argv[++i];
+ } else if (argv[i].equals("-numShards")) {
+ numShards = Integer.parseInt(argv[++i]);
+ } else if (argv[i].equals("-numMapTasks")) {
+ numMapTasks = Integer.parseInt(argv[++i]);
+ } else if (argv[i].equals("-conf")) {
+ // add as a local FS resource
+ confPath = argv[++i];
+ conf.addResource(new Path(confPath));
+ } else {
+ System.out.println("Unknown option " + argv[i] + " w/ value "
+ + argv[++i]);
+ }
+ }
+ LOG.info("inputPaths = " + inputPathsString);
+ LOG.info("outputPath = " + outputPath);
+ LOG.info("shards = " + shardsString);
+ LOG.info("indexPath = " + indexPath);
+ LOG.info("numShards = " + numShards);
+ LOG.info("numMapTasks= " + numMapTasks);
+ LOG.info("confPath = " + confPath);
+
+ Path[] inputPaths = null;
+ Shard[] shards = null;
+
+ JobConf jobConf = new JobConf(conf);
+ IndexUpdateConfiguration iconf = new IndexUpdateConfiguration(jobConf);
+
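+    // -inputPaths from the command line overrides any input dir from the
+    // config file; either way the paths are read back via FileInputFormat.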
+ if (inputPathsString != null) {
+ jobConf.set(org.apache.hadoop.mapreduce.lib.input.
+ FileInputFormat.INPUT_DIR, inputPathsString);
+ }
+ inputPaths = FileInputFormat.getInputPaths(jobConf);
+ if (inputPaths.length == 0) {
+ inputPaths = null;
+ }
+
+ if (outputPath == null) {
outputPath = FileOutputFormat.getOutputPath(jobConf);
- }
-
- if (inputPaths == null || outputPath == null) {
- System.err.println("InputPaths and outputPath must be specified.");
- printUsage("");
- System.exit(-1);
- }
-
- if (shardsString != null) {
- iconf.setIndexShards(shardsString);
- }
- shards = Shard.getIndexShards(iconf);
- if (shards != null && shards.length == 0) {
- shards = null;
- }
-
- if (indexPath == null) {
- indexPath = getIndexPath(conf);
- }
- if (numShards <= 0) {
- numShards = getNumShards(conf);
- }
-
- if (shards == null && indexPath == null) {
- System.err.println("Either shards or indexPath must be specified.");
- printUsage("");
- System.exit(-1);
- }
-
- if (numMapTasks <= 0) {
- numMapTasks = jobConf.getNumMapTasks();
- }
-
- try {
- // create shards and set their directories if necessary
- if (shards == null) {
- shards = createShards(indexPath, numShards, conf);
- }
-
- long startTime = now();
- try {
- IIndexUpdater updater =
- (IIndexUpdater) ReflectionUtils.newInstance(
- iconf.getIndexUpdaterClass(), conf);
- LOG.info("sea.index.updater = "
- + iconf.getIndexUpdaterClass().getName());
-
- updater.run(conf, inputPaths, outputPath, numMapTasks, shards);
- LOG.info("Index update job is done");
-
- } finally {
- long elapsedTime = now() - startTime;
- LOG.info("Elapsed time is " + (elapsedTime / 1000) + "s");
- System.out.println("Elapsed time is " + (elapsedTime / 1000) + "s");
- }
- } catch (Exception e) {
- e.printStackTrace(System.err);
- }
- }
-}
+ }
+
+ if (inputPaths == null || outputPath == null) {
+ System.err.println("InputPaths and outputPath must be specified.");
+ printUsage("");
+ System.exit(-1);
+ }
+
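+    // Shard resolution: an explicit -shards list wins; otherwise fall back
+    // to -indexPath (or sea.index.path) plus the shard count.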
+ if (shardsString != null) {
+ iconf.setIndexShards(shardsString);
+ }
+ shards = Shard.getIndexShards(iconf);
+ if (shards != null && shards.length == 0) {
+ shards = null;
+ }
+
+ if (indexPath == null) {
+ indexPath = getIndexPath(conf);
+ }
+ if (numShards <= 0) {
+ numShards = getNumShards(conf);
+ }
+
+ if (shards == null && indexPath == null) {
+ System.err.println("Either shards or indexPath must be specified.");
+ printUsage("");
+ System.exit(-1);
+ }
+
+ if (numMapTasks <= 0) {
+ numMapTasks = jobConf.getNumMapTasks();
+ }
+
+ try {
+ // create shards and set their directories if necessary
+ if (shards == null) {
+ shards = createShards(indexPath, numShards, conf);
+ }
+
+ long startTime = now();
+ try {
+ IIndexUpdater updater =
+ (IIndexUpdater) ReflectionUtils.newInstance(
+ iconf.getIndexUpdaterClass(), conf);
+ LOG.info("sea.index.updater = "
+ + iconf.getIndexUpdaterClass().getName());
+
+ updater.run(conf, inputPaths, outputPath, numMapTasks, shards);
+ LOG.info("Index update job is done");
+
+ } finally {
+ long elapsedTime = now() - startTime;
+ LOG.info("Elapsed time is " + (elapsedTime / 1000) + "s");
+ System.out.println("Elapsed time is " + (elapsedTime / 1000) + "s");
+ }
+ } catch (Exception e) {
+ e.printStackTrace(System.err);
+ }
+ }
+}
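
As a usage sketch (the class invocation follows printUsage() above; the
paths, shard count and map task count below are made-up examples):

    java org.apache.hadoop.contrib.index.main.UpdateIndex \
      -inputPaths docs/batch1,docs/batch2 \
      -outputPath update-out \
      -indexPath index \
      -numShards 2 \
      -numMapTasks 4 \
      -conf conf/index-config.xml

Per the usage note, -shards and -indexPath are mutually exclusive ways of
locating the shard directories; a run like the one above lets createShards()
lay the two shards out as sub-directories 00000 and 00001 under the index
path.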