Posted to mapreduce-commits@hadoop.apache.org by su...@apache.org on 2012/10/12 06:49:55 UTC
svn commit: r1397435 [2/5] - in
/hadoop/common/branches/branch-2/hadoop-mapreduce-project: ./
hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/
hadoop-mapreduce-examples/src/main/java/org/apache/hadoop...
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/sample/data2.txt Fri Oct 12 04:48:40 2012
@@ -1,10 +1,10 @@
-0 del
-1 upd hadoop
-2 del
-3 upd hadoop
-4 del
-5 upd hadoop
-6 del
-7 upd hadoop
-8 del
-9 upd hadoop
+0 del
+1 upd hadoop
+2 del
+3 upd hadoop
+4 del
+5 upd hadoop
+6 del
+7 upd hadoop
+8 del
+9 upd hadoop
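
For context: each sample line above follows the documentID<SPACE>op<SPACE>content
format that LineDocRecordReader (later in this commit) consumes, and the "del"
lines carry no content because a delete only needs a document id. A minimal
stand-alone sketch of how one line breaks apart; illustrative only, the real
parsing lives in LineDocRecordReader.next():

    public class LineFormatSketch {
      public static void main(String[] args) {
        String line = "1 upd hadoop";
        // At most three fields: id, op, optional content.
        String[] parts = line.split(" ", 3);
        System.out.println("docId   = " + parts[0]);  // "1"
        System.out.println("op      = " + parts[1]);  // "upd", i.e. UPDATE
        System.out.println("content = " + (parts.length > 2 ? parts[2] : ""));
      }
    }
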
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/HashingDistributionPolicy.java Fri Oct 12 04:48:40 2012
@@ -1,56 +1,56 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-
-/**
- * Choose a shard for each insert or delete based on document id hashing. Do
- * NOT use this distribution policy when the number of shards changes.
- */
-public class HashingDistributionPolicy implements IDistributionPolicy {
-
- private int numShards;
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
- */
- public void init(Shard[] shards) {
- numShards = shards.length;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForInsert(DocumentID key) {
- int hashCode = key.hashCode();
- return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForDelete(DocumentID key) {
- int hashCode = key.hashCode();
- return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+
+/**
+ * Choose a shard for each insert or delete based on document id hashing. Do
+ * NOT use this distribution policy when the number of shards changes.
+ */
+public class HashingDistributionPolicy implements IDistributionPolicy {
+
+ private int numShards;
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
+ */
+ public void init(Shard[] shards) {
+ numShards = shards.length;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
+ */
+ public int chooseShardForInsert(DocumentID key) {
+ int hashCode = key.hashCode();
+ return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
+ */
+ public int chooseShardForDelete(DocumentID key) {
+ int hashCode = key.hashCode();
+ return hashCode >= 0 ? hashCode % numShards : (-hashCode) % numShards;
+ }
+
+}
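
A hedged usage sketch for the policy above. DocumentID's no-arg constructor
and getText() accessor are taken from the record-reader code elsewhere in
this commit, and placeholder Shard entries suffice because init() only reads
the array length:

    import org.apache.hadoop.contrib.index.example.HashingDistributionPolicy;
    import org.apache.hadoop.contrib.index.mapred.DocumentID;
    import org.apache.hadoop.contrib.index.mapred.Shard;

    public class HashingPolicySketch {
      public static void main(String[] args) {
        HashingDistributionPolicy policy = new HashingDistributionPolicy();
        policy.init(new Shard[3]);       // init() only uses shards.length
        DocumentID key = new DocumentID();
        key.getText().set("doc-42");
        // The same id maps to the same shard for insert and delete, which is
        // what lets a later delete find the shard that holds the insert.
        System.out.println(policy.chooseShardForInsert(key));  // some shard in [0, 3)
        System.out.println(policy.chooseShardForDelete(key));  // the same shard
      }
    }

One caveat: negating the hash code does not help for Integer.MIN_VALUE, whose
negation is still negative, so that single hash value would still yield a
negative shard index.
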
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/IdentityLocalAnalysis.java Fri Oct 12 04:48:40 2012
@@ -1,57 +1,57 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * Identity local analysis maps inputs directly into outputs.
- */
-public class IdentityLocalAnalysis implements
- ILocalAnalysis<DocumentID, DocumentAndOp> {
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
- */
- public void map(DocumentID key, DocumentAndOp value,
- OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
- throws IOException {
- output.collect(key, value);
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
- */
- public void configure(JobConf job) {
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.io.Closeable#close()
- */
- public void close() throws IOException {
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Identity local analysis maps inputs directly into outputs.
+ */
+public class IdentityLocalAnalysis implements
+ ILocalAnalysis<DocumentID, DocumentAndOp> {
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
+ */
+ public void map(DocumentID key, DocumentAndOp value,
+ OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
+ throws IOException {
+ output.collect(key, value);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
+ */
+ public void configure(JobConf job) {
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.io.Closeable#close()
+ */
+ public void close() throws IOException {
+ }
+
+}
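
Since the identity analysis just forwards each pair, it can be exercised
directly. A sketch: the lambda compiles because OutputCollector declares a
single collect() method, Reporter.NULL is Hadoop's stock no-op reporter, and
the nulls stand in for a real Lucene document and term:

    import org.apache.hadoop.contrib.index.example.IdentityLocalAnalysis;
    import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
    import org.apache.hadoop.contrib.index.mapred.DocumentID;
    import org.apache.hadoop.mapred.Reporter;

    public class IdentitySketch {
      public static void main(String[] args) throws Exception {
        IdentityLocalAnalysis analysis = new IdentityLocalAnalysis();
        DocumentID key = new DocumentID();
        DocumentAndOp value =
            new DocumentAndOp(DocumentAndOp.Op.INSERT, null, null);
        // The collector only proves the pair passes through unchanged.
        analysis.map(key, value,
            (k, v) -> System.out.println(k + " -> " + v), Reporter.NULL);
      }
    }
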
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocInputFormat.java Fri Oct 12 04:48:40 2012
@@ -1,46 +1,46 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-
-/**
- * An InputFormat for LineDoc for plain text files where each line is a doc.
- */
-public class LineDocInputFormat extends
- FileInputFormat<DocumentID, LineDocTextAndOp> {
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.FileInputFormat#getRecordReader(org.apache.hadoop.mapred.InputSplit, org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.Reporter)
- */
- public RecordReader<DocumentID, LineDocTextAndOp> getRecordReader(
- InputSplit split, JobConf job, Reporter reporter) throws IOException {
- reporter.setStatus(split.toString());
- return new LineDocRecordReader(job, (FileSplit) split);
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * An InputFormat for LineDoc for plain text files where each line is a doc.
+ */
+public class LineDocInputFormat extends
+ FileInputFormat<DocumentID, LineDocTextAndOp> {
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.FileInputFormat#getRecordReader(org.apache.hadoop.mapred.InputSplit, org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.Reporter)
+ */
+ public RecordReader<DocumentID, LineDocTextAndOp> getRecordReader(
+ InputSplit split, JobConf job, Reporter reporter) throws IOException {
+ reporter.setStatus(split.toString());
+ return new LineDocRecordReader(job, (FileSplit) split);
+ }
+
+}
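
A wiring sketch for the input format above, using the old mapred API it
targets; the input path and bare JobConf are illustrative assumptions:

    import org.apache.hadoop.contrib.index.example.LineDocInputFormat;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.JobConf;

    public class InputFormatSketch {
      public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setInputFormat(LineDocInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("sample/data2.txt"));
        // Mappers on this job then receive one (DocumentID, LineDocTextAndOp)
        // pair per input line.
      }
    }
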
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocLocalAnalysis.java Fri Oct 12 04:48:40 2012
@@ -1,80 +1,80 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.Term;
-
-/**
- * Convert LineDocTextAndOp to DocumentAndOp as required by ILocalAnalysis.
- */
-public class LineDocLocalAnalysis implements
- ILocalAnalysis<DocumentID, LineDocTextAndOp> {
-
- private static String docidFieldName = "id";
- private static String contentFieldName = "content";
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
- */
- public void map(DocumentID key, LineDocTextAndOp value,
- OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
- throws IOException {
-
- DocumentAndOp.Op op = value.getOp();
- Document doc = null;
- Term term = null;
-
- if (op == DocumentAndOp.Op.INSERT || op == DocumentAndOp.Op.UPDATE) {
- doc = new Document();
- doc.add(new Field(docidFieldName, key.getText().toString(),
- Field.Store.YES, Field.Index.UN_TOKENIZED));
- doc.add(new Field(contentFieldName, value.getText().toString(),
- Field.Store.NO, Field.Index.TOKENIZED));
- }
-
- if (op == DocumentAndOp.Op.DELETE || op == DocumentAndOp.Op.UPDATE) {
- term = new Term(docidFieldName, key.getText().toString());
- }
-
- output.collect(key, new DocumentAndOp(op, doc, term));
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
- */
- public void configure(JobConf job) {
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.io.Closeable#close()
- */
- public void close() throws IOException {
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.ILocalAnalysis;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
+
+/**
+ * Convert LineDocTextAndOp to DocumentAndOp as required by ILocalAnalysis.
+ */
+public class LineDocLocalAnalysis implements
+ ILocalAnalysis<DocumentID, LineDocTextAndOp> {
+
+ private static String docidFieldName = "id";
+ private static String contentFieldName = "content";
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
+ */
+ public void map(DocumentID key, LineDocTextAndOp value,
+ OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
+ throws IOException {
+
+ DocumentAndOp.Op op = value.getOp();
+ Document doc = null;
+ Term term = null;
+
+ if (op == DocumentAndOp.Op.INSERT || op == DocumentAndOp.Op.UPDATE) {
+ doc = new Document();
+ doc.add(new Field(docidFieldName, key.getText().toString(),
+ Field.Store.YES, Field.Index.UN_TOKENIZED));
+ doc.add(new Field(contentFieldName, value.getText().toString(),
+ Field.Store.NO, Field.Index.TOKENIZED));
+ }
+
+ if (op == DocumentAndOp.Op.DELETE || op == DocumentAndOp.Op.UPDATE) {
+ term = new Term(docidFieldName, key.getText().toString());
+ }
+
+ output.collect(key, new DocumentAndOp(op, doc, term));
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.JobConfigurable#configure(org.apache.hadoop.mapred.JobConf)
+ */
+ public void configure(JobConf job) {
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.io.Closeable#close()
+ */
+ public void close() throws IOException {
+ }
+
+}
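
For concreteness, here is what map() above emits for each op, traced against
the sample lines in data2.txt (the field names "id" and "content" are the
ones defined in the class; the trace itself is illustrative):

    "1 upd hadoop" (UPDATE): doc  = { id:"1" stored+untokenized,
                                      content:"hadoop" tokenized+unstored }
                             term = id:"1"
    "0 del"        (DELETE): doc  = null,      term = id:"0"
    an insert op   (INSERT): doc  = as above,  term = null

UPDATE building both the document and the term is evidently what gives
downstream consumers delete-then-reinsert semantics: the term locates the old
version, the document supplies the new one.
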
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocRecordReader.java Fri Oct 12 04:48:40 2012
@@ -1,231 +1,231 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.RecordReader;
-
-/**
- * A simple RecordReader for LineDoc for plain text files where each line is a
- * doc. Each line is as follows: documentID<SPACE>op<SPACE>content<EOF>,
- * where op can be "i", "ins" or "insert" for insert, "d", "del" or "delete"
- * for delete, or "u", "upd" or "update" for update.
- */
-public class LineDocRecordReader implements
- RecordReader<DocumentID, LineDocTextAndOp> {
- private static final char SPACE = ' ';
- private static final char EOL = '\n';
-
- private long start;
- private long pos;
- private long end;
- private BufferedInputStream in;
- private ByteArrayOutputStream buffer = new ByteArrayOutputStream(256);
-
- /**
- * Provide a bridge to get the bytes from the ByteArrayOutputStream without
- * creating a new byte array.
- */
- private static class TextStuffer extends OutputStream {
- public Text target;
-
- public void write(int b) {
- throw new UnsupportedOperationException("write(byte) not supported");
- }
-
- public void write(byte[] data, int offset, int len) throws IOException {
- target.set(data, offset, len);
- }
- }
-
- private TextStuffer bridge = new TextStuffer();
-
- /**
- * Constructor
- * @param job
- * @param split
- * @throws IOException
- */
- public LineDocRecordReader(Configuration job, FileSplit split)
- throws IOException {
- long start = split.getStart();
- long end = start + split.getLength();
- final Path file = split.getPath();
-
- // open the file and seek to the start of the split
- FileSystem fs = file.getFileSystem(job);
- FSDataInputStream fileIn = fs.open(split.getPath());
- InputStream in = fileIn;
- boolean skipFirstLine = false;
- if (start != 0) {
- skipFirstLine = true; // wait till BufferedInputStream to skip
- --start;
- fileIn.seek(start);
- }
-
- this.in = new BufferedInputStream(in);
- if (skipFirstLine) { // skip first line and re-establish "start".
- start += LineDocRecordReader.readData(this.in, null, EOL);
- }
- this.start = start;
- this.pos = start;
- this.end = end;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#close()
- */
- public void close() throws IOException {
- in.close();
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#createKey()
- */
- public DocumentID createKey() {
- return new DocumentID();
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#createValue()
- */
- public LineDocTextAndOp createValue() {
- return new LineDocTextAndOp();
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#getPos()
- */
- public long getPos() throws IOException {
- return pos;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#getProgress()
- */
- public float getProgress() throws IOException {
- if (start == end) {
- return 0.0f;
- } else {
- return Math.min(1.0f, (pos - start) / (float) (end - start));
- }
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.mapred.RecordReader#next(java.lang.Object, java.lang.Object)
- */
- public synchronized boolean next(DocumentID key, LineDocTextAndOp value)
- throws IOException {
- if (pos >= end) {
- return false;
- }
-
- // key is document id, which are bytes until first space
- if (!readInto(key.getText(), SPACE)) {
- return false;
- }
-
- // read operation: i/d/u, or ins/del/upd, or insert/delete/update
- Text opText = new Text();
- if (!readInto(opText, SPACE)) {
- return false;
- }
- String opStr = opText.toString();
- DocumentAndOp.Op op;
- if (opStr.equals("i") || opStr.equals("ins") || opStr.equals("insert")) {
- op = DocumentAndOp.Op.INSERT;
- } else if (opStr.equals("d") || opStr.equals("del")
- || opStr.equals("delete")) {
- op = DocumentAndOp.Op.DELETE;
- } else if (opStr.equals("u") || opStr.equals("upd")
- || opStr.equals("update")) {
- op = DocumentAndOp.Op.UPDATE;
- } else {
- // default is insert
- op = DocumentAndOp.Op.INSERT;
- }
- value.setOp(op);
-
- if (op == DocumentAndOp.Op.DELETE) {
- return true;
- } else {
- // read rest of the line
- return readInto(value.getText(), EOL);
- }
- }
-
- private boolean readInto(Text text, char delimiter) throws IOException {
- buffer.reset();
- long bytesRead = readData(in, buffer, delimiter);
- if (bytesRead == 0) {
- return false;
- }
- pos += bytesRead;
- bridge.target = text;
- buffer.writeTo(bridge);
- return true;
- }
-
- private static long readData(InputStream in, OutputStream out, char delimiter)
- throws IOException {
- long bytes = 0;
- while (true) {
-
- int b = in.read();
- if (b == -1) {
- break;
- }
- bytes += 1;
-
- byte c = (byte) b;
- if (c == EOL || c == delimiter) {
- break;
- }
-
- if (c == '\r') {
- in.mark(1);
- byte nextC = (byte) in.read();
- if (nextC != EOL || c == delimiter) {
- in.reset();
- } else {
- bytes += 1;
- }
- break;
- }
-
- if (out != null) {
- out.write(c);
- }
- }
- return bytes;
- }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.RecordReader;
+
+/**
+ * A simple RecordReader for LineDoc for plain text files where each line is a
+ * doc. Each line is as follows: documentID<SPACE>op<SPACE>content<EOF>,
+ * where op can be "i", "ins" or "insert" for insert, "d", "del" or "delete"
+ * for delete, or "u", "upd" or "update" for update.
+ */
+public class LineDocRecordReader implements
+ RecordReader<DocumentID, LineDocTextAndOp> {
+ private static final char SPACE = ' ';
+ private static final char EOL = '\n';
+
+ private long start;
+ private long pos;
+ private long end;
+ private BufferedInputStream in;
+ private ByteArrayOutputStream buffer = new ByteArrayOutputStream(256);
+
+ /**
+ * Provide a bridge to get the bytes from the ByteArrayOutputStream without
+ * creating a new byte array.
+ */
+ private static class TextStuffer extends OutputStream {
+ public Text target;
+
+ public void write(int b) {
+ throw new UnsupportedOperationException("write(byte) not supported");
+ }
+
+ public void write(byte[] data, int offset, int len) throws IOException {
+ target.set(data, offset, len);
+ }
+ }
+
+ private TextStuffer bridge = new TextStuffer();
+
+ /**
+ * Constructor
+ * @param job
+ * @param split
+ * @throws IOException
+ */
+ public LineDocRecordReader(Configuration job, FileSplit split)
+ throws IOException {
+ long start = split.getStart();
+ long end = start + split.getLength();
+ final Path file = split.getPath();
+
+ // open the file and seek to the start of the split
+ FileSystem fs = file.getFileSystem(job);
+ FSDataInputStream fileIn = fs.open(split.getPath());
+ InputStream in = fileIn;
+ boolean skipFirstLine = false;
+ if (start != 0) {
+ skipFirstLine = true; // wait till BufferedInputStream to skip
+ --start;
+ fileIn.seek(start);
+ }
+
+ this.in = new BufferedInputStream(in);
+ if (skipFirstLine) { // skip first line and re-establish "start".
+ start += LineDocRecordReader.readData(this.in, null, EOL);
+ }
+ this.start = start;
+ this.pos = start;
+ this.end = end;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#close()
+ */
+ public void close() throws IOException {
+ in.close();
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#createKey()
+ */
+ public DocumentID createKey() {
+ return new DocumentID();
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#createValue()
+ */
+ public LineDocTextAndOp createValue() {
+ return new LineDocTextAndOp();
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#getPos()
+ */
+ public long getPos() throws IOException {
+ return pos;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#getProgress()
+ */
+ public float getProgress() throws IOException {
+ if (start == end) {
+ return 0.0f;
+ } else {
+ return Math.min(1.0f, (pos - start) / (float) (end - start));
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.mapred.RecordReader#next(java.lang.Object, java.lang.Object)
+ */
+ public synchronized boolean next(DocumentID key, LineDocTextAndOp value)
+ throws IOException {
+ if (pos >= end) {
+ return false;
+ }
+
+ // key is document id, which are bytes until first space
+ if (!readInto(key.getText(), SPACE)) {
+ return false;
+ }
+
+ // read operation: i/d/u, or ins/del/upd, or insert/delete/update
+ Text opText = new Text();
+ if (!readInto(opText, SPACE)) {
+ return false;
+ }
+ String opStr = opText.toString();
+ DocumentAndOp.Op op;
+ if (opStr.equals("i") || opStr.equals("ins") || opStr.equals("insert")) {
+ op = DocumentAndOp.Op.INSERT;
+ } else if (opStr.equals("d") || opStr.equals("del")
+ || opStr.equals("delete")) {
+ op = DocumentAndOp.Op.DELETE;
+ } else if (opStr.equals("u") || opStr.equals("upd")
+ || opStr.equals("update")) {
+ op = DocumentAndOp.Op.UPDATE;
+ } else {
+ // default is insert
+ op = DocumentAndOp.Op.INSERT;
+ }
+ value.setOp(op);
+
+ if (op == DocumentAndOp.Op.DELETE) {
+ return true;
+ } else {
+ // read rest of the line
+ return readInto(value.getText(), EOL);
+ }
+ }
+
+ private boolean readInto(Text text, char delimiter) throws IOException {
+ buffer.reset();
+ long bytesRead = readData(in, buffer, delimiter);
+ if (bytesRead == 0) {
+ return false;
+ }
+ pos += bytesRead;
+ bridge.target = text;
+ buffer.writeTo(bridge);
+ return true;
+ }
+
+ private static long readData(InputStream in, OutputStream out, char delimiter)
+ throws IOException {
+ long bytes = 0;
+ while (true) {
+
+ int b = in.read();
+ if (b == -1) {
+ break;
+ }
+ bytes += 1;
+
+ byte c = (byte) b;
+ if (c == EOL || c == delimiter) {
+ break;
+ }
+
+ if (c == '\r') {
+ in.mark(1);
+ byte nextC = (byte) in.read();
+ if (nextC != EOL || c == delimiter) {
+ in.reset();
+ } else {
+ bytes += 1;
+ }
+ break;
+ }
+
+ if (out != null) {
+ out.write(c);
+ }
+ }
+ return bytes;
+ }
+}
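
The constructor's split handling is worth a worked example. A split that
starts mid-file backs up one byte and discards through the next newline, so
every line is owned by exactly one split. With the file bytes (offsets
illustrative)

    0 del\n1 upd hadoop\n

a split nominally starting at offset 3 backs up to offset 2, readData() then
consumes "del\n" (4 bytes) to re-establish start = 6, which lands exactly on
the '1' that begins the next line; the record "0 del" is left to the split
covering offset 0. From there next() reads the id up to the first space, the
op up to the next space, and, for anything but a delete, the rest of the line
as content.
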
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/LineDocTextAndOp.java Fri Oct 12 04:48:40 2012
@@ -1,92 +1,92 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-/**
- * This class represents an operation. The operation can be an insert, a delete
- * or an update. If the operation is an insert or an update, a (new) document,
- * which is in the form of text, is specified.
- */
-public class LineDocTextAndOp implements Writable {
- private DocumentAndOp.Op op;
- private Text doc;
-
- /**
- * Constructor
- */
- public LineDocTextAndOp() {
- doc = new Text();
- }
-
- /**
- * Set the type of the operation.
- * @param op the type of the operation
- */
- public void setOp(DocumentAndOp.Op op) {
- this.op = op;
- }
-
- /**
- * Get the type of the operation.
- * @return the type of the operation
- */
- public DocumentAndOp.Op getOp() {
- return op;
- }
-
- /**
- * Get the text that represents a document.
- * @return the text that represents a document
- */
- public Text getText() {
- return doc;
- }
-
- /* (non-Javadoc)
- * @see java.lang.Object#toString()
- */
- public String toString() {
- return this.getClass().getName() + "[op=" + op + ", text=" + doc + "]";
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
- */
- public void write(DataOutput out) throws IOException {
- throw new IOException(this.getClass().getName()
- + ".write should never be called");
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
- */
- public void readFields(DataInput in) throws IOException {
- throw new IOException(this.getClass().getName()
- + ".readFields should never be called");
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentAndOp;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class represents an operation. The operation can be an insert, a delete
+ * or an update. If the operation is an insert or an update, a (new) document,
+ * which is in the form of text, is specified.
+ */
+public class LineDocTextAndOp implements Writable {
+ private DocumentAndOp.Op op;
+ private Text doc;
+
+ /**
+ * Constructor
+ */
+ public LineDocTextAndOp() {
+ doc = new Text();
+ }
+
+ /**
+ * Set the type of the operation.
+ * @param op the type of the operation
+ */
+ public void setOp(DocumentAndOp.Op op) {
+ this.op = op;
+ }
+
+ /**
+ * Get the type of the operation.
+ * @return the type of the operation
+ */
+ public DocumentAndOp.Op getOp() {
+ return op;
+ }
+
+ /**
+ * Get the text that represents a document.
+ * @return the text that represents a document
+ */
+ public Text getText() {
+ return doc;
+ }
+
+ /* (non-Javadoc)
+ * @see java.lang.Object#toString()
+ */
+ public String toString() {
+ return this.getClass().getName() + "[op=" + op + ", text=" + doc + "]";
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
+ */
+ public void write(DataOutput out) throws IOException {
+ throw new IOException(this.getClass().getName()
+ + ".write should never be called");
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
+ */
+ public void readFields(DataInput in) throws IOException {
+ throw new IOException(this.getClass().getName()
+ + ".readFields should never be called");
+ }
+
+}
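
write() and readFields() throwing unconditionally suggests this value type is
only meant to travel in-process, from the record reader to the mapper, never
through Hadoop serialization. A short construction sketch using only the
calls shown above:

    LineDocTextAndOp value = new LineDocTextAndOp();
    value.setOp(DocumentAndOp.Op.UPDATE);
    value.getText().set("hadoop");
    // value.write(out) or value.readFields(in) would throw IOException
    // by design; the object is handed to the mapper by reference instead.
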
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/example/RoundRobinDistributionPolicy.java Fri Oct 12 04:48:40 2012
@@ -1,58 +1,58 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.example;
-
-import org.apache.hadoop.contrib.index.mapred.DocumentID;
-import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
-import org.apache.hadoop.contrib.index.mapred.Shard;
-
-/**
- * Choose a shard for each insert in a round-robin fashion. Choose all the
- * shards for each delete because we don't know where it is stored.
- */
-public class RoundRobinDistributionPolicy implements IDistributionPolicy {
-
- private int numShards;
- private int rr; // round-robin implementation
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
- */
- public void init(Shard[] shards) {
- numShards = shards.length;
- rr = 0;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForInsert(DocumentID key) {
- int chosen = rr;
- rr = (rr + 1) % numShards;
- return chosen;
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
- */
- public int chooseShardForDelete(DocumentID key) {
- // -1 represents all the shards
- return -1;
- }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.example;
+
+import org.apache.hadoop.contrib.index.mapred.DocumentID;
+import org.apache.hadoop.contrib.index.mapred.IDistributionPolicy;
+import org.apache.hadoop.contrib.index.mapred.Shard;
+
+/**
+ * Choose a shard for each insert in a round-robin fashion. Choose all the
+ * shards for each delete because we don't know where it is stored.
+ */
+public class RoundRobinDistributionPolicy implements IDistributionPolicy {
+
+ private int numShards;
+ private int rr; // round-robin implementation
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#init(org.apache.hadoop.contrib.index.mapred.Shard[])
+ */
+ public void init(Shard[] shards) {
+ numShards = shards.length;
+ rr = 0;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForInsert(org.apache.hadoop.contrib.index.mapred.DocumentID)
+ */
+ public int chooseShardForInsert(DocumentID key) {
+ int chosen = rr;
+ rr = (rr + 1) % numShards;
+ return chosen;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.contrib.index.mapred.IDistributionPolicy#chooseShardForDelete(org.apache.hadoop.contrib.index.mapred.DocumentID)
+ */
+ public int chooseShardForDelete(DocumentID key) {
+ // -1 represents all the shards
+ return -1;
+ }
+}
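
A sketch contrasting this policy with the hashing one above: inserts cycle
through the shards for perfect balance, but because placement ignores the
document id, a delete cannot be routed and is broadcast instead (the -1
convention). Placeholder Shard entries again suffice since init() only reads
the array length:

    import org.apache.hadoop.contrib.index.example.RoundRobinDistributionPolicy;
    import org.apache.hadoop.contrib.index.mapred.DocumentID;
    import org.apache.hadoop.contrib.index.mapred.Shard;

    public class RoundRobinSketch {
      public static void main(String[] args) {
        RoundRobinDistributionPolicy policy = new RoundRobinDistributionPolicy();
        policy.init(new Shard[3]);
        for (int i = 0; i < 4; i++) {
          // Prints 0, 1, 2, 0: position-based, not id-based.
          System.out.println(policy.chooseShardForInsert(new DocumentID()));
        }
        System.out.println(policy.chooseShardForDelete(new DocumentID())); // -1
      }
    }
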
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneIndexFileNameFilter.java Fri Oct 12 04:48:40 2012
@@ -1,55 +1,55 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.lucene.index.IndexFileNameFilter;
-
-/**
- * A wrapper class to convert an IndexFileNameFilter which implements
- * java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
- */
-class LuceneIndexFileNameFilter implements PathFilter {
-
- private static final LuceneIndexFileNameFilter singleton =
- new LuceneIndexFileNameFilter();
-
- /**
- * Get a static instance.
- * @return the static instance
- */
- public static LuceneIndexFileNameFilter getFilter() {
- return singleton;
- }
-
- private final IndexFileNameFilter luceneFilter;
-
- private LuceneIndexFileNameFilter() {
- luceneFilter = IndexFileNameFilter.getFilter();
- }
-
- /* (non-Javadoc)
- * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path)
- */
- public boolean accept(Path path) {
- return luceneFilter.accept(null, path.getName());
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.lucene.index.IndexFileNameFilter;
+
+/**
+ * A wrapper class to convert an IndexFileNameFilter which implements
+ * java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
+ */
+class LuceneIndexFileNameFilter implements PathFilter {
+
+ private static final LuceneIndexFileNameFilter singleton =
+ new LuceneIndexFileNameFilter();
+
+ /**
+ * Get a static instance.
+ * @return the static instance
+ */
+ public static LuceneIndexFileNameFilter getFilter() {
+ return singleton;
+ }
+
+ private final IndexFileNameFilter luceneFilter;
+
+ private LuceneIndexFileNameFilter() {
+ luceneFilter = IndexFileNameFilter.getFilter();
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path)
+ */
+ public boolean accept(Path path) {
+ return luceneFilter.accept(null, path.getName());
+ }
+
+}
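
Because the class is package-private, callers live inside
org.apache.hadoop.contrib.index.lucene; a fragment-level sketch, assuming fs
is an open FileSystem and indexDir an index directory on it:

    FileStatus[] indexFiles =
        fs.listStatus(indexDir, LuceneIndexFileNameFilter.getFilter());
    // Only names Lucene's own IndexFileNameFilter accepts come back;
    // stray non-index files in the directory are skipped.
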
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/LuceneUtil.java Fri Oct 12 04:48:40 2012
@@ -1,112 +1,112 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.lucene.store.Directory;
-
-/**
- * This class copies some methods from Lucene's SegmentInfos since that class
- * is not public.
- */
-public final class LuceneUtil {
-
- static final class IndexFileNames {
- /** Name of the index segment file */
- static final String SEGMENTS = "segments";
-
- /** Name of the generation reference file name */
- static final String SEGMENTS_GEN = "segments.gen";
- }
-
- /**
- * Check if the file is a segments_N file
- * @param name
- * @return true if the file is a segments_N file
- */
- public static boolean isSegmentsFile(String name) {
- return name.startsWith(IndexFileNames.SEGMENTS)
- && !name.equals(IndexFileNames.SEGMENTS_GEN);
- }
-
- /**
- * Check if the file is the segments.gen file
- * @param name
- * @return true if the file is the segments.gen file
- */
- public static boolean isSegmentsGenFile(String name) {
- return name.equals(IndexFileNames.SEGMENTS_GEN);
- }
-
- /**
- * Get the generation (N) of the current segments_N file in the directory.
- *
- * @param directory -- directory to search for the latest segments_N file
- */
- public static long getCurrentSegmentGeneration(Directory directory)
- throws IOException {
- String[] files = directory.list();
- if (files == null)
- throw new IOException("cannot read directory " + directory
- + ": list() returned null");
- return getCurrentSegmentGeneration(files);
- }
-
- /**
- * Get the generation (N) of the current segments_N file from a list of
- * files.
- *
- * @param files -- array of file names to check
- */
- public static long getCurrentSegmentGeneration(String[] files) {
- if (files == null) {
- return -1;
- }
- long max = -1;
- for (int i = 0; i < files.length; i++) {
- String file = files[i];
- if (file.startsWith(IndexFileNames.SEGMENTS)
- && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
- long gen = generationFromSegmentsFileName(file);
- if (gen > max) {
- max = gen;
- }
- }
- }
- return max;
- }
-
- /**
- * Parse the generation off the segments file name and return it.
- */
- public static long generationFromSegmentsFileName(String fileName) {
- if (fileName.equals(IndexFileNames.SEGMENTS)) {
- return 0;
- } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
- return Long.parseLong(
- fileName.substring(1 + IndexFileNames.SEGMENTS.length()),
- Character.MAX_RADIX);
- } else {
- throw new IllegalArgumentException("fileName \"" + fileName
- + "\" is not a segments file");
- }
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+
+/**
+ * This class copies some methods from Lucene's SegmentInfos since that class
+ * is not public.
+ */
+public final class LuceneUtil {
+
+ static final class IndexFileNames {
+ /** Name of the index segment file */
+ static final String SEGMENTS = "segments";
+
+ /** Name of the generation reference file name */
+ static final String SEGMENTS_GEN = "segments.gen";
+ }
+
+ /**
+ * Check if the file is a segments_N file
+ * @param name
+ * @return true if the file is a segments_N file
+ */
+ public static boolean isSegmentsFile(String name) {
+ return name.startsWith(IndexFileNames.SEGMENTS)
+ && !name.equals(IndexFileNames.SEGMENTS_GEN);
+ }
+
+ /**
+ * Check if the file is the segments.gen file
+ * @param name
+ * @return true if the file is the segments.gen file
+ */
+ public static boolean isSegmentsGenFile(String name) {
+ return name.equals(IndexFileNames.SEGMENTS_GEN);
+ }
+
+ /**
+ * Get the generation (N) of the current segments_N file in the directory.
+ *
+ * @param directory -- directory to search for the latest segments_N file
+ */
+ public static long getCurrentSegmentGeneration(Directory directory)
+ throws IOException {
+ String[] files = directory.list();
+ if (files == null)
+ throw new IOException("cannot read directory " + directory
+ + ": list() returned null");
+ return getCurrentSegmentGeneration(files);
+ }
+
+ /**
+ * Get the generation (N) of the current segments_N file from a list of
+ * files.
+ *
+ * @param files -- array of file names to check
+ */
+ public static long getCurrentSegmentGeneration(String[] files) {
+ if (files == null) {
+ return -1;
+ }
+ long max = -1;
+ for (int i = 0; i < files.length; i++) {
+ String file = files[i];
+ if (file.startsWith(IndexFileNames.SEGMENTS)
+ && !file.equals(IndexFileNames.SEGMENTS_GEN)) {
+ long gen = generationFromSegmentsFileName(file);
+ if (gen > max) {
+ max = gen;
+ }
+ }
+ }
+ return max;
+ }
+
+ /**
+ * Parse the generation off the segments file name and return it.
+ */
+ public static long generationFromSegmentsFileName(String fileName) {
+ if (fileName.equals(IndexFileNames.SEGMENTS)) {
+ return 0;
+ } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) {
+ return Long.parseLong(
+ fileName.substring(1 + IndexFileNames.SEGMENTS.length()),
+ Character.MAX_RADIX);
+ } else {
+ throw new IllegalArgumentException("fileName \"" + fileName
+ + "\" is not a segments file");
+ }
+ }
+
+}
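
For reference, the generation N in a segments_N file name is encoded in base 36 (Character.MAX_RADIX), and a bare "segments" file denotes generation 0. A minimal sketch of how the helpers above behave; the demo class is hypothetical, but the LuceneUtil calls are the public methods from this file:

    import org.apache.hadoop.contrib.index.lucene.LuceneUtil;

    public class LuceneUtilDemo {
      public static void main(String[] args) {
        // The generation is parsed from the suffix after "segments_",
        // in base 36 (Character.MAX_RADIX == 36).
        System.out.println(LuceneUtil.generationFromSegmentsFileName("segments"));   // 0
        System.out.println(LuceneUtil.generationFromSegmentsFileName("segments_2")); // 2
        System.out.println(LuceneUtil.generationFromSegmentsFileName("segments_a")); // 10

        // "segments.gen" starts with "segments" but is explicitly excluded.
        System.out.println(LuceneUtil.isSegmentsFile("segments.gen"));    // false
        System.out.println(LuceneUtil.isSegmentsGenFile("segments.gen")); // true

        // The highest generation among the segments_N files wins.
        String[] files = { "segments", "segments_2", "segments.gen", "_0.cfs" };
        System.out.println(LuceneUtil.getCurrentSegmentGeneration(files)); // 2
      }
    }
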
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDeletionPolicy.java Fri Oct 12 04:48:40 2012
@@ -1,49 +1,49 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.lucene.index.IndexCommitPoint;
-import org.apache.lucene.index.IndexDeletionPolicy;
-
-/**
- * For a mixed directory: behaves like KeepAllDeletionPolicy for the read-only
- * directory (keep all commits from init) and like KeepOnlyLastCommitDeletionPolicy
- * for the writable directory (initially empty, keep only the latest after init).
- */
-class MixedDeletionPolicy implements IndexDeletionPolicy {
-
- private int keepAllFromInit = 0;
-
- public void onInit(List commits) throws IOException {
- keepAllFromInit = commits.size();
- }
-
- public void onCommit(List commits) throws IOException {
- int size = commits.size();
- assert (size > keepAllFromInit);
- // keep all from init and the latest, delete the rest
- for (int i = keepAllFromInit; i < size - 1; i++) {
- ((IndexCommitPoint) commits.get(i)).delete();
- }
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.index.IndexCommitPoint;
+import org.apache.lucene.index.IndexDeletionPolicy;
+
+/**
+ * For a mixed directory: behaves like KeepAllDeletionPolicy for the read-only
+ * directory (keep all commits from init) and like KeepOnlyLastCommitDeletionPolicy
+ * for the writable directory (initially empty, keep only the latest after init).
+ */
+class MixedDeletionPolicy implements IndexDeletionPolicy {
+
+ private int keepAllFromInit = 0;
+
+ public void onInit(List commits) throws IOException {
+ keepAllFromInit = commits.size();
+ }
+
+ public void onCommit(List commits) throws IOException {
+ int size = commits.size();
+ assert (size > keepAllFromInit);
+ // keep all from init and the latest, delete the rest
+ for (int i = keepAllFromInit; i < size - 1; i++) {
+ ((IndexCommitPoint) commits.get(i)).delete();
+ }
+ }
+
+}
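
To make the survivor set concrete without depending on Lucene's IndexCommitPoint API, here is a sketch of just the index arithmetic behind onCommit: commits in [0, keepAllFromInit) and the last commit survive, everything in between is deleted. The demo class and helper are illustrative, not part of the commit:

    import java.util.ArrayList;
    import java.util.List;

    public class MixedDeletionPolicyDemo {
      // Mirrors the loop in MixedDeletionPolicy.onCommit: indices below
      // keepAllFromInit (the init-time history) and index size - 1 (the
      // latest commit) are kept; everything in between is deleted.
      static List<Integer> deletedIndices(int keepAllFromInit, int size) {
        List<Integer> deleted = new ArrayList<Integer>();
        for (int i = keepAllFromInit; i < size - 1; i++) {
          deleted.add(i);
        }
        return deleted;
      }

      public static void main(String[] args) {
        // Two commits existed at onInit; three more commits happened since.
        // Indices 0-1 (init history) and 4 (latest) survive.
        System.out.println(deletedIndices(2, 5)); // [2, 3]
      }
    }
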
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/MixedDirectory.java Fri Oct 12 04:48:40 2012
@@ -1,185 +1,185 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.IOException;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.NoLockFactory;
-
-/**
- * The initial version of an index is stored in a read-only FileSystem dir
- * (FileSystemDirectory). Index files created by newer versions are written to
- * a writable local FS dir (Lucene's FSDirectory). Ideally we would use the
- * general FileSystemDirectory for the writable dir as well, but we have to
- * use Lucene's FSDirectory because Lucene currently performs random writes
- * and FileSystemDirectory only supports sequential writes.
- *
- * Note: We may delete files from the read-only FileSystem dir because there
- * can be some segment files from an uncommitted checkpoint. For the same
- * reason, we may create files in the writable dir which already exist in the
- * read-only dir and logically they overwrite the ones in the read-only dir.
- */
-class MixedDirectory extends Directory {
-
- private final Directory readDir; // FileSystemDirectory
- private final Directory writeDir; // Lucene's FSDirectory
-
- // take advantage of the fact that Lucene's FSDirectory.fileExists is faster
-
- public MixedDirectory(FileSystem readFs, Path readPath, FileSystem writeFs,
- Path writePath, Configuration conf) throws IOException {
-
- try {
- readDir = new FileSystemDirectory(readFs, readPath, false, conf);
- // check writeFS is a local FS?
- writeDir = FSDirectory.getDirectory(writePath.toString());
-
- } catch (IOException e) {
- try {
- close();
- } catch (IOException e1) {
- // ignore this one, throw the original one
- }
- throw e;
- }
-
- lockFactory = new NoLockFactory();
- }
-
- // for debugging
- MixedDirectory(Directory readDir, Directory writeDir) throws IOException {
- this.readDir = readDir;
- this.writeDir = writeDir;
-
- lockFactory = new NoLockFactory();
- }
-
- @Override
- public String[] list() throws IOException {
- String[] readFiles = readDir.list();
- String[] writeFiles = writeDir.list();
-
- if (readFiles == null || readFiles.length == 0) {
- return writeFiles;
- } else if (writeFiles == null || writeFiles.length == 0) {
- return readFiles;
- } else {
- String[] result = new String[readFiles.length + writeFiles.length];
- System.arraycopy(readFiles, 0, result, 0, readFiles.length);
- System.arraycopy(writeFiles, 0, result, readFiles.length,
- writeFiles.length);
- return result;
- }
- }
-
- @Override
- public void deleteFile(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- writeDir.deleteFile(name);
- }
- if (readDir.fileExists(name)) {
- readDir.deleteFile(name);
- }
- }
-
- @Override
- public boolean fileExists(String name) throws IOException {
- return writeDir.fileExists(name) || readDir.fileExists(name);
- }
-
- @Override
- public long fileLength(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- return writeDir.fileLength(name);
- } else {
- return readDir.fileLength(name);
- }
- }
-
- @Override
- public long fileModified(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- return writeDir.fileModified(name);
- } else {
- return readDir.fileModified(name);
- }
- }
-
- @Override
- public void renameFile(String from, String to) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void touchFile(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- writeDir.touchFile(name);
- } else {
- readDir.touchFile(name);
- }
- }
-
- @Override
- public IndexOutput createOutput(String name) throws IOException {
- return writeDir.createOutput(name);
- }
-
- @Override
- public IndexInput openInput(String name) throws IOException {
- if (writeDir.fileExists(name)) {
- return writeDir.openInput(name);
- } else {
- return readDir.openInput(name);
- }
- }
-
- @Override
- public IndexInput openInput(String name, int bufferSize) throws IOException {
- if (writeDir.fileExists(name)) {
- return writeDir.openInput(name, bufferSize);
- } else {
- return readDir.openInput(name, bufferSize);
- }
- }
-
- @Override
- public void close() throws IOException {
- try {
- if (readDir != null) {
- readDir.close();
- }
- } finally {
- if (writeDir != null) {
- writeDir.close();
- }
- }
- }
-
- public String toString() {
- return this.getClass().getName() + "@" + readDir + "&" + writeDir;
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.NoLockFactory;
+
+/**
+ * The initial version of an index is stored in a read-only FileSystem dir
+ * (FileSystemDirectory). Index files created by newer versions are written to
+ * a writable local FS dir (Lucene's FSDirectory). Ideally we would use the
+ * general FileSystemDirectory for the writable dir as well, but we have to
+ * use Lucene's FSDirectory because Lucene currently performs random writes
+ * and FileSystemDirectory only supports sequential writes.
+ *
+ * Note: We may delete files from the read-only FileSystem dir because there
+ * can be some segment files from an uncommitted checkpoint. For the same
+ * reason, we may create files in the writable dir which already exist in the
+ * read-only dir and logically they overwrite the ones in the read-only dir.
+ */
+class MixedDirectory extends Directory {
+
+ private final Directory readDir; // FileSystemDirectory
+ private final Directory writeDir; // Lucene's FSDirectory
+
+ // take advantage of the fact that Lucene's FSDirectory.fileExists is faster
+
+ public MixedDirectory(FileSystem readFs, Path readPath, FileSystem writeFs,
+ Path writePath, Configuration conf) throws IOException {
+
+ try {
+ readDir = new FileSystemDirectory(readFs, readPath, false, conf);
+ // check writeFS is a local FS?
+ writeDir = FSDirectory.getDirectory(writePath.toString());
+
+ } catch (IOException e) {
+ try {
+ close();
+ } catch (IOException e1) {
+ // ignore this one, throw the original one
+ }
+ throw e;
+ }
+
+ lockFactory = new NoLockFactory();
+ }
+
+ // for debugging
+ MixedDirectory(Directory readDir, Directory writeDir) throws IOException {
+ this.readDir = readDir;
+ this.writeDir = writeDir;
+
+ lockFactory = new NoLockFactory();
+ }
+
+ @Override
+ public String[] list() throws IOException {
+ String[] readFiles = readDir.list();
+ String[] writeFiles = writeDir.list();
+
+ if (readFiles == null || readFiles.length == 0) {
+ return writeFiles;
+ } else if (writeFiles == null || writeFiles.length == 0) {
+ return readFiles;
+ } else {
+ String[] result = new String[readFiles.length + writeFiles.length];
+ System.arraycopy(readFiles, 0, result, 0, readFiles.length);
+ System.arraycopy(writeFiles, 0, result, readFiles.length,
+ writeFiles.length);
+ return result;
+ }
+ }
+
+ @Override
+ public void deleteFile(String name) throws IOException {
+ if (writeDir.fileExists(name)) {
+ writeDir.deleteFile(name);
+ }
+ if (readDir.fileExists(name)) {
+ readDir.deleteFile(name);
+ }
+ }
+
+ @Override
+ public boolean fileExists(String name) throws IOException {
+ return writeDir.fileExists(name) || readDir.fileExists(name);
+ }
+
+ @Override
+ public long fileLength(String name) throws IOException {
+ if (writeDir.fileExists(name)) {
+ return writeDir.fileLength(name);
+ } else {
+ return readDir.fileLength(name);
+ }
+ }
+
+ @Override
+ public long fileModified(String name) throws IOException {
+ if (writeDir.fileExists(name)) {
+ return writeDir.fileModified(name);
+ } else {
+ return readDir.fileModified(name);
+ }
+ }
+
+ @Override
+ public void renameFile(String from, String to) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void touchFile(String name) throws IOException {
+ if (writeDir.fileExists(name)) {
+ writeDir.touchFile(name);
+ } else {
+ readDir.touchFile(name);
+ }
+ }
+
+ @Override
+ public IndexOutput createOutput(String name) throws IOException {
+ return writeDir.createOutput(name);
+ }
+
+ @Override
+ public IndexInput openInput(String name) throws IOException {
+ if (writeDir.fileExists(name)) {
+ return writeDir.openInput(name);
+ } else {
+ return readDir.openInput(name);
+ }
+ }
+
+ @Override
+ public IndexInput openInput(String name, int bufferSize) throws IOException {
+ if (writeDir.fileExists(name)) {
+ return writeDir.openInput(name, bufferSize);
+ } else {
+ return readDir.openInput(name, bufferSize);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ if (readDir != null) {
+ readDir.close();
+ }
+ } finally {
+ if (writeDir != null) {
+ writeDir.close();
+ }
+ }
+ }
+
+ public String toString() {
+ return this.getClass().getName() + "@" + readDir + "&" + writeDir;
+ }
+
+}
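
The overlay semantics are easiest to see with two in-memory directories wired through the package-private debugging constructor above. A sketch assuming the old Lucene 2.x store API used throughout this module, and assuming it runs inside org.apache.hadoop.contrib.index.lucene (the class is package-private); the file names are made up:

    package org.apache.hadoop.contrib.index.lucene;

    import java.io.IOException;

    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.store.RAMDirectory;

    public class MixedDirectoryDemo {
      public static void main(String[] args) throws IOException {
        RAMDirectory readDir = new RAMDirectory();
        RAMDirectory writeDir = new RAMDirectory();

        // Seed the "read-only" side with one file.
        IndexOutput out = readDir.createOutput("_0.cfs");
        out.writeInt(42);
        out.close();

        MixedDirectory mixed = new MixedDirectory(readDir, writeDir);

        // New output always lands in the writable side.
        IndexOutput out2 = mixed.createOutput("_1.cfs");
        out2.writeInt(7);
        out2.close();

        System.out.println(writeDir.fileExists("_1.cfs")); // true
        System.out.println(mixed.fileExists("_0.cfs"));    // true, from readDir
        System.out.println(mixed.list().length);           // 2, both sides merged

        mixed.close();
      }
    }

Note that list() simply concatenates the two sides, so a name present in both directories (a logical overwrite, per the class comment) shows up twice.
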
Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java?rev=1397435&r1=1397434&r2=1397435&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java (original)
+++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/src/contrib/index/src/java/org/apache/hadoop/contrib/index/lucene/RAMDirectoryUtil.java Fri Oct 12 04:48:40 2012
@@ -1,119 +1,119 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.contrib.index.lucene;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.io.Text;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMDirectory;
-
-/**
- * A utility class which writes an index in a RAM dir into a DataOutput and
- * reads an index from a DataInput into a RAM dir.
- */
-public class RAMDirectoryUtil {
- private static final int BUFFER_SIZE = 1024; // RAMOutputStream.BUFFER_SIZE;
-
- /**
- * Write a number of files from a ram directory to a data output.
- * @param out the data output
- * @param dir the ram directory
- * @param names the names of the files to write
- * @throws IOException
- */
- public static void writeRAMFiles(DataOutput out, RAMDirectory dir,
- String[] names) throws IOException {
- out.writeInt(names.length);
-
- for (int i = 0; i < names.length; i++) {
- Text.writeString(out, names[i]);
- long length = dir.fileLength(names[i]);
- out.writeLong(length);
-
- if (length > 0) {
- // can we avoid the extra copy?
- IndexInput input = null;
- try {
- input = dir.openInput(names[i], BUFFER_SIZE);
-
- int position = 0;
- byte[] buffer = new byte[BUFFER_SIZE];
-
- while (position < length) {
- int len =
- position + BUFFER_SIZE <= length ? BUFFER_SIZE
- : (int) (length - position);
- input.readBytes(buffer, 0, len);
- out.write(buffer, 0, len);
- position += len;
- }
- } finally {
- if (input != null) {
- input.close();
- }
- }
- }
- }
- }
-
- /**
- * Read a number of files from a data input to a ram directory.
- * @param in the data input
- * @param dir the ram directory
- * @throws IOException
- */
- public static void readRAMFiles(DataInput in, RAMDirectory dir)
- throws IOException {
- int numFiles = in.readInt();
-
- for (int i = 0; i < numFiles; i++) {
- String name = Text.readString(in);
- long length = in.readLong();
-
- if (length > 0) {
- // can we avoid the extra copy?
- IndexOutput output = null;
- try {
- output = dir.createOutput(name);
-
- int position = 0;
- byte[] buffer = new byte[BUFFER_SIZE];
-
- while (position < length) {
- int len =
- position + BUFFER_SIZE <= length ? BUFFER_SIZE
- : (int) (length - position);
- in.readFully(buffer, 0, len);
- output.writeBytes(buffer, 0, len);
- position += len;
- }
- } finally {
- if (output != null) {
- output.close();
- }
- }
- }
- }
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.contrib.index.lucene;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * A utility class which writes an index in a RAM dir into a DataOutput and
+ * reads an index from a DataInput into a RAM dir.
+ */
+public class RAMDirectoryUtil {
+ private static final int BUFFER_SIZE = 1024; // RAMOutputStream.BUFFER_SIZE;
+
+ /**
+ * Write a number of files from a ram directory to a data output.
+ * @param out the data output
+ * @param dir the ram directory
+ * @param names the names of the files to write
+ * @throws IOException
+ */
+ public static void writeRAMFiles(DataOutput out, RAMDirectory dir,
+ String[] names) throws IOException {
+ out.writeInt(names.length);
+
+ for (int i = 0; i < names.length; i++) {
+ Text.writeString(out, names[i]);
+ long length = dir.fileLength(names[i]);
+ out.writeLong(length);
+
+ if (length > 0) {
+ // can we avoid the extra copy?
+ IndexInput input = null;
+ try {
+ input = dir.openInput(names[i], BUFFER_SIZE);
+
+ int position = 0;
+ byte[] buffer = new byte[BUFFER_SIZE];
+
+ while (position < length) {
+ int len =
+ position + BUFFER_SIZE <= length ? BUFFER_SIZE
+ : (int) (length - position);
+ input.readBytes(buffer, 0, len);
+ out.write(buffer, 0, len);
+ position += len;
+ }
+ } finally {
+ if (input != null) {
+ input.close();
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Read a number of files from a data input to a ram directory.
+ * @param in the data input
+ * @param dir the ram directory
+ * @throws IOException
+ */
+ public static void readRAMFiles(DataInput in, RAMDirectory dir)
+ throws IOException {
+ int numFiles = in.readInt();
+
+ for (int i = 0; i < numFiles; i++) {
+ String name = Text.readString(in);
+ long length = in.readLong();
+
+ if (length > 0) {
+ // can we avoid the extra copy?
+ IndexOutput output = null;
+ try {
+ output = dir.createOutput(name);
+
+ int position = 0;
+ byte[] buffer = new byte[BUFFER_SIZE];
+
+ while (position < length) {
+ int len =
+ position + BUFFER_SIZE <= length ? BUFFER_SIZE
+ : (int) (length - position);
+ in.readFully(buffer, 0, len);
+ output.writeBytes(buffer, 0, len);
+ position += len;
+ }
+ } finally {
+ if (output != null) {
+ output.close();
+ }
+ }
+ }
+ }
+ }
+
+}
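
A round-trip sketch of the two helpers above, again assuming the old Lucene 2.x RAMDirectory API; the demo class and file name are made up, but writeRAMFiles and readRAMFiles are the public methods from this file:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.contrib.index.lucene.RAMDirectoryUtil;
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.store.RAMDirectory;

    public class RAMDirectoryUtilDemo {
      public static void main(String[] args) throws IOException {
        RAMDirectory src = new RAMDirectory();
        IndexOutput out = src.createOutput("part.dat");
        out.writeString("hello");
        out.close();

        // Serialize the named files into a byte array via DataOutput...
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        RAMDirectoryUtil.writeRAMFiles(new DataOutputStream(bytes), src,
            new String[] { "part.dat" });

        // ...and rebuild them in a fresh RAM directory via DataInput.
        RAMDirectory dst = new RAMDirectory();
        RAMDirectoryUtil.readRAMFiles(
            new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())),
            dst);

        System.out.println(dst.fileExists("part.dat")); // true
        System.out.println(dst.fileLength("part.dat")
            == src.fileLength("part.dat"));             // true
      }
    }

One quirk visible in the code: names and lengths are always written, but readRAMFiles only recreates files whose length is greater than zero, so empty files survive the stream as entries but not as files.
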