You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/01/05 17:21:19 UTC
svn commit: r1227676 [1/3] - in /lucene/dev/trunk/lucene: ./
contrib/memory/src/java/org/apache/lucene/index/memory/
src/java/org/apache/lucene/codecs/
src/java/org/apache/lucene/codecs/lucene3x/
src/java/org/apache/lucene/codecs/lucene40/ src/java/org...
Author: simonw
Date: Thu Jan 5 16:21:17 2012
New Revision: 1227676
URL: http://svn.apache.org/viewvc?rev=1227676&view=rev
Log:
LUCENE-3628: Cut over Norms to DocValues
Added:
lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java
- copied, changed from r1226391, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java
- copied, changed from r1226391, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsConsumer.java
- copied, changed from r1226391, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsProducer.java
- copied, changed from r1226391, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsReader.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexNormsConsumer.java
- copied, changed from r1226391, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsWriter.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWNormsFormat.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/index/MultiNorms.java
- copied unchanged from r1227522, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiNorms.java
Removed:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/NormsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/NormsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiNorms.java
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesReaderBase.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesWriterBase.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BaseMultiReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocValue.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/IndexReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/MultiDocValues.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumer.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/NormsConsumerPerField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/ParallelReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentCoreReaders.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SegmentReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/store/CompoundFileWriter.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/IOUtils.java
lucene/dev/trunk/lucene/src/test-framework/java/org/apache/lucene/codecs/preflexrw/PreFlexRWCodec.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/codecs/lucene40/TestDocValues.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestDocValuesIndexing.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Jan 5 16:21:17 2012
@@ -619,6 +619,9 @@ New features
* LUCENE-3638: Added sugar methods to IndexReader and IndexSearcher to
load only certain fields when loading a document. (Peter Chang via
Mike McCandless)
+
+* LUCENE-3628: Norms are represented as DocValues. IndexReader exposes
+ a #normValues(String) method to obtain norms per field. (Simon Willnauer)
Optimizations
Modified: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Thu Jan 5 16:21:17 2012
@@ -48,6 +48,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.memory.MemoryIndexNormDocValues.SingleByteSource;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -1082,34 +1083,6 @@ public class MemoryIndex {
private void setSearcher(IndexSearcher searcher) {
this.searcher = searcher;
}
-
- /** performance hack: cache norms to avoid repeated expensive calculations */
- private byte[] cachedNorms;
- private String cachedFieldName;
- private SimilarityProvider cachedSimilarity;
-
- @Override
- public byte[] norms(String fieldName) {
- byte[] norms = cachedNorms;
- SimilarityProvider sim = getSimilarityProvider();
- if (!fieldName.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
- Info info = getInfo(fieldName);
- Similarity fieldSim = sim.get(fieldName);
- int numTokens = info != null ? info.numTokens : 0;
- int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
- float boost = info != null ? info.getBoost() : 1.0f;
- FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
- byte norm = fieldSim.computeNorm(invertState);
- norms = new byte[] {norm};
-
- // cache it for future reuse
- cachedNorms = norms;
- cachedFieldName = fieldName;
- cachedSimilarity = sim;
- if (DEBUG) System.err.println("MemoryIndexReader.norms: " + fieldName + ":" + norm + ":" + numTokens);
- }
- return norms;
- }
@Override
public int numDocs() {
@@ -1160,6 +1133,34 @@ public class MemoryIndex {
public DocValues docValues(String field) throws IOException {
return null;
}
+
+ /** performance hack: cache norms to avoid repeated expensive calculations */
+ private DocValues cachedNormValues;
+ private String cachedFieldName;
+ private SimilarityProvider cachedSimilarity;
+
+ @Override
+ public DocValues normValues(String field) throws IOException {
+ DocValues norms = cachedNormValues;
+ SimilarityProvider sim = getSimilarityProvider();
+ if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
+ Info info = getInfo(field);
+ Similarity fieldSim = sim.get(field);
+ int numTokens = info != null ? info.numTokens : 0;
+ int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
+ float boost = info != null ? info.getBoost() : 1.0f;
+ FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
+ byte norm = fieldSim.computeNorm(invertState);
+ SingleByteSource singleByteSource = new SingleByteSource(new byte[] {norm});
+ norms = new MemoryIndexNormDocValues(singleByteSource);
+ // cache it for future reuse
+ cachedNormValues = norms;
+ cachedFieldName = field;
+ cachedSimilarity = sim;
+ if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + norm + ":" + numTokens);
+ }
+ return norms;
+ }
}
Added: lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java?rev=1227676&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java (added)
+++ lucene/dev/trunk/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java Thu Jan 5 16:21:17 2012
@@ -0,0 +1,78 @@
+package org.apache.lucene.index.memory;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ *
+ * @lucene.internal
+ */
+class MemoryIndexNormDocValues extends DocValues {
+
+ private final Source source;
+
+ MemoryIndexNormDocValues(Source source) {
+ this.source = source;
+ }
+ @Override
+ public Source load() throws IOException {
+ return source;
+ }
+
+ @Override
+ public Source getDirectSource() throws IOException {
+ return source;
+ }
+
+ @Override
+ public Type type() {
+ return source.type();
+ }
+
+ public static class SingleByteSource extends Source {
+
+ private final byte[] bytes;
+
+ protected SingleByteSource(byte[] bytes) {
+ super(Type.BYTES_FIXED_STRAIGHT);
+ this.bytes = bytes;
+ }
+
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ ref.bytes = bytes;
+ ref.offset = docID;
+ ref.length = 1;
+ return ref;
+ }
+
+ @Override
+ public boolean hasArray() {
+ return true;
+ }
+
+ @Override
+ public Object getArray() {
+ return bytes;
+ }
+
+ }
+
+}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java Thu Jan 5 16:21:17 2012
@@ -17,10 +17,7 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
-import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
@@ -42,7 +39,6 @@ import org.apache.lucene.util.fst.ByteSe
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
-import org.apache.lucene.util.fst.Util;
/*
TODO:
@@ -641,6 +637,7 @@ public class BlockTreeTermsWriter extend
}
// for debugging
+ @SuppressWarnings("unused")
private String toString(BytesRef b) {
try {
return b.utf8ToString() + " " + b;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesConsumer.java Thu Jan 5 16:21:17 2012
@@ -18,11 +18,14 @@ package org.apache.lucene.codecs;
*/
import java.io.IOException;
+import org.apache.lucene.codecs.lucene40.values.Writer;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.DocValue;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
/**
* Abstract API that consumes {@link DocValue}s.
@@ -35,6 +38,9 @@ import org.apache.lucene.util.Bits;
*/
public abstract class DocValuesConsumer {
+ protected Source currentMergeSource;
+ protected final BytesRef spare = new BytesRef();
+
/**
* Adds the given {@link DocValue} instance to this
* {@link DocValuesConsumer}
@@ -83,6 +89,7 @@ public abstract class DocValuesConsumer
hasMerged = true;
merge(new SingleSubMergeState(docValues[readerIDX], mergeState.docBase[readerIDX], reader.reader.maxDoc(),
reader.liveDocs));
+ mergeState.checkAbort.work(reader.reader.maxDoc());
}
}
// only finish if no exception is thrown!
@@ -99,10 +106,112 @@ public abstract class DocValuesConsumer
* @throws IOException
* if an {@link IOException} occurs
*/
- // TODO: can't we have a default implementation here that merges naively with our apis?
- // this is how stored fields and term vectors work. its a pain to have to impl merging
- // (should be an optimization to override it)
- protected abstract void merge(SingleSubMergeState mergeState) throws IOException;
+ protected void merge(SingleSubMergeState state) throws IOException {
+ // This enables bulk copies in subclasses per MergeState, subclasses can
+ // simply override this and decide if they want to merge
+ // segments using this generic implementation or if a bulk merge is possible
+ // / feasible.
+ final Source source = state.reader.getDirectSource();
+ assert source != null;
+ setNextMergeSource(source); // set the current enum we are working on - the
+ // impl. will get the correct reference for the type
+ // it supports
+ int docID = state.docBase;
+ final Bits liveDocs = state.liveDocs;
+ final int docCount = state.docCount;
+ for (int i = 0; i < docCount; i++) {
+ if (liveDocs == null || liveDocs.get(i)) {
+ mergeDoc(docID++, i);
+ }
+ }
+ }
+
+ /**
+ * Records the specified <tt>long</tt> value for the docID or throws an
+ * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
+ * <tt>long</tt> values.
+ *
+ * @throws UnsupportedOperationException
+ * if this writer doesn't record <tt>long</tt> values
+ */
+ protected void add(int docID, long value) throws IOException {
+ throw new UnsupportedOperationException("override this method to support integer types");
+ }
+
+ /**
+ * Records the specified <tt>double</tt> value for the docID or throws an
+ * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
+ * <tt>double</tt> values.
+ *
+ * @throws UnsupportedOperationException
+ * if this writer doesn't record <tt>double</tt> values
+ */
+ protected void add(int docID, double value) throws IOException {
+ throw new UnsupportedOperationException("override this method to support floating point types");
+ }
+
+ /**
+ * Records the specified {@link BytesRef} value for the docID or throws an
+ * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
+ * {@link BytesRef} values.
+ *
+ * @throws UnsupportedOperationException
+ * if this writer doesn't record {@link BytesRef} values
+ */
+ protected void add(int docID, BytesRef value) throws IOException {
+ throw new UnsupportedOperationException("override this method to support byte types");
+ }
+
+ /**
+ * Merges a document with the given <code>docID</code>. The method's
+ * implementation obtains the value for the <i>sourceDoc</i> id from the
+ * current {@link Source} set to <i>setNextMergeSource(Source)</i>.
+ * <p>
+ * This method is used during merging to provide implementation agnostic
+ * default merge implementation.
+ * </p>
+ * <p>
+ * All documents IDs between the given ID and the previously given ID or
+ * <tt>0</tt> if the method is called the first time are filled with default
+ * values depending on the {@link Writer} implementation. The given document
+ * ID must always be greater than the previous ID or <tt>0</tt> if called the
+ * first time.
+ */
+ protected void mergeDoc(int docID, int sourceDoc)
+ throws IOException {
+ switch(currentMergeSource.type()) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ add(docID, currentMergeSource.getBytes(sourceDoc, spare));
+ break;
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ case FIXED_INTS_8:
+ case VAR_INTS:
+ add(docID, currentMergeSource.getInt(sourceDoc));
+ break;
+ case FLOAT_32:
+ case FLOAT_64:
+ add(docID, currentMergeSource.getFloat(sourceDoc));
+ break;
+ }
+ }
+
+ /**
+ * Sets the next {@link Source} to consume values from on calls to
+ * {@link #mergeDoc(int, int)}
+ *
+ * @param mergeSource
+ * the next {@link Source}, this must not be null
+ */
+ protected final void setNextMergeSource(Source mergeSource) {
+ currentMergeSource = mergeSource;
+ }
/**
* Specialized auxiliary MergeState is necessary since we don't want to
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesReaderBase.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesReaderBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesReaderBase.java Thu Jan 5 16:21:17 2012
@@ -41,7 +41,7 @@ import org.apache.lucene.util.BytesRef;
*/
// TODO: this needs to go under lucene40 codec (its specific to its impl)
public abstract class DocValuesReaderBase extends PerDocProducer {
-
+
protected abstract void closeInternal(Collection<? extends Closeable> closeables) throws IOException;
protected abstract Map<String, DocValues> docValues();
@@ -68,14 +68,14 @@ public abstract class DocValuesReaderBas
try {
for (FieldInfo fieldInfo : fieldInfos) {
- if (fieldInfo.hasDocValues()) {
+ if (canLoad(fieldInfo)) {
final String field = fieldInfo.name;
// TODO can we have a compound file per segment and codec for
// docvalues?
final String id = DocValuesWriterBase.docValuesId(segment,
fieldInfo.number);
values.put(field,
- loadDocValues(docCount, dir, id, fieldInfo.getDocValuesType(), context));
+ loadDocValues(docCount, dir, id, getDocValuesType(fieldInfo), context));
}
}
success = true;
@@ -88,6 +88,18 @@ public abstract class DocValuesReaderBas
return values;
}
+ protected boolean canLoad(FieldInfo info) {
+ return info.hasDocValues();
+ }
+
+ protected Type getDocValuesType(FieldInfo info) {
+ return info.getDocValuesType();
+ }
+
+ protected boolean anyDocValuesFields(FieldInfos infos) {
+ return infos.anyDocValuesFields();
+ }
+
/**
* Loads a {@link DocValues} instance depending on the given {@link Type}.
* Codecs that use different implementations for a certain {@link Type} can
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesWriterBase.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesWriterBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/DocValuesWriterBase.java Thu Jan 5 16:21:17 2012
@@ -23,7 +23,6 @@ import java.util.Comparator;
import org.apache.lucene.codecs.lucene40.values.Writer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.PerDocWriteState;
-import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type; // javadoc
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -38,7 +37,6 @@ import org.apache.lucene.util.Counter;
//TODO: this needs to go under lucene40 codec (its specific to its impl)
public abstract class DocValuesWriterBase extends PerDocConsumer {
protected final String segmentName;
- protected final String segmentSuffix;
private final Counter bytesUsed;
protected final IOContext context;
private final boolean fasterButMoreRam;
@@ -58,7 +56,6 @@ public abstract class DocValuesWriterBas
*/
protected DocValuesWriterBase(PerDocWriteState state, boolean fasterButMoreRam) {
this.segmentName = state.segmentName;
- this.segmentSuffix = state.segmentSuffix;
this.bytesUsed = state.bytesUsed;
this.context = state.context;
this.fasterButMoreRam = fasterButMoreRam;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/NormsFormat.java Thu Jan 5 16:21:17 2012
@@ -20,19 +20,19 @@ package org.apache.lucene.codecs;
import java.io.IOException;
import java.util.Set;
-import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
/**
* format for normalization factors
*/
public abstract class NormsFormat {
- /** Note: separateNormsDir should not be used! */
- public abstract NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException;
- public abstract NormsWriter normsWriter(SegmentWriteState state) throws IOException;
+ public abstract PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException;
+ @Deprecated
+ public abstract PerDocProducer docsProducer(SegmentReadState state, Directory separateNormsDir) throws IOException;
+ public abstract PerDocProducer docsProducer(SegmentReadState state) throws IOException;
public abstract void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException;
/**
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/PerDocConsumer.java Thu Jan 5 16:21:17 2012
@@ -20,7 +20,9 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.DocValues.Type;
/**
* Abstract API that consumes per document values. Concrete implementations of
@@ -32,7 +34,7 @@ import org.apache.lucene.index.MergeStat
*
* @lucene.experimental
*/
-public abstract class PerDocConsumer implements Closeable{
+public abstract class PerDocConsumer implements Closeable {
/** Adds a new DocValuesField */
public abstract DocValuesConsumer addValuesField(DocValues.Type type, FieldInfo field)
throws IOException;
@@ -46,14 +48,57 @@ public abstract class PerDocConsumer imp
for (FieldInfo fieldInfo : mergeState.fieldInfos) {
mergeState.fieldInfo = fieldInfo; // set the field we are merging
- if (fieldInfo.hasDocValues()) {
+ if (canMerge(fieldInfo)) {
for (int i = 0; i < docValues.length; i++) {
- docValues[i] = mergeState.readers.get(i).reader.docValues(fieldInfo.name);
+ docValues[i] = getDocValuesForMerge(mergeState.readers.get(i).reader, fieldInfo);
}
- final DocValuesConsumer docValuesConsumer = addValuesField(fieldInfo.getDocValuesType(), fieldInfo);
+ final DocValuesConsumer docValuesConsumer = addValuesField(getDocValuesType(fieldInfo), fieldInfo);
assert docValuesConsumer != null;
docValuesConsumer.merge(mergeState, docValues);
}
}
- }
+ }
+
+ /**
+ * Returns a {@link DocValues} instance for merging from the given reader for the given
+ * {@link FieldInfo}. This method is used for merging and uses
+ * {@link IndexReader#docValues(String)} by default.
+ * <p>
+ * To enable {@link DocValues} merging for different {@link DocValues} than
+ * the default override this method accordingly.
+ * <p>
+ */
+ protected DocValues getDocValuesForMerge(IndexReader reader, FieldInfo info) throws IOException {
+ return reader.docValues(info.name);
+ }
+
+ /**
+ * Returns <code>true</code> iff the given field can be merged ie. has {@link DocValues}.
+ * By default this method uses {@link FieldInfo#hasDocValues()}.
+ * <p>
+ * To enable {@link DocValues} merging for different {@link DocValues} than
+ * the default override this method accordingly.
+ * <p>
+ */
+ protected boolean canMerge(FieldInfo info) {
+ return info.hasDocValues();
+ }
+
+ /**
+ * Returns the {@link DocValues} {@link Type} for the given {@link FieldInfo}.
+ * By default this method uses {@link FieldInfo#getDocValuesType()}.
+ * <p>
+ * To enable {@link DocValues} merging for different {@link DocValues} than
+ * the default override this method accordingly.
+ * <p>
+ */
+ protected Type getDocValuesType(FieldInfo info) {
+ return info.getDocValuesType();
+ }
+
+ /**
+ * Called during indexing if the indexing session is aborted due to an unrecoverable exception.
+ * This method should clean up all resources.
+ */
+ public abstract void abort();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xCodec.java Thu Jan 5 16:21:17 2012
@@ -31,7 +31,6 @@ import org.apache.lucene.codecs.SegmentI
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
-import org.apache.lucene.codecs.lucene40.Lucene40NormsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfosFormat;
import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
@@ -65,7 +64,7 @@ public class Lucene3xCodec extends Codec
private final SegmentInfosFormat infosFormat = new Lucene40SegmentInfosFormat();
// TODO: this should really be a different impl
- private final NormsFormat normsFormat = new Lucene40NormsFormat();
+ private final NormsFormat normsFormat = new Lucene3xNormsFormat();
// 3.x doesn't support docvalues
private final DocValuesFormat docValuesFormat = new DocValuesFormat() {
Copied: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java (from r1226391, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java?p2=lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java&r1=1226391&r2=1227676&rev=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsFormat.java Thu Jan 5 16:21:17 2012
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.lucene40;
+package org.apache.lucene.codecs.lucene3x;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -21,33 +21,45 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.NormsReader;
-import org.apache.lucene.codecs.NormsWriter;
-import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.codecs.PerDocConsumer;
+import org.apache.lucene.codecs.PerDocProducer;
+import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-public class Lucene40NormsFormat extends NormsFormat {
+/**
+ * Read-Only Lucene 3.x Norms Format
+ *
+ * @lucene.experimental
+ */
+public class Lucene3xNormsFormat extends NormsFormat {
+
@Override
- public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException {
- return new Lucene40NormsReader(dir, info, fields, context, separateNormsDir);
+ public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
+ Lucene3xNormsProducer.files(dir, info, files);
}
@Override
- public NormsWriter normsWriter(SegmentWriteState state) throws IOException {
- return new Lucene40NormsWriter(state.directory, state.segmentName, state.context);
+ public void separateFiles(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
+ Lucene3xNormsProducer.separateFiles(dir, info, files);
}
+
@Override
- public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
- Lucene40NormsReader.files(dir, info, files);
+ public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+ throw new IllegalArgumentException("this codec can only be used for reading");
}
@Override
- public void separateFiles(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
- Lucene40NormsReader.separateFiles(dir, info, files);
+ public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
+ return docsProducer(state, null);
+ }
+
+ @Override
+ public PerDocProducer docsProducer(SegmentReadState state,
+ Directory separateNormsDir) throws IOException {
+ return new Lucene3xNormsProducer(state.dir, state.segmentInfo, state.fieldInfos, state.context, separateNormsDir);
}
}
Copied: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java (from r1226391, lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java?p2=lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java&r1=1226391&r2=1227676&rev=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene3x/Lucene3xNormsProducer.java Thu Jan 5 16:21:17 2012
@@ -1,4 +1,4 @@
-package org.apache.lucene.codecs.lucene40;
+package org.apache.lucene.codecs.lucene3x;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,7 +24,10 @@ import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
-import org.apache.lucene.codecs.NormsReader;
+import org.apache.lucene.codecs.PerDocProducer;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.Source;
+import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@@ -32,14 +35,29 @@ import org.apache.lucene.index.SegmentIn
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.MapBackedSet;
import org.apache.lucene.util.StringHelper;
-public class Lucene40NormsReader extends NormsReader {
- // this would be replaced by Source/SourceCache in a dv impl.
- // for now we have our own mini-version
- final Map<String,Norm> norms = new HashMap<String,Norm>();
+/**
+ * Reads Lucene 3.x norms format and exposes it via DocValues API
+ * @lucene.experimental
+ */
+class Lucene3xNormsProducer extends PerDocProducer {
+
+ /** norms header placeholder */
+ static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
+
+ /** Extension of norms file */
+ static final String NORMS_EXTENSION = "nrm";
+
+ /** Extension of separate norms file
+ * @deprecated */
+ @Deprecated
+ static final String SEPARATE_NORMS_EXTENSION = "s";
+
+ final Map<String,NormsDocValues> norms = new HashMap<String,NormsDocValues>();
// any .nrm or .sNN files we have open at any time.
// TODO: just a list, and double-close() separate norms files?
final Set<IndexInput> openFiles = new MapBackedSet<IndexInput>(new IdentityHashMap<IndexInput,Boolean>());
@@ -49,20 +67,20 @@ public class Lucene40NormsReader extends
// note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front.
// but we just don't do any seeks or reading yet.
- public Lucene40NormsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException {
+ public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException {
maxdoc = info.docCount;
String segmentName = info.name;
Map<Integer,Long> normGen = info.getNormGen();
boolean success = false;
try {
- long nextNormSeek = Lucene40NormsWriter.NORMS_HEADER.length; //skip header (header unused for now)
+ long nextNormSeek = NORMS_HEADER.length; //skip header (header unused for now)
for (FieldInfo fi : fields) {
if (fi.isIndexed && !fi.omitNorms) {
String fileName = getNormFilename(segmentName, normGen, fi.number);
Directory d = hasSeparateNorms(normGen, fi.number) ? separateNormsDir : dir;
// singleNormFile means multiple norms share this file
- boolean singleNormFile = IndexFileNames.matchesExtension(fileName, Lucene40NormsWriter.NORMS_EXTENSION);
+ boolean singleNormFile = IndexFileNames.matchesExtension(fileName, NORMS_EXTENSION);
IndexInput normInput = null;
long normSeek;
@@ -90,19 +108,16 @@ public class Lucene40NormsReader extends
if (isUnversioned) {
normSeek = 0;
} else {
- normSeek = Lucene40NormsWriter.NORMS_HEADER.length;
+ normSeek = NORMS_HEADER.length;
}
}
-
- Norm norm = new Norm();
- norm.file = normInput;
- norm.offset = normSeek;
+ NormsDocValues norm = new NormsDocValues(normInput, normSeek);
norms.put(fi.name, norm);
nextNormSeek += maxdoc; // increment also if some norms are separate
}
}
// TODO: change to a real check? see LUCENE-3619
- assert singleNormStream == null || nextNormSeek == singleNormStream.length();
+ assert singleNormStream == null || nextNormSeek == singleNormStream.length() : singleNormStream != null ? "len: " + singleNormStream.length() + " expected: " + nextNormSeek : "null";
success = true;
} finally {
if (!success) {
@@ -112,12 +127,10 @@ public class Lucene40NormsReader extends
}
@Override
- public byte[] norms(String name) throws IOException {
- Norm norm = norms.get(name);
- return norm == null ? null : norm.bytes();
+ public DocValues docValues(String field) throws IOException {
+ return norms.get(field);
}
-
@Override
public void close() throws IOException {
try {
@@ -130,10 +143,10 @@ public class Lucene40NormsReader extends
private static String getNormFilename(String segmentName, Map<Integer,Long> normGen, int number) {
if (hasSeparateNorms(normGen, number)) {
- return IndexFileNames.fileNameFromGeneration(segmentName, Lucene40NormsWriter.SEPARATE_NORMS_EXTENSION + number, normGen.get(number));
+ return IndexFileNames.fileNameFromGeneration(segmentName, SEPARATE_NORMS_EXTENSION + number, normGen.get(number));
} else {
// single file for all norms
- return IndexFileNames.fileNameFromGeneration(segmentName, Lucene40NormsWriter.NORMS_EXTENSION, SegmentInfo.WITHOUT_GEN);
+ return IndexFileNames.fileNameFromGeneration(segmentName, NORMS_EXTENSION, SegmentInfo.WITHOUT_GEN);
}
}
@@ -146,34 +159,38 @@ public class Lucene40NormsReader extends
return gen != null && gen.longValue() != SegmentInfo.NO;
}
- class Norm {
- IndexInput file;
- long offset;
- byte bytes[];
+ static final class NormSource extends Source {
+ protected NormSource(byte[] bytes) {
+ super(Type.BYTES_FIXED_STRAIGHT);
+ this.bytes = bytes;
+ }
+
+ final byte bytes[];
- synchronized byte[] bytes() throws IOException {
- if (bytes == null) {
- bytes = new byte[maxdoc];
- // some norms share fds
- synchronized(file) {
- file.seek(offset);
- file.readBytes(bytes, 0, bytes.length, false);
- }
- // we are done with this file
- if (file != singleNormStream) {
- openFiles.remove(file);
- file.close();
- file = null;
- }
- }
+ @Override
+ public BytesRef getBytes(int docID, BytesRef ref) {
+ ref.bytes = bytes;
+ ref.offset = docID;
+ ref.length = 1;
+ return ref;
+ }
+
+ @Override
+ public boolean hasArray() {
+ return true;
+ }
+
+ @Override
+ public Object getArray() {
return bytes;
}
+
}
static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
// TODO: This is what SI always did... but we can do this cleaner?
// like first FI that has norms but doesn't have separate norms?
- final String normsFileName = IndexFileNames.segmentFileName(info.name, "", Lucene40NormsWriter.NORMS_EXTENSION);
+ final String normsFileName = IndexFileNames.segmentFileName(info.name, "", NORMS_EXTENSION);
if (dir.fileExists(normsFileName)) {
files.add(normsFileName);
}
@@ -188,9 +205,49 @@ public class Lucene40NormsReader extends
long gen = entry.getValue();
if (gen >= SegmentInfo.YES) {
// Definitely a separate norm file, with generation:
- files.add(IndexFileNames.fileNameFromGeneration(info.name, Lucene40NormsWriter.SEPARATE_NORMS_EXTENSION + entry.getKey(), gen));
+ files.add(IndexFileNames.fileNameFromGeneration(info.name, SEPARATE_NORMS_EXTENSION + entry.getKey(), gen));
}
}
}
}
+
+ private class NormsDocValues extends DocValues {
+ private final IndexInput file;
+ private final long offset;
+ public NormsDocValues(IndexInput normInput, long normSeek) {
+ this.file = normInput;
+ this.offset = normSeek;
+ }
+
+ @Override
+ public Source load() throws IOException {
+ return new NormSource(bytes());
+ }
+
+ @Override
+ public Source getDirectSource() throws IOException {
+ return getSource();
+ }
+
+ @Override
+ public Type type() {
+ return Type.BYTES_FIXED_STRAIGHT;
+ }
+
+ byte[] bytes() throws IOException {
+ byte[] bytes = new byte[maxdoc];
+ // some norms share fds
+ synchronized(file) {
+ file.seek(offset);
+ file.readBytes(bytes, 0, bytes.length, false);
+ }
+ // we are done with this file
+ if (file != singleNormStream) {
+ openFiles.remove(file);
+ file.close();
+ }
+ return bytes;
+ }
+
+ }
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java Thu Jan 5 16:21:17 2012
@@ -28,6 +28,7 @@ import org.apache.lucene.index.PerDocWri
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
/**
* Default PerDocConsumer implementation that uses compound file.
@@ -36,11 +37,13 @@ import org.apache.lucene.store.Directory
public class Lucene40DocValuesConsumer extends DocValuesWriterBase {
private final Directory mainDirectory;
private Directory directory;
+ private final String segmentSuffix;
+ public final static String DOC_VALUES_SEGMENT_SUFFIX = "dv";
- final static String DOC_VALUES_SEGMENT_SUFFIX = "dv";
-
- public Lucene40DocValuesConsumer(PerDocWriteState state) throws IOException {
+
+ public Lucene40DocValuesConsumer(PerDocWriteState state, String segmentSuffix) throws IOException {
super(state);
+ this.segmentSuffix = segmentSuffix;
mainDirectory = state.directory;
//TODO maybe we should enable a global CFS that all codecs can pull on demand to further reduce the number of files?
}
@@ -50,7 +53,7 @@ public class Lucene40DocValuesConsumer e
// lazy init
if (directory == null) {
directory = new CompoundFileDirectory(mainDirectory,
- IndexFileNames.segmentFileName(segmentName, DOC_VALUES_SEGMENT_SUFFIX,
+ IndexFileNames.segmentFileName(segmentName, segmentSuffix,
IndexFileNames.COMPOUND_FILE_EXTENSION), context, true);
}
return directory;
@@ -75,4 +78,15 @@ public class Lucene40DocValuesConsumer e
}
}
}
+
+ @Override
+ public void abort() {
+ try {
+ close();
+ } catch (IOException ignored) {}
+ IOUtils.deleteFilesIgnoringExceptions(mainDirectory, IndexFileNames.segmentFileName(
+ segmentName, segmentSuffix, IndexFileNames.COMPOUND_FILE_EXTENSION),
+ IndexFileNames.segmentFileName(segmentName, segmentSuffix,
+ IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
+ }
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java Thu Jan 5 16:21:17 2012
@@ -32,12 +32,12 @@ public class Lucene40DocValuesFormat ext
@Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
- return new Lucene40DocValuesConsumer(state);
+ return new Lucene40DocValuesConsumer(state, Lucene40DocValuesConsumer.DOC_VALUES_SEGMENT_SUFFIX);
}
@Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
- return new Lucene40DocValuesProducer(state);
+ return new Lucene40DocValuesProducer(state, Lucene40DocValuesConsumer.DOC_VALUES_SEGMENT_SUFFIX);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java Thu Jan 5 16:21:17 2012
@@ -39,16 +39,15 @@ import org.apache.lucene.util.IOUtils;
public class Lucene40DocValuesProducer extends DocValuesReaderBase {
protected final TreeMap<String,DocValues> docValues;
private final Directory cfs;
-
/**
* Creates a new {@link Lucene40DocValuesProducer} instance and loads all
* {@link DocValues} instances for this segment and codec.
*/
- public Lucene40DocValuesProducer(SegmentReadState state) throws IOException {
- if (state.fieldInfos.anyDocValuesFields()) {
+ public Lucene40DocValuesProducer(SegmentReadState state, String segmentSuffix) throws IOException {
+ if (anyDocValuesFields(state.fieldInfos)) {
cfs = new CompoundFileDirectory(state.dir,
IndexFileNames.segmentFileName(state.segmentInfo.name,
- Lucene40DocValuesConsumer.DOC_VALUES_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION),
+ segmentSuffix, IndexFileNames.COMPOUND_FILE_EXTENSION),
state.context, false);
docValues = load(state.fieldInfos, state.segmentInfo.name, state.segmentInfo.docCount, cfs, state.context);
} else {
Added: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java?rev=1227676&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java (added)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java Thu Jan 5 16:21:17 2012
@@ -0,0 +1,137 @@
+package org.apache.lucene.codecs.lucene40;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.codecs.FieldInfosWriter;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+
+/**
+ * @lucene.experimental
+ */
+public class Lucene40FieldInfosWriter extends FieldInfosWriter {
+
+ /** Extension of field infos */
+ static final String FIELD_INFOS_EXTENSION = "fnm";
+
+ // First used in 2.9; prior to 2.9 there was no format header
+ static final int FORMAT_START = -2;
+ // First used in 3.4: omit only positional information
+ static final int FORMAT_OMIT_POSITIONS = -3;
+ // per-field codec support, records index values for fields
+ static final int FORMAT_FLEX = -4;
+
+ // whenever you add a new format, make it 1 smaller (negative version logic)!
+ static final int FORMAT_CURRENT = FORMAT_FLEX;
+
+ static final byte IS_INDEXED = 0x1;
+ static final byte STORE_TERMVECTOR = 0x2;
+ static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
+ static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
+ static final byte OMIT_NORMS = 0x10;
+ static final byte STORE_PAYLOADS = 0x20;
+ static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
+ static final byte OMIT_POSITIONS = -128;
+
+ @Override
+ public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
+ final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
+ IndexOutput output = directory.createOutput(fileName, context);
+ try {
+ output.writeVInt(FORMAT_CURRENT);
+ output.writeVInt(infos.size());
+ for (FieldInfo fi : infos) {
+ assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
+ byte bits = 0x0;
+ if (fi.isIndexed) bits |= IS_INDEXED;
+ if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
+ if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
+ if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
+ if (fi.omitNorms) bits |= OMIT_NORMS;
+ if (fi.storePayloads) bits |= STORE_PAYLOADS;
+ if (fi.indexOptions == IndexOptions.DOCS_ONLY)
+ bits |= OMIT_TERM_FREQ_AND_POSITIONS;
+ else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS)
+ bits |= OMIT_POSITIONS;
+ output.writeString(fi.name);
+ output.writeInt(fi.number);
+ output.writeByte(bits);
+
+ final byte b;
+
+ if (!fi.hasDocValues()) {
+ b = 0;
+ } else {
+ switch(fi.getDocValuesType()) {
+ case VAR_INTS:
+ b = 1;
+ break;
+ case FLOAT_32:
+ b = 2;
+ break;
+ case FLOAT_64:
+ b = 3;
+ break;
+ case BYTES_FIXED_STRAIGHT:
+ b = 4;
+ break;
+ case BYTES_FIXED_DEREF:
+ b = 5;
+ break;
+ case BYTES_VAR_STRAIGHT:
+ b = 6;
+ break;
+ case BYTES_VAR_DEREF:
+ b = 7;
+ break;
+ case FIXED_INTS_16:
+ b = 8;
+ break;
+ case FIXED_INTS_32:
+ b = 9;
+ break;
+ case FIXED_INTS_64:
+ b = 10;
+ break;
+ case FIXED_INTS_8:
+ b = 11;
+ break;
+ case BYTES_FIXED_SORTED:
+ b = 12;
+ break;
+ case BYTES_VAR_SORTED:
+ b = 13;
+ break;
+ default:
+ throw new IllegalStateException("unhandled indexValues type " + fi.getDocValuesType());
+ }
+ }
+ output.writeByte(b);
+ }
+ } finally {
+ output.close();
+ }
+ }
+
+}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java Thu Jan 5 16:21:17 2012
@@ -1,5 +1,4 @@
package org.apache.lucene.codecs.lucene40;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,38 +15,117 @@ package org.apache.lucene.codecs.lucene4
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.codecs.NormsFormat;
-import org.apache.lucene.codecs.NormsReader;
-import org.apache.lucene.codecs.NormsWriter;
+import org.apache.lucene.codecs.PerDocConsumer;
+import org.apache.lucene.codecs.PerDocProducer;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
+/**
+ * Norms Format for the default codec.
+ * @lucene.experimental
+ */
public class Lucene40NormsFormat extends NormsFormat {
-
+ private final static String NORMS_SEGMENT_SUFFIX = "nrm";
+
@Override
- public NormsReader normsReader(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context, Directory separateNormsDir) throws IOException {
- return new Lucene40NormsReader(dir, info, fields, context, separateNormsDir);
+ public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
+ return new Lucene40NormsDocValuesConsumer(state, NORMS_SEGMENT_SUFFIX);
}
@Override
- public NormsWriter normsWriter(SegmentWriteState state) throws IOException {
- return new Lucene40NormsWriter(state.directory, state.segmentName, state.context);
+ public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
+ return new Lucene40NormsDocValuesProducer(state, NORMS_SEGMENT_SUFFIX);
}
@Override
- public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
- Lucene40NormsReader.files(dir, info, files);
+ public void files(Directory dir, SegmentInfo info, Set<String> files)
+ throws IOException {
+ Lucene40NormsDocValuesConsumer.files(dir, info, files);
+
}
@Override
- public void separateFiles(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
- Lucene40NormsReader.separateFiles(dir, info, files);
+ public PerDocProducer docsProducer(SegmentReadState state,
+ Directory separateNormsDir) throws IOException {
+ return docsProducer(state);
}
+
+
+ public static class Lucene40NormsDocValuesProducer extends Lucene40DocValuesProducer {
+
+ public Lucene40NormsDocValuesProducer(SegmentReadState state,
+ String segmentSuffix) throws IOException {
+ super(state, segmentSuffix);
+ }
+
+ @Override
+ protected boolean canLoad(FieldInfo info) {
+ return !info.omitNorms && info.isIndexed;
+ }
+
+ @Override
+ protected Type getDocValuesType(FieldInfo info) {
+ return Type.BYTES_FIXED_STRAIGHT;
+ }
+
+ @Override
+ protected boolean anyDocValuesFields(FieldInfos infos) {
+ return infos.hasNorms();
+ }
+
+ }
+
+ public static class Lucene40NormsDocValuesConsumer extends Lucene40DocValuesConsumer {
+
+ public Lucene40NormsDocValuesConsumer(PerDocWriteState state,
+ String segmentSuffix) throws IOException {
+ super(state, segmentSuffix);
+ }
+
+ @Override
+ protected DocValues getDocValuesForMerge(IndexReader reader, FieldInfo info)
+ throws IOException {
+ return reader.normValues(info.name);
+ }
+
+ @Override
+ protected boolean canMerge(FieldInfo info) {
+ return !info.omitNorms && info.isIndexed;
+ }
+
+ @Override
+ protected Type getDocValuesType(FieldInfo info) {
+ return Type.BYTES_FIXED_STRAIGHT;
+ }
+
+ public static void files(Directory dir, SegmentInfo segmentInfo, Set<String> files) throws IOException {
+ FieldInfos fieldInfos = segmentInfo.getFieldInfos();
+ for (FieldInfo fieldInfo : fieldInfos) {
+ if (!fieldInfo.omitNorms && fieldInfo.isIndexed) {
+ files.add(IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION));
+ files.add(IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
+ assert dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
+ assert dir.fileExists(IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION));
+ break;
+ }
+ }
+ }
+
+ }
+
+
+
+
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java Thu Jan 5 16:21:17 2012
@@ -122,14 +122,9 @@ public final class Lucene40StoredFieldsW
try {
close();
} catch (IOException ignored) {}
-
- try {
- directory.deleteFile(IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION));
- } catch (IOException ignored) {}
-
- try {
- directory.deleteFile(IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION));
- } catch (IOException ignored) {}
+ IOUtils.deleteFilesIgnoringExceptions(directory,
+ IndexFileNames.segmentFileName(segment, "", FIELDS_EXTENSION),
+ IndexFileNames.segmentFileName(segment, "", FIELDS_INDEX_EXTENSION));
}
public final void writeField(FieldInfo info, IndexableField field) throws IOException {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java Thu Jan 5 16:21:17 2012
@@ -202,18 +202,9 @@ public final class Lucene40TermVectorsWr
try {
close();
} catch (IOException ignored) {}
-
- try {
- directory.deleteFile(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_INDEX_EXTENSION));
- } catch (IOException ignored) {}
-
- try {
- directory.deleteFile(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION));
- } catch (IOException ignored) {}
-
- try {
- directory.deleteFile(IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION));
- } catch (IOException ignored) {}
+ IOUtils.deleteFilesIgnoringExceptions(directory, IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_INDEX_EXTENSION),
+ IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_DOCUMENTS_EXTENSION),
+ IndexFileNames.segmentFileName(segment, "", Lucene40TermVectorsReader.VECTORS_FIELDS_EXTENSION));
}
/**
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java Thu Jan 5 16:21:17 2012
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;
+import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.DocValue;
@@ -122,7 +123,7 @@ public final class Bytes {
* @throws IOException
* if the files for the writer can not be created.
*/
- public static Writer getWriter(Directory dir, String id, Mode mode,
+ public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode,
boolean fixedSize, Comparator<BytesRef> sortComparator,
Counter bytesUsed, IOContext context, boolean fasterButMoreRam)
throws IOException {
@@ -295,7 +296,8 @@ public final class Bytes {
* skipped; they will be filled with 0 bytes.
*/
@Override
- public abstract void add(int docID, BytesRef bytes) throws IOException;
+ protected
+ abstract void add(int docID, BytesRef bytes) throws IOException;
@Override
public abstract void finish(int docCount) throws IOException;
@@ -431,7 +433,7 @@ public final class Bytes {
}
@Override
- public void add(int docID, BytesRef bytes) throws IOException {
+ protected void add(int docID, BytesRef bytes) throws IOException {
if (bytes.length == 0) { // default value - skip it
return;
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java Thu Jan 5 16:21:17 2012
@@ -64,7 +64,7 @@ class FixedStraightBytesImpl {
}
@Override
- public void add(int docID, BytesRef bytes) throws IOException {
+ protected void add(int docID, BytesRef bytes) throws IOException {
assert lastDocID < docID;
if (size == -1) {
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java Thu Jan 5 16:21:17 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene4
*/
import java.io.IOException;
+import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValue;
import org.apache.lucene.index.DocValues.Source;
@@ -44,7 +45,7 @@ public class Floats {
protected static final int VERSION_START = 0;
protected static final int VERSION_CURRENT = VERSION_START;
- public static Writer getWriter(Directory dir, String id, Counter bytesUsed,
+ public static DocValuesConsumer getWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, Type type) throws IOException {
return new FloatsWriter(dir, id, bytesUsed, context, type);
}
@@ -79,7 +80,7 @@ public class Floats {
assert template != null;
}
- public void add(int docID, double v) throws IOException {
+ protected void add(int docID, double v) throws IOException {
template.toBytes(v, bytesRef);
add(docID, bytesRef);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java Thu Jan 5 16:21:17 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene4
import java.io.IOException;
+import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.index.DocValue;
@@ -42,7 +43,7 @@ public final class Ints {
private Ints() {
}
- public static Writer getWriter(Directory dir, String id, Counter bytesUsed,
+ public static DocValuesConsumer getWriter(Directory dir, String id, Counter bytesUsed,
Type type, IOContext context) throws IOException {
return type == Type.VAR_INTS ? new PackedIntValues.PackedIntsWriter(dir, id,
bytesUsed, context) : new IntsWriter(dir, id, bytesUsed, context, type);
@@ -103,7 +104,7 @@ public final class Ints {
}
@Override
- public void add(int docID, long v) throws IOException {
+ protected void add(int docID, long v) throws IOException {
template.toBytes(v, bytesRef);
add(docID, bytesRef);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java Thu Jan 5 16:21:17 2012
@@ -63,7 +63,7 @@ class PackedIntValues {
}
@Override
- public void add(int docID, long v) throws IOException {
+ protected void add(int docID, long v) throws IOException {
assert lastDocId < docID;
if (!started) {
started = true;
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java Thu Jan 5 16:21:17 2012
@@ -80,7 +80,7 @@ class VarStraightBytesImpl {
}
@Override
- public void add(int docID, BytesRef bytes) throws IOException {
+ protected void add(int docID, BytesRef bytes) throws IOException {
assert !merge;
if (bytes.length == 0) {
return; // default
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java Thu Jan 5 16:21:17 2012
@@ -20,11 +20,9 @@ import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.codecs.DocValuesConsumer;
-import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
@@ -41,7 +39,6 @@ import org.apache.lucene.util.Counter;
* @lucene.experimental
*/
public abstract class Writer extends DocValuesConsumer {
- protected Source currentMergeSource;
protected final Counter bytesUsed;
/**
@@ -67,102 +64,6 @@ public abstract class Writer extends Doc
public static final String DATA_EXTENSION = "dat";
/**
- * Records the specified <tt>long</tt> value for the docID or throws an
- * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
- * <tt>long</tt> values.
- *
- * @throws UnsupportedOperationException
- * if this writer doesn't record <tt>long</tt> values
- */
- public void add(int docID, long value) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Records the specified <tt>double</tt> value for the docID or throws an
- * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
- * <tt>double</tt> values.
- *
- * @throws UnsupportedOperationException
- * if this writer doesn't record <tt>double</tt> values
- */
- public void add(int docID, double value) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Records the specified {@link BytesRef} value for the docID or throws an
- * {@link UnsupportedOperationException} if this {@link Writer} doesn't record
- * {@link BytesRef} values.
- *
- * @throws UnsupportedOperationException
- * if this writer doesn't record {@link BytesRef} values
- */
- public void add(int docID, BytesRef value) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- /**
- * Merges a document with the given <code>docID</code>. The methods
- * implementation obtains the value for the <i>sourceDoc</i> id from the
- * current {@link Source} set to <i>setNextMergeSource(Source)</i>.
- * <p>
- * This method is used during merging to provide implementation agnostic
- * default merge implementation.
- * </p>
- * <p>
- * All documents IDs between the given ID and the previously given ID or
- * <tt>0</tt> if the method is call the first time are filled with default
- * values depending on the {@link Writer} implementation. The given document
- * ID must always be greater than the previous ID or <tt>0</tt> if called the
- * first time.
- */
- protected abstract void mergeDoc(int docID, int sourceDoc) throws IOException;
-
- /**
- * Sets the next {@link Source} to consume values from on calls to
- * {@link #mergeDoc(int, int)}
- *
- * @param mergeSource
- * the next {@link Source}, this must not be null
- */
- protected void setNextMergeSource(Source mergeSource) {
- currentMergeSource = mergeSource;
- }
-
- /**
- * Finish writing and close any files and resources used by this Writer.
- *
- * @param docCount
- * the total number of documents for this writer. This must be
- * greater that or equal to the largest document id passed to one of
- * the add methods after the {@link Writer} was created.
- */
- public abstract void finish(int docCount) throws IOException;
-
- @Override
- protected void merge(SingleSubMergeState state) throws IOException {
- // This enables bulk copies in subclasses per MergeState, subclasses can
- // simply override this and decide if they want to merge
- // segments using this generic implementation or if a bulk merge is possible
- // / feasible.
- final Source source = state.reader.getDirectSource();
- assert source != null;
- setNextMergeSource(source); // set the current enum we are working on - the
- // impl. will get the correct reference for the type
- // it supports
- int docID = state.docBase;
- final Bits liveDocs = state.liveDocs;
- final int docCount = state.docCount;
- for (int i = 0; i < docCount; i++) {
- if (liveDocs == null || liveDocs.get(i)) {
- mergeDoc(docID++, i);
- }
- }
-
- }
-
- /**
* Factory method to create a {@link Writer} instance for a given type. This
* method returns default implementations for each of the different types
* defined in the {@link Type} enumeration.
@@ -181,7 +82,7 @@ public abstract class Writer extends Doc
* @return a new {@link Writer} instance for the given {@link Type}
* @throws IOException
*/
- public static Writer create(Type type, String id, Directory directory,
+ public static DocValuesConsumer create(Type type, String id, Directory directory,
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, boolean fasterButMoreRam) throws IOException {
if (comp == null) {
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java?rev=1227676&r1=1227675&r2=1227676&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/codecs/sep/SepDocValuesConsumer.java Thu Jan 5 16:21:17 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.sep;
*/
import java.io.IOException;
+import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.codecs.DocValuesWriterBase;
@@ -28,6 +29,7 @@ import org.apache.lucene.index.IndexFile
import org.apache.lucene.index.PerDocWriteState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
/**
* Implementation of PerDocConsumer that uses separate files.
@@ -35,10 +37,11 @@ import org.apache.lucene.store.Directory
*/
public class SepDocValuesConsumer extends DocValuesWriterBase {
private final Directory directory;
-
+ private final FieldInfos fieldInfos;
public SepDocValuesConsumer(PerDocWriteState state) throws IOException {
super(state);
this.directory = state.directory;
+ fieldInfos = state.fieldInfos;
}
@Override
@@ -46,13 +49,16 @@ public class SepDocValuesConsumer extend
return directory;
}
- @SuppressWarnings("fallthrough")
public static void files(Directory dir, SegmentInfo segmentInfo,
Set<String> files) throws IOException {
- FieldInfos fieldInfos = segmentInfo.getFieldInfos();
+ files(dir, segmentInfo.getFieldInfos(), segmentInfo.name, files);
+ }
+
+ @SuppressWarnings("fallthrough")
+ private static void files(Directory dir,FieldInfos fieldInfos, String segmentName, Set<String> files) {
for (FieldInfo fieldInfo : fieldInfos) {
if (fieldInfo.hasDocValues()) {
- String filename = docValuesId(segmentInfo.name, fieldInfo.number);
+ String filename = docValuesId(segmentName, fieldInfo.number);
switch (fieldInfo.getDocValuesType()) {
case BYTES_FIXED_DEREF:
case BYTES_VAR_DEREF:
@@ -61,8 +67,13 @@ public class SepDocValuesConsumer extend
case BYTES_VAR_SORTED:
files.add(IndexFileNames.segmentFileName(filename, "",
Writer.INDEX_EXTENSION));
+ try {
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
Writer.INDEX_EXTENSION));
+ } catch (IOException e) {
+ // don't throw checked exception - dir is only used in assert
+ throw new RuntimeException(e);
+ }
// until here all types use an index
case BYTES_FIXED_STRAIGHT:
case FLOAT_32:
@@ -74,8 +85,13 @@ public class SepDocValuesConsumer extend
case FIXED_INTS_8:
files.add(IndexFileNames.segmentFileName(filename, "",
Writer.DATA_EXTENSION));
+ try {
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
Writer.DATA_EXTENSION));
+ } catch (IOException e) {
+ // don't throw checked exception - dir is only used in assert
+ throw new RuntimeException(e);
+ }
break;
default:
assert false;
@@ -83,4 +99,11 @@ public class SepDocValuesConsumer extend
}
}
}
+
+ @Override
+ public void abort() {
+ Set<String> files = new HashSet<String>();
+ files(directory, fieldInfos, segmentName, files);
+ IOUtils.deleteFilesIgnoringExceptions(directory, files.toArray(new String[0]));
+ }
}