You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2011/05/22 23:45:45 UTC
svn commit: r1126234 [10/28] - in /lucene/dev/branches/solr2452: ./
dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/
dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/db/bdb-je/ dev-tools/idea/lucene/contri...
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java Sun May 22 21:45:19 2011
@@ -17,84 +17,59 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Map;
+
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
-import java.io.IOException;
-import java.util.Collection;
-
-import java.util.Map;
-
final class TermVectorsTermsWriter extends TermsHashConsumer {
- final DocumentsWriter docWriter;
- PerDoc[] docFreeList = new PerDoc[1];
+ final DocumentsWriterPerThread docWriter;
int freeCount;
IndexOutput tvx;
IndexOutput tvd;
IndexOutput tvf;
int lastDocID;
+
+ final DocumentsWriterPerThread.DocState docState;
+ final BytesRef flushTerm = new BytesRef();
+
+ // Used by perField when serializing the term vectors
+ final ByteSliceReader vectorSliceReader = new ByteSliceReader();
boolean hasVectors;
- public TermVectorsTermsWriter(DocumentsWriter docWriter) {
+ public TermVectorsTermsWriter(DocumentsWriterPerThread docWriter) {
this.docWriter = docWriter;
+ docState = docWriter.docState;
}
@Override
- public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {
- return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
- }
-
- @Override
- synchronized void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
+ void flush(Map<FieldInfo, TermsHashConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
if (tvx != null) {
// At least one doc in this run had term vectors enabled
fill(state.numDocs);
+ assert state.segmentName != null;
+ String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
tvx.close();
tvf.close();
tvd.close();
tvx = tvd = tvf = null;
- assert state.segmentName != null;
- String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
- if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) {
+ if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) {
throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
}
lastDocID = 0;
- state.hasVectors = hasVectors;
hasVectors = false;
}
- for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
- for (final TermsHashConsumerPerField field : entry.getValue() ) {
- TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
- perField.termsHashPerField.reset();
- perField.shrinkHash();
- }
-
- TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey();
- perThread.termsHashPerThread.reset(true);
- }
- }
-
- int allocCount;
-
- synchronized PerDoc getPerDoc() {
- if (freeCount == 0) {
- allocCount++;
- if (allocCount > docFreeList.length) {
- // Grow our free list up front to make sure we have
- // enough space to recycle all outstanding PerDoc
- // instances
- assert allocCount == 1+docFreeList.length;
- docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
- }
- return new PerDoc();
- } else {
- return docFreeList[--freeCount];
+ for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) {
+ TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
+ perField.termsHashPerField.reset();
+ perField.shrinkHash();
}
}
@@ -112,18 +87,17 @@ final class TermVectorsTermsWriter exten
}
}
- synchronized void initTermVectorsWriter() throws IOException {
+ private final void initTermVectorsWriter() throws IOException {
if (tvx == null) {
// If we hit an exception while init'ing the term
// vector output files, we must abort this segment
// because those files will be in an unknown
// state:
- hasVectors = true;
tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
-
+
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
@@ -132,39 +106,45 @@ final class TermVectorsTermsWriter exten
}
}
- synchronized void finishDocument(PerDoc perDoc) throws IOException {
+ @Override
+ void finishDocument(TermsHash termsHash) throws IOException {
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");
+ if (!hasVectors) {
+ return;
+ }
+
initTermVectorsWriter();
- fill(perDoc.docID);
+ fill(docState.docID);
// Append term vectors to the real outputs:
tvx.writeLong(tvd.getFilePointer());
tvx.writeLong(tvf.getFilePointer());
- tvd.writeVInt(perDoc.numVectorFields);
- if (perDoc.numVectorFields > 0) {
- for(int i=0;i<perDoc.numVectorFields;i++) {
- tvd.writeVInt(perDoc.fieldNumbers[i]);
- }
- assert 0 == perDoc.fieldPointers[0];
- long lastPos = perDoc.fieldPointers[0];
- for(int i=1;i<perDoc.numVectorFields;i++) {
- long pos = perDoc.fieldPointers[i];
+ tvd.writeVInt(numVectorFields);
+ if (numVectorFields > 0) {
+ for(int i=0;i<numVectorFields;i++) {
+ tvd.writeVInt(perFields[i].fieldInfo.number);
+ }
+ long lastPos = tvf.getFilePointer();
+ perFields[0].finishDocument();
+ for(int i=1;i<numVectorFields;i++) {
+ long pos = tvf.getFilePointer();
tvd.writeVLong(pos-lastPos);
lastPos = pos;
+ perFields[i].finishDocument();
+ // commit the termVectors once successful - FI will otherwise reset them
+ perFields[i].fieldInfo.commitVectors();
}
- perDoc.perDocTvf.writeTo(tvf);
- perDoc.numVectorFields = 0;
}
- assert lastDocID == perDoc.docID;
+ assert lastDocID == docState.docID;
lastDocID++;
- perDoc.reset();
- free(perDoc);
+ termsHash.reset();
+ reset();
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
}
@@ -189,55 +169,58 @@ final class TermVectorsTermsWriter exten
}
tvx = tvd = tvf = null;
lastDocID = 0;
- }
- synchronized void free(PerDoc doc) {
- assert freeCount < docFreeList.length;
- docFreeList[freeCount++] = doc;
+ reset();
}
- class PerDoc extends DocumentsWriter.DocWriter {
+ int numVectorFields;
- final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
- RAMOutputStream perDocTvf = new RAMOutputStream(buffer);
+ TermVectorsTermsWriterPerField[] perFields;
- int numVectorFields;
+ void reset() {
+ numVectorFields = 0;
+ perFields = new TermVectorsTermsWriterPerField[1];
+ }
- int[] fieldNumbers = new int[1];
- long[] fieldPointers = new long[1];
+ @Override
+ public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) {
+ return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
+ }
- void reset() {
- perDocTvf.reset();
- buffer.recycle();
- numVectorFields = 0;
+ void addFieldToFlush(TermVectorsTermsWriterPerField fieldToFlush) {
+ if (numVectorFields == perFields.length) {
+ int newSize = ArrayUtil.oversize(numVectorFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ TermVectorsTermsWriterPerField[] newArray = new TermVectorsTermsWriterPerField[newSize];
+ System.arraycopy(perFields, 0, newArray, 0, numVectorFields);
+ perFields = newArray;
}
- @Override
- void abort() {
- reset();
- free(this);
- }
+ perFields[numVectorFields++] = fieldToFlush;
+ }
- void addField(final int fieldNumber) {
- if (numVectorFields == fieldNumbers.length) {
- fieldNumbers = ArrayUtil.grow(fieldNumbers);
- }
- if (numVectorFields == fieldPointers.length) {
- fieldPointers = ArrayUtil.grow(fieldPointers);
- }
- fieldNumbers[numVectorFields] = fieldNumber;
- fieldPointers[numVectorFields] = perDocTvf.getFilePointer();
- numVectorFields++;
- }
+ @Override
+ void startDocument() throws IOException {
+ assert clearLastVectorFieldName();
+ reset();
+ }
- @Override
- public long sizeInBytes() {
- return buffer.getSizeInBytes();
- }
+ // Called only by assert
+ final boolean clearLastVectorFieldName() {
+ lastVectorFieldName = null;
+ return true;
+ }
- @Override
- public void finish() throws IOException {
- finishDocument(this);
+ // Called only by assert
+ String lastVectorFieldName;
+ final boolean vectorFieldsInOrder(FieldInfo fi) {
+ try {
+ if (lastVectorFieldName != null)
+ return lastVectorFieldName.compareTo(fi.name) < 0;
+ else
+ return true;
+ } finally {
+ lastVectorFieldName = fi.name;
}
}
+
}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java Sun May 22 21:45:19 2011
@@ -28,11 +28,10 @@ import org.apache.lucene.util.RamUsageEs
final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
- final TermVectorsTermsWriterPerThread perThread;
final TermsHashPerField termsHashPerField;
final TermVectorsTermsWriter termsWriter;
final FieldInfo fieldInfo;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
boolean doVectors;
@@ -41,11 +40,10 @@ final class TermVectorsTermsWriterPerFie
int maxNumPostings;
OffsetAttribute offsetAttribute = null;
-
- public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo) {
+
+ public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriter termsWriter, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
- this.perThread = perThread;
- this.termsWriter = perThread.termsWriter;
+ this.termsWriter = termsWriter;
this.fieldInfo = fieldInfo;
docState = termsHashPerField.docState;
fieldState = termsHashPerField.fieldState;
@@ -72,22 +70,12 @@ final class TermVectorsTermsWriterPerFie
}
if (doVectors) {
- if (perThread.doc == null) {
- perThread.doc = termsWriter.getPerDoc();
- perThread.doc.docID = docState.docID;
- assert perThread.doc.numVectorFields == 0;
- assert 0 == perThread.doc.perDocTvf.length();
- assert 0 == perThread.doc.perDocTvf.getFilePointer();
- }
-
- assert perThread.doc.docID == docState.docID;
-
+ termsWriter.hasVectors = true;
if (termsHashPerField.bytesHash.size() != 0) {
// Only necessary if previous doc hit a
// non-aborting exception while writing vectors in
// this field:
termsHashPerField.reset();
- perThread.termsHashPerThread.reset(false);
}
}
@@ -95,42 +83,42 @@ final class TermVectorsTermsWriterPerFie
//perThread.postingsCount = 0;
return doVectors;
- }
+ }
public void abort() {}
/** Called once per field per document if term vectors
* are enabled, to write the vectors to
* RAMOutputStream, which is then quickly flushed to
- * the real term vectors files in the Directory. */
- @Override
+ * the real term vectors files in the Directory. */ @Override
void finish() throws IOException {
+ if (!doVectors || termsHashPerField.bytesHash.size() == 0)
+ return;
+ termsWriter.addFieldToFlush(this);
+ }
+
+ void finishDocument() throws IOException {
assert docState.testPoint("TermVectorsTermsWriterPerField.finish start");
final int numPostings = termsHashPerField.bytesHash.size();
- final BytesRef flushTerm = perThread.flushTerm;
+ final BytesRef flushTerm = termsWriter.flushTerm;
assert numPostings >= 0;
- if (!doVectors || numPostings == 0)
- return;
-
if (numPostings > maxNumPostings)
maxNumPostings = numPostings;
- final IndexOutput tvf = perThread.doc.perDocTvf;
-
// This is called once, after inverting all occurrences
// of a given field in the doc. At this point we flush
// our hash into the DocWriter.
assert fieldInfo.storeTermVector;
- assert perThread.vectorFieldsInOrder(fieldInfo);
+ assert termsWriter.vectorFieldsInOrder(fieldInfo);
- perThread.doc.addField(termsHashPerField.fieldInfo.number);
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
+ final IndexOutput tvf = termsWriter.tvf;
// TODO: we may want to make this sort in same order
// as Codec's terms dict?
@@ -140,21 +128,21 @@ final class TermVectorsTermsWriterPerFie
byte bits = 0x0;
if (doVectorPositions)
bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
- if (doVectorOffsets)
+ if (doVectorOffsets)
bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
tvf.writeByte(bits);
int lastLen = 0;
byte[] lastBytes = null;
int lastStart = 0;
-
- final ByteSliceReader reader = perThread.vectorSliceReader;
- final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool;
+
+ final ByteSliceReader reader = termsWriter.vectorSliceReader;
+ final ByteBlockPool termBytePool = termsHashPerField.termBytePool;
for(int j=0;j<numPostings;j++) {
final int termID = termIDs[j];
final int freq = postings.freqs[termID];
-
+
// Get BytesRef
termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);
@@ -192,20 +180,13 @@ final class TermVectorsTermsWriterPerFie
}
termsHashPerField.reset();
-
- // NOTE: we clear, per-field, at the thread level,
- // because term vectors fully write themselves on each
- // field; this saves RAM (eg if large doc has two large
- // fields w/ term vectors on) because we recycle/reuse
- // all RAM after each field:
- perThread.termsHashPerThread.reset(false);
}
void shrinkHash() {
termsHashPerField.shrinkHash(maxNumPostings);
maxNumPostings = 0;
}
-
+
@Override
void start(Fieldable f) {
if (doVectorOffsets) {
@@ -225,7 +206,7 @@ final class TermVectorsTermsWriterPerFie
if (doVectorOffsets) {
int startOffset = fieldState.offset + offsetAttribute.startOffset();
int endOffset = fieldState.offset + offsetAttribute.endOffset();
-
+
termsHashPerField.writeVInt(1, startOffset);
termsHashPerField.writeVInt(1, endOffset - startOffset);
postings.lastOffsets[termID] = endOffset;
@@ -243,13 +224,13 @@ final class TermVectorsTermsWriterPerFie
assert docState.testPoint("TermVectorsTermsWriterPerField.addTerm start");
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
-
+
postings.freqs[termID]++;
if (doVectorOffsets) {
int startOffset = fieldState.offset + offsetAttribute.startOffset();
int endOffset = fieldState.offset + offsetAttribute.endOffset();
-
+
termsHashPerField.writeVInt(1, startOffset - postings.lastOffsets[termID]);
termsHashPerField.writeVInt(1, endOffset - startOffset);
postings.lastOffsets[termID] = endOffset;
@@ -280,7 +261,7 @@ final class TermVectorsTermsWriterPerFie
int[] freqs; // How many times this term occurred in the current doc
int[] lastOffsets; // Last offset we saw
int[] lastPositions; // Last position where this term occurred
-
+
@Override
ParallelPostingsArray newInstance(int size) {
return new TermVectorsPostingsArray(size);
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java Sun May 22 21:45:19 2011
@@ -20,12 +20,13 @@ package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import java.io.IOException;
final class TermVectorsWriter {
-
+
private IndexOutput tvx = null, tvd = null, tvf = null;
private FieldInfos fieldInfos;
@@ -46,7 +47,7 @@ final class TermVectorsWriter {
/**
* Add a complete document specified by all its term vectors. If document has no
* term vectors, add value for tvx.
- *
+ *
* @param vectors
* @throws IOException
*/
@@ -99,7 +100,7 @@ final class TermVectorsWriter {
final int[] freqs = vectors[i].getTermFrequencies();
for (int j=0; j<numTerms; j++) {
-
+
int start = j == 0 ? 0 : StringHelper.bytesDifference(terms[j-1].bytes,
terms[j-1].length,
terms[j].bytes,
@@ -181,30 +182,11 @@ final class TermVectorsWriter {
assert tvd.getFilePointer() == tvdPosition;
assert tvf.getFilePointer() == tvfPosition;
}
-
+
/** Close all streams. */
final void close() throws IOException {
// make an effort to close all streams we can but remember and re-throw
// the first exception encountered in this process
- IOException keep = null;
- if (tvx != null)
- try {
- tvx.close();
- } catch (IOException e) {
- keep = e;
- }
- if (tvd != null)
- try {
- tvd.close();
- } catch (IOException e) {
- if (keep == null) keep = e;
- }
- if (tvf != null)
- try {
- tvf.close();
- } catch (IOException e) {
- if (keep == null) keep = e;
- }
- if (keep != null) throw (IOException) keep.fillInStackTrace();
+ IOUtils.closeSafely(tvx, tvd, tvf);
}
}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHash.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHash.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHash.java Sun May 22 21:45:19 2011
@@ -18,12 +18,12 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
import java.util.Map;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+
/** This class implements {@link InvertedDocConsumer}, which
* is passed each token produced by the analyzer on each
* field. It stores these tokens in a hash table, and
@@ -36,78 +36,118 @@ final class TermsHash extends InvertedDo
final TermsHashConsumer consumer;
final TermsHash nextTermsHash;
- final DocumentsWriter docWriter;
+ final DocumentsWriterPerThread docWriter;
+
+ final IntBlockPool intPool;
+ final ByteBlockPool bytePool;
+ ByteBlockPool termBytePool;
+
+ final boolean primary;
+ final DocumentsWriterPerThread.DocState docState;
+
+ // Used when comparing postings via termRefComp, in TermsHashPerField
+ final BytesRef tr1 = new BytesRef();
+ final BytesRef tr2 = new BytesRef();
- boolean trackAllocations;
+ // Used by perField to obtain terms from the analysis chain
+ final BytesRef termBytesRef = new BytesRef(10);
- public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) {
+ final boolean trackAllocations;
+
+
+ public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
+ this.docState = docWriter.docState;
this.docWriter = docWriter;
this.consumer = consumer;
+ this.trackAllocations = trackAllocations;
this.nextTermsHash = nextTermsHash;
- this.trackAllocations = trackAllocations;
+ intPool = new IntBlockPool(docWriter);
+ bytePool = new ByteBlockPool(docWriter.byteBlockAllocator);
+
+ if (nextTermsHash != null) {
+ // We are primary
+ primary = true;
+ termBytePool = bytePool;
+ nextTermsHash.termBytePool = bytePool;
+ } else {
+ primary = false;
+ }
}
@Override
- InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
- return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null);
+ public void abort() {
+ reset();
+ try {
+ consumer.abort();
+ } finally {
+ if (nextTermsHash != null) {
+ nextTermsHash.abort();
+ }
+ }
}
- TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) {
- return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread);
- }
+ // Clear all state
+ void reset() {
+ intPool.reset();
+ bytePool.reset();
- @Override
- public void abort() {
- consumer.abort();
- if (nextTermsHash != null)
- nextTermsHash.abort();
+ if (primary) {
+ bytePool.reset();
+ }
}
@Override
- synchronized void flush(Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
- Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> childThreadsAndFields = new HashMap<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>>();
- Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> nextThreadsAndFields;
-
- if (nextTermsHash != null)
- nextThreadsAndFields = new HashMap<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>>();
- else
- nextThreadsAndFields = null;
+ void flush(Map<FieldInfo,InvertedDocConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
+ Map<FieldInfo,TermsHashConsumerPerField> childFields = new HashMap<FieldInfo,TermsHashConsumerPerField>();
+ Map<FieldInfo,InvertedDocConsumerPerField> nextChildFields;
+
+ if (nextTermsHash != null) {
+ nextChildFields = new HashMap<FieldInfo,InvertedDocConsumerPerField>();
+ } else {
+ nextChildFields = null;
+ }
- for (final Map.Entry<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> entry : threadsAndFields.entrySet()) {
+ for (final Map.Entry<FieldInfo,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
+ TermsHashPerField perField = (TermsHashPerField) entry.getValue();
+ childFields.put(entry.getKey(), perField.consumer);
+ if (nextTermsHash != null) {
+ nextChildFields.put(entry.getKey(), perField.nextPerField);
+ }
+ }
- TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey();
+ consumer.flush(childFields, state);
- Collection<InvertedDocConsumerPerField> fields = entry.getValue();
+ if (nextTermsHash != null) {
+ nextTermsHash.flush(nextChildFields, state);
+ }
+ }
- Iterator<InvertedDocConsumerPerField> fieldsIt = fields.iterator();
- Collection<TermsHashConsumerPerField> childFields = new HashSet<TermsHashConsumerPerField>();
- Collection<InvertedDocConsumerPerField> nextChildFields;
+ @Override
+ InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
+ return new TermsHashPerField(docInverterPerField, this, nextTermsHash, fieldInfo);
+ }
- if (nextTermsHash != null)
- nextChildFields = new HashSet<InvertedDocConsumerPerField>();
- else
- nextChildFields = null;
+ @Override
+ public boolean freeRAM() {
+ return false;
+ }
- while(fieldsIt.hasNext()) {
- TermsHashPerField perField = (TermsHashPerField) fieldsIt.next();
- childFields.add(perField.consumer);
- if (nextTermsHash != null)
- nextChildFields.add(perField.nextPerField);
+ @Override
+ void finishDocument() throws IOException {
+ try {
+ consumer.finishDocument(this);
+ } finally {
+ if (nextTermsHash != null) {
+ nextTermsHash.consumer.finishDocument(nextTermsHash);
}
-
- childThreadsAndFields.put(perThread.consumer, childFields);
- if (nextTermsHash != null)
- nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields);
}
-
- consumer.flush(childThreadsAndFields, state);
-
- if (nextTermsHash != null)
- nextTermsHash.flush(nextThreadsAndFields, state);
}
@Override
- synchronized public boolean freeRAM() {
- return false;
+ void startDocument() throws IOException {
+ consumer.startDocument();
+ if (nextTermsHash != null) {
+ nextTermsHash.consumer.startDocument();
+ }
}
}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java Sun May 22 21:45:19 2011
@@ -18,11 +18,12 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.Map;
abstract class TermsHashConsumer {
- abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread);
- abstract void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException;
+ abstract void flush(Map<FieldInfo, TermsHashConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException;
abstract void abort();
- }
+ abstract void startDocument() throws IOException;
+ abstract void finishDocument(TermsHash termsHash) throws IOException;
+ abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
+}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java Sun May 22 21:45:19 2011
@@ -34,9 +34,10 @@ final class TermsHashPerField extends In
final TermsHashConsumerPerField consumer;
+ final TermsHash termsHash;
+
final TermsHashPerField nextPerField;
- final TermsHashPerThread perThread;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
TermToBytesRefAttribute termAtt;
BytesRef termBytesRef;
@@ -52,27 +53,27 @@ final class TermsHashPerField extends In
final FieldInfo fieldInfo;
final BytesRefHash bytesHash;
-
+
ParallelPostingsArray postingsArray;
private final AtomicLong bytesUsed;
- public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) {
- this.perThread = perThread;
- intPool = perThread.intPool;
- bytePool = perThread.bytePool;
- termBytePool = perThread.termBytePool;
- docState = perThread.docState;
- bytesUsed = perThread.termsHash.trackAllocations?perThread.termsHash.docWriter.bytesUsed:new AtomicLong();
-
+ public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHash termsHash, final TermsHash nextTermsHash, final FieldInfo fieldInfo) {
+ intPool = termsHash.intPool;
+ bytePool = termsHash.bytePool;
+ termBytePool = termsHash.termBytePool;
+ docState = termsHash.docState;
+ this.termsHash = termsHash;
+ bytesUsed = termsHash.trackAllocations ? termsHash.docWriter.bytesUsed
+ : new AtomicLong();
fieldState = docInverterPerField.fieldState;
- this.consumer = perThread.consumer.addField(this, fieldInfo);
+ this.consumer = termsHash.consumer.addField(this, fieldInfo);
PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
- bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
+ bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
streamCount = consumer.getStreamCount();
numPostingInt = 2*streamCount;
this.fieldInfo = fieldInfo;
- if (nextPerThread != null)
- nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);
+ if (nextTermsHash != null)
+ nextPerField = (TermsHashPerField) nextTermsHash.addField(docInverterPerField, fieldInfo);
else
nextPerField = null;
}
@@ -80,7 +81,7 @@ final class TermsHashPerField extends In
void shrinkHash(int targetSize) {
// Fully free the bytesHash on each flush but keep the pool untouched
// bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too
- bytesHash.clear(false);
+ bytesHash.clear(false);
}
public void reset() {
@@ -90,7 +91,7 @@ final class TermsHashPerField extends In
}
@Override
- synchronized public void abort() {
+ public void abort() {
reset();
if (nextPerField != null)
nextPerField.abort();
@@ -99,14 +100,13 @@ final class TermsHashPerField extends In
public void initReader(ByteSliceReader reader, int termID, int stream) {
assert stream < streamCount;
int intStart = postingsArray.intStarts[termID];
- final int[] ints = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
- final int upto = intStart & DocumentsWriter.INT_BLOCK_MASK;
+ final int[] ints = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
+ final int upto = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
reader.init(bytePool,
postingsArray.byteStarts[termID]+stream*ByteBlockPool.FIRST_LEVEL_SIZE,
ints[upto+stream]);
}
-
/** Collapse the hash table & sort in-place. */
public int[] sortPostings(Comparator<BytesRef> termComp) {
return bytesHash.sort(termComp);
@@ -124,7 +124,7 @@ final class TermsHashPerField extends In
nextPerField.start(f);
}
}
-
+
@Override
boolean start(Fieldable[] fields, int count) throws IOException {
doCall = consumer.start(fields, count);
@@ -143,11 +143,12 @@ final class TermsHashPerField extends In
// First time we are seeing this token since we last
// flushed the hash.
// Init stream slices
- if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
+ if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE)
intPool.nextBuffer();
- if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE)
+ if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
bytePool.nextBuffer();
+ }
intUptos = intPool.buffer;
intUptoStart = intPool.intUpto;
@@ -166,8 +167,8 @@ final class TermsHashPerField extends In
} else {
termID = (-termID)-1;
int intStart = postingsArray.intStarts[termID];
- intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
- intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK;
+ intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
+ intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
consumer.addTerm(termID);
}
}
@@ -192,7 +193,7 @@ final class TermsHashPerField extends In
if (docState.maxTermPrefix == null) {
final int saved = termBytesRef.length;
try {
- termBytesRef.length = Math.min(30, DocumentsWriter.MAX_TERM_LENGTH_UTF8);
+ termBytesRef.length = Math.min(30, DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8);
docState.maxTermPrefix = termBytesRef.toString();
} finally {
termBytesRef.length = saved;
@@ -204,7 +205,7 @@ final class TermsHashPerField extends In
if (termID >= 0) {// New posting
bytesHash.byteStart(termID);
// Init stream slices
- if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) {
+ if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) {
intPool.nextBuffer();
}
@@ -229,8 +230,8 @@ final class TermsHashPerField extends In
} else {
termID = (-termID)-1;
final int intStart = postingsArray.intStarts[termID];
- intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
- intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK;
+ intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
+ intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
consumer.addTerm(termID);
}
@@ -278,7 +279,7 @@ final class TermsHashPerField extends In
if (nextPerField != null)
nextPerField.finish();
}
-
+
private static final class PostingsBytesStartArray extends BytesStartArray {
private final TermsHashPerField perField;
@@ -289,10 +290,10 @@ final class TermsHashPerField extends In
this.perField = perField;
this.bytesUsed = bytesUsed;
}
-
+
@Override
public int[] init() {
- if(perField.postingsArray == null) {
+ if(perField.postingsArray == null) {
perField.postingsArray = perField.consumer.createPostingsArray(2);
bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
}
@@ -312,7 +313,7 @@ final class TermsHashPerField extends In
@Override
public int[] clear() {
if(perField.postingsArray != null) {
- bytesUsed.addAndGet(-perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
+ bytesUsed.addAndGet(-(perField.postingsArray.size * perField.postingsArray.bytesPerPosting()));
perField.postingsArray = null;
}
return null;
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Sun May 22 21:45:19 2011
@@ -151,10 +151,10 @@ public class BlockTermsReader extends Fi
}
protected void readHeader(IndexInput input) throws IOException {
- CodecUtil.checkHeader(in, BlockTermsWriter.CODEC_NAME,
+ CodecUtil.checkHeader(input, BlockTermsWriter.CODEC_NAME,
BlockTermsWriter.VERSION_START,
BlockTermsWriter.VERSION_CURRENT);
- dirOffset = in.readLong();
+ dirOffset = input.readLong();
}
protected void seekDir(IndexInput input, long dirOffset)
@@ -842,6 +842,11 @@ public class BlockTermsReader extends Fi
private void decodeMetaData() throws IOException {
//System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termCount + " state=" + state);
if (!seekPending) {
+ // TODO: cutover to random-access API
+ // here.... really stupid that we have to decode N
+ // wasted term metadata just to get to the N+1th
+ // that we really need...
+
// lazily catch up on metadata decode:
final int limit = state.termCount;
// We must set/incr state.termCount because
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Sun May 22 21:45:19 2011
@@ -71,6 +71,11 @@ public class CodecProvider {
}
}
}
+
+ /** @lucene.internal */
+ public synchronized Set<String> listAll() {
+ return codecs.keySet();
+ }
public Collection<String> getAllExtensions() {
return knownExtensions;
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListReader.java Sun May 22 21:45:19 2011
@@ -184,9 +184,21 @@ public abstract class MultiLevelSkipList
}
}
+ /** returns x == 0 ? 0 : Math.floor(Math.log(x) / Math.log(base)) */
+ static int log(int x, int base) {
+ assert base >= 2;
+ int ret = 0;
+ long n = base; // needs to be a long to avoid overflow
+ while (x >= n) {
+ n *= base;
+ ret++;
+ }
+ return ret;
+ }
+
/** Loads the skip levels */
private void loadSkipLevels() throws IOException {
- numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math.log(docCount) / Math.log(skipInterval[0]));
+ numberOfSkipLevels = log(docCount, skipInterval[0]);
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
numberOfSkipLevels = maxNumberOfSkipLevels;
}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListWriter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListWriter.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/MultiLevelSkipListWriter.java Sun May 22 21:45:19 2011
@@ -61,7 +61,7 @@ public abstract class MultiLevelSkipList
this.skipInterval = skipInterval;
// calculate the maximum number of skip levels for this document frequency
- numberOfSkipLevels = df == 0 ? 0 : (int) Math.floor(Math.log(df) / Math.log(skipInterval));
+ numberOfSkipLevels = MultiLevelSkipListReader.log(df, skipInterval);
// make sure it does not exceed maxSkipLevels
if (numberOfSkipLevels > maxSkipLevels) {
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java Sun May 22 21:45:19 2011
@@ -18,6 +18,9 @@ package org.apache.lucene.index.codecs;
*/
import java.io.IOException;
+import java.io.FileOutputStream; // for toDot
+import java.io.OutputStreamWriter; // for toDot
+import java.io.Writer; // for toDot
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
@@ -34,6 +37,7 @@ import org.apache.lucene.util.automaton.
import org.apache.lucene.util.automaton.fst.BytesRefFSTEnum;
import org.apache.lucene.util.automaton.fst.FST;
import org.apache.lucene.util.automaton.fst.PositiveIntOutputs;
+import org.apache.lucene.util.automaton.fst.Util; // for toDot
/** See {@link VariableGapTermsIndexWriter}
*
@@ -52,11 +56,13 @@ public class VariableGapTermsIndexReader
// start of the field info data
protected long dirOffset;
+ final String segment;
+
public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String codecId)
throws IOException {
in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION));
-
+ this.segment = segment;
boolean success = false;
try {
@@ -176,6 +182,14 @@ public class VariableGapTermsIndexReader
fst = new FST<Long>(clone, fstOutputs);
clone.close();
+ /*
+ final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
+ Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
+ Util.toDot(fst, w, false, false);
+ System.out.println("FST INDEX: SAVED to " + dotFileName);
+ w.close();
+ */
+
if (indexDivisor > 1) {
// subsample
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java Sun May 22 21:45:19 2011
@@ -200,6 +200,7 @@ public class VariableGapTermsIndexWriter
private class FSTFieldWriter extends FieldWriter {
private final Builder<Long> fstBuilder;
private final PositiveIntOutputs fstOutputs;
+ private final long startTermsFilePointer;
final FieldInfo fieldInfo;
int numIndexTerms;
@@ -220,6 +221,7 @@ public class VariableGapTermsIndexWriter
// Always put empty string in
fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
+ startTermsFilePointer = termsFilePointer;
}
@Override
@@ -239,6 +241,11 @@ public class VariableGapTermsIndexWriter
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
+ if (text.length == 0) {
+ // We already added empty string in ctor
+ assert termsFilePointer == startTermsFilePointer;
+ return;
+ }
final int lengthSave = text.length;
text.length = indexedTermPrefixLength(lastTerm, text);
try {
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/preflex/TermInfosReader.java Sun May 22 21:45:19 2011
@@ -310,7 +310,7 @@ public final class TermInfosReader {
}
} else {
assert sameTermInfo(ti, tiOrd, enumerator);
- assert (int) enumerator.position == tiOrd.termOrd;
+ assert enumerator.position == tiOrd.termOrd;
}
} else {
ti = null;
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Sun May 22 21:45:19 2011
@@ -68,15 +68,8 @@ public class PulsingPostingsReaderImpl e
@Override
public Object clone() {
- PulsingTermState clone;
- clone = (PulsingTermState) super.clone();
- if (postingsSize != -1) {
- clone.postings = new byte[postingsSize];
- System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
- } else {
- assert wrappedTermState != null;
- clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
- }
+ PulsingTermState clone = new PulsingTermState();
+ clone.copyFrom(this);
return clone;
}
@@ -90,8 +83,10 @@ public class PulsingPostingsReaderImpl e
postings = new byte[ArrayUtil.oversize(other.postingsSize, 1)];
}
System.arraycopy(other.postings, 0, postings, 0, other.postingsSize);
- } else {
+ } else if (wrappedTermState != null) {
wrappedTermState.copyFrom(other.wrappedTermState);
+ } else {
+ wrappedTermState = (BlockTermState) other.wrappedTermState.clone();
}
// NOTE: we do not copy the
@@ -233,6 +228,7 @@ public class PulsingPostingsReaderImpl e
private Bits skipDocs;
private int docID;
private int freq;
+ private int payloadLength;
public PulsingDocsEnum(FieldInfo fieldInfo) {
omitTF = fieldInfo.omitTermFreqAndPositions;
@@ -246,6 +242,7 @@ public class PulsingPostingsReaderImpl e
System.arraycopy(termState.postings, 0, bytes, 0, termState.postingsSize);
postings.reset(bytes);
docID = 0;
+ payloadLength = 0;
freq = 1;
this.skipDocs = skipDocs;
return this;
@@ -277,7 +274,6 @@ public class PulsingPostingsReaderImpl e
// Skip positions
if (storePayloads) {
- int payloadLength = -1;
for(int pos=0;pos<freq;pos++) {
final int posCode = postings.readVInt();
if ((posCode & 1) != 0) {
@@ -352,6 +348,7 @@ public class PulsingPostingsReaderImpl e
postings.reset(bytes);
this.skipDocs = skipDocs;
payloadLength = 0;
+ posPending = 0;
docID = 0;
//System.out.println("PR d&p reset storesPayloads=" + storePayloads + " bytes=" + bytes.length + " this=" + this);
return this;
@@ -359,7 +356,7 @@ public class PulsingPostingsReaderImpl e
@Override
public int nextDoc() throws IOException {
- //System.out.println("PR d&p nextDoc this=" + this);
+ //System.out.println("PR.nextDoc this=" + this);
while(true) {
//System.out.println(" cycle skip posPending=" + posPending);
@@ -367,15 +364,16 @@ public class PulsingPostingsReaderImpl e
skipPositions();
if (postings.eof()) {
- //System.out.println("PR END");
+ //System.out.println(" END");
return docID = NO_MORE_DOCS;
}
-
+ //System.out.println(" read doc code");
final int code = postings.readVInt();
docID += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
+ //System.out.println(" read freq");
freq = postings.readVInt(); // else read freq
}
posPending = freq;
@@ -400,10 +398,12 @@ public class PulsingPostingsReaderImpl e
@Override
public int advance(int target) throws IOException {
+ //System.out.println("PR.advance target=" + target);
int doc;
while((doc=nextDoc()) != NO_MORE_DOCS) {
+ //System.out.println(" nextDoc got doc=" + doc);
if (doc >= target) {
- return doc;
+ return docID = doc;
}
}
return docID = NO_MORE_DOCS;
@@ -411,7 +411,7 @@ public class PulsingPostingsReaderImpl e
@Override
public int nextPosition() throws IOException {
- //System.out.println("PR d&p nextPosition posPending=" + posPending + " vs freq=" + freq);
+ //System.out.println("PR.nextPosition posPending=" + posPending + " vs freq=" + freq);
assert posPending > 0;
posPending--;
@@ -421,6 +421,7 @@ public class PulsingPostingsReaderImpl e
//System.out.println("PR skip payload=" + payloadLength);
postings.skipBytes(payloadLength);
}
+ //System.out.println(" read pos code");
final int code = postings.readVInt();
//System.out.println("PR code=" + code);
if ((code & 1) != 0) {
@@ -433,16 +434,17 @@ public class PulsingPostingsReaderImpl e
position += postings.readVInt();
}
- //System.out.println("PR d&p nextPos return pos=" + position + " this=" + this);
+ //System.out.println(" return pos=" + position + " hasPayload=" + !payloadRetrieved + " posPending=" + posPending + " this=" + this);
return position;
}
private void skipPositions() throws IOException {
+ //System.out.println("PR.skipPositions: posPending=" + posPending);
while(posPending != 0) {
nextPosition();
}
if (storePayloads && !payloadRetrieved) {
- //System.out.println(" skip payload len=" + payloadLength);
+ //System.out.println(" skip last payload len=" + payloadLength);
postings.skipBytes(payloadLength);
payloadRetrieved = true;
}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Sun May 22 21:45:19 2011
@@ -201,6 +201,7 @@ public final class PulsingPostingsWriter
if (!omitTF) {
int lastDocID = 0;
int pendingIDX = 0;
+ int lastPayloadLength = -1;
while(pendingIDX < pendingCount) {
final Position doc = pending[pendingIDX];
@@ -217,7 +218,6 @@ public final class PulsingPostingsWriter
}
int lastPos = 0;
- int lastPayloadLength = -1;
for(int posIDX=0;posIDX<doc.termFreq;posIDX++) {
final Position pos = pending[pendingIDX++];
assert pos.docID == doc.docID;
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Sun May 22 21:45:19 2011
@@ -85,7 +85,7 @@ public class SepPostingsReaderImpl exten
}
}
- public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) {
+ public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) throws IOException {
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
@@ -151,14 +151,8 @@ public class SepPostingsReaderImpl exten
@Override
public Object clone() {
- SepTermState other = (SepTermState) super.clone();
- other.docIndex = (IntIndexInput.Index) docIndex.clone();
- if (freqIndex != null) {
- other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
- }
- if (posIndex != null) {
- other.posIndex = (IntIndexInput.Index) posIndex.clone();
- }
+ SepTermState other = new SepTermState();
+ other.copyFrom(this);
return other;
}
@@ -166,12 +160,28 @@ public class SepPostingsReaderImpl exten
public void copyFrom(TermState _other) {
super.copyFrom(_other);
SepTermState other = (SepTermState) _other;
- docIndex.set(other.docIndex);
- if (freqIndex != null && other.freqIndex != null) {
- freqIndex.set(other.freqIndex);
+ if (docIndex == null) {
+ docIndex = (IntIndexInput.Index) other.docIndex.clone();
+ } else {
+ docIndex.set(other.docIndex);
+ }
+ if (other.freqIndex != null) {
+ if (freqIndex == null) {
+ freqIndex = (IntIndexInput.Index) other.freqIndex.clone();
+ } else {
+ freqIndex.set(other.freqIndex);
+ }
+ } else {
+ freqIndex = null;
}
- if (posIndex != null && other.posIndex != null) {
- posIndex.set(other.posIndex);
+ if (other.posIndex != null) {
+ if (posIndex == null) {
+ posIndex = (IntIndexInput.Index) other.posIndex.clone();
+ } else {
+ posIndex.set(other.posIndex);
+ }
+ } else {
+ posIndex = null;
}
payloadFP = other.payloadFP;
skipFP = other.skipFP;
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Sun May 22 21:45:19 2011
@@ -69,12 +69,13 @@ public final class SepPostingsWriterImpl
* smaller indexes, greater acceleration, but fewer accelerable cases, while
* smaller values result in bigger indexes, less acceleration and more
* accelerable cases. More detailed experiments would be useful here. */
- final int skipInterval = 16;
+ final int skipInterval;
+ static final int DEFAULT_SKIP_INTERVAL = 16;
/**
* Expert: minimum docFreq to write any skip data at all
*/
- final int skipMinimum = skipInterval;
+ final int skipMinimum;
/** Expert: The maximum number of skip levels. Smaller values result in
* slightly smaller indexes, but slower skipping in big posting lists.
@@ -102,8 +103,13 @@ public final class SepPostingsWriterImpl
private final RAMOutputStream indexBytesWriter = new RAMOutputStream();
public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory) throws IOException {
- super();
+ this(state, factory, DEFAULT_SKIP_INTERVAL);
+ }
+ public SepPostingsWriterImpl(SegmentWriteState state, IntStreamFactory factory, int skipInterval) throws IOException {
+ super();
+ this.skipInterval = skipInterval;
+ this.skipMinimum = skipInterval; /* set to the same for now */
final String docFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, DOC_EXTENSION);
docOut = factory.createOutput(state.directory, docFileName);
docIndex = docOut.index();
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java Sun May 22 21:45:19 2011
@@ -177,6 +177,7 @@ class SepSkipListReader extends MultiLev
@Override
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
int delta;
+ assert !omitTF || !currentFieldStoresPayloads;
if (currentFieldStoresPayloads) {
// the current field stores payloads.
// if the doc delta is odd then we have
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java Sun May 22 21:45:19 2011
@@ -50,12 +50,13 @@ public final class StandardPostingsWrite
* smaller indexes, greater acceleration, but fewer accelerable cases, while
* smaller values result in bigger indexes, less acceleration and more
* accelerable cases. More detailed experiments would be useful here. */
- final int skipInterval = 16;
+ static final int DEFAULT_SKIP_INTERVAL = 16;
+ final int skipInterval;
/**
* Expert: minimum docFreq to write any skip data at all
*/
- final int skipMinimum = skipInterval;
+ final int skipMinimum;
/** Expert: The maximum number of skip levels. Smaller values result in
* slightly smaller indexes, but slower skipping in big posting lists.
@@ -82,7 +83,12 @@ public final class StandardPostingsWrite
private RAMOutputStream bytesWriter = new RAMOutputStream();
public StandardPostingsWriter(SegmentWriteState state) throws IOException {
+ this(state, DEFAULT_SKIP_INTERVAL);
+ }
+ public StandardPostingsWriter(SegmentWriteState state, int skipInterval) throws IOException {
super();
+ this.skipInterval = skipInterval;
+ this.skipMinimum = skipInterval; /* set to the same for now */
//this.segment = state.segmentName;
String fileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, StandardCodec.FREQ_EXTENSION);
freqOut = state.directory.createOutput(fileName);
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java Sun May 22 21:45:19 2011
@@ -806,6 +806,7 @@ public abstract class QueryParserBase {
}
try {
+ source.end();
source.close();
} catch (IOException ignored) {}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/BooleanQuery.java Sun May 22 21:45:19 2011
@@ -367,8 +367,12 @@ public class BooleanQuery extends Query
Query query = c.getQuery().rewrite(reader); // rewrite first
if (getBoost() != 1.0f) { // incorporate boost
- if (query == c.getQuery()) // if rewrite was no-op
+ if (query == c.getQuery()) { // if rewrite was no-op
query = (Query)query.clone(); // then clone before boost
+ }
+ // Since the BooleanQuery only has 1 clause, the BooleanQuery will be
+ // written out. Therefore the rewritten Query's boost must incorporate both
+ // the clause's boost, and the boost of the BooleanQuery itself
query.setBoost(getBoost() * query.getBoost());
}
@@ -381,8 +385,12 @@ public class BooleanQuery extends Query
BooleanClause c = clauses.get(i);
Query query = c.getQuery().rewrite(reader);
if (query != c.getQuery()) { // clause rewrote: must clone
- if (clone == null)
+ if (clone == null) {
+ // The BooleanQuery clone is lazily initialized so only initialize
+ // it if a rewritten clause differs from the original clause (and hasn't been
+ // initialized already). If nothing differs, the clone isn't needlessly created
clone = (BooleanQuery)this.clone();
+ }
clone.clauses.set(i, new BooleanClause(query, c.getOccur()));
}
}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/DocIdSetIterator.java Sun May 22 21:45:19 2011
@@ -61,9 +61,10 @@ public abstract class DocIdSetIterator {
public abstract int nextDoc() throws IOException;
/**
- * Advances to the first beyond the current whose document number is greater
- * than or equal to <i>target</i>. Returns the current document number or
- * {@link #NO_MORE_DOCS} if there are no more docs in the set.
+ * Advances to the first beyond (see NOTE below) the current whose document
+ * number is greater than or equal to <i>target</i>. Returns the current
+ * document number or {@link #NO_MORE_DOCS} if there are no more docs in the
+ * set.
* <p>
* Behaves as if written:
*
@@ -78,7 +79,7 @@ public abstract class DocIdSetIterator {
*
* Some implementations are considerably more efficient than that.
* <p>
- * <b>NOTE:</b> when <code> target ≤ current</code> implementations may opt
+ * <b>NOTE:</b> when <code> target ≤ current</code> implementations may opt
* not to advance beyond their current {@link #docID()}.
* <p>
* <b>NOTE:</b> this method may be called with {@link #NO_MORE_DOCS} for
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java Sun May 22 21:45:19 2011
@@ -127,7 +127,7 @@ public class FieldCacheTermsFilter exten
public FieldCacheTermsFilterDocIdSet(FieldCache.DocTermsIndex fcsi) {
this.fcsi = fcsi;
- openBitSet = new OpenBitSet(this.fcsi.size());
+ openBitSet = new OpenBitSet(this.fcsi.numOrd());
final BytesRef spare = new BytesRef();
for (int i=0;i<terms.length;i++) {
int termNumber = this.fcsi.binarySearchLookup(terms[i], spare);
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/HitQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/HitQueue.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/HitQueue.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/HitQueue.java Sun May 22 21:45:19 2011
@@ -21,8 +21,6 @@ import org.apache.lucene.util.PriorityQu
final class HitQueue extends PriorityQueue<ScoreDoc> {
- private boolean prePopulate;
-
/**
* Creates a new instance with <code>size</code> elements. If
* <code>prePopulate</code> is set to true, the queue will pre-populate itself
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/IndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/IndexSearcher.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/IndexSearcher.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/IndexSearcher.java Sun May 22 21:45:19 2011
@@ -46,8 +46,18 @@ import org.apache.lucene.util.ThreadInte
*
* <p>Applications usually need only call the inherited
* {@link #search(Query,int)}
- * or {@link #search(Query,Filter,int)} methods. For performance reasons it is
- * recommended to open only one IndexSearcher and use it for all of your searches.
+ * or {@link #search(Query,Filter,int)} methods. For
+ * performance reasons, if your index is unchanging, you
+ * should share a single IndexSearcher instance across
+ * multiple searches instead of creating a new one
+ * per-search. If your index has changed and you wish to
+ * see the changes reflected in searching, you should
+ * use {@link IndexReader#reopen} to obtain a new reader and
+ * then create a new IndexSearcher from that. Also, for
+ * low-latency turnaround it's best to use a near-real-time
+ * reader ({@link IndexReader#open(IndexWriter,boolean)}).
+ * Once you have a new {@link IndexReader}, it's relatively
+ * cheap to create a new IndexSearcher from it.
*
* <a name="thread-safety"></a><p><b>NOTE</b>: <code>{@link
* IndexSearcher}</code> instances are completely
@@ -857,4 +867,9 @@ public class IndexSearcher {
this.leaves = leaves;
}
}
+
+ @Override
+ public String toString() {
+ return "IndexSearcher(" + reader + ")";
+ }
}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Sun May 22 21:45:19 2011
@@ -214,12 +214,12 @@ public class MultiPhraseQuery extends Qu
docFreq = reader.docFreq(term.field(), term.bytes());
}
- postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue());
+ postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
}
// sort by increasing docFreq order
if (slop == 0) {
- ArrayUtil.quickSort(postingsFreqs);
+ ArrayUtil.mergeSort(postingsFreqs);
}
if (slop == 0) {
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhrasePositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhrasePositions.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhrasePositions.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhrasePositions.java Sun May 22 21:45:19 2011
@@ -28,13 +28,15 @@ final class PhrasePositions {
int position; // position in doc
int count; // remaining pos in this doc
int offset; // position in phrase
+ final int ord; // unique across all PhrasePositions instances
final DocsAndPositionsEnum postings; // stream of docs & positions
PhrasePositions next; // used to make lists
boolean repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1)
- PhrasePositions(DocsAndPositionsEnum postings, int o) {
+ PhrasePositions(DocsAndPositionsEnum postings, int o, int ord) {
this.postings = postings;
offset = o;
+ this.ord = ord;
}
final boolean next() throws IOException { // increments to next doc
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Sun May 22 21:45:19 2011
@@ -124,16 +124,48 @@ public class PhraseQuery extends Query {
final DocsAndPositionsEnum postings;
final int docFreq;
final int position;
+ final Term term;
- public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position) {
+ public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
this.postings = postings;
this.docFreq = docFreq;
this.position = position;
+ this.term = term;
}
public int compareTo(PostingsAndFreq other) {
+ if (docFreq == other.docFreq) {
+ if (position == other.position) {
+ return term.compareTo(other.term);
+ }
+ return position - other.position;
+ }
return docFreq - other.docFreq;
}
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + docFreq;
+ result = prime * result + position;
+ result = prime * result + ((term == null) ? 0 : term.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ if (obj == null) return false;
+ if (getClass() != obj.getClass()) return false;
+ PostingsAndFreq other = (PostingsAndFreq) obj;
+ if (docFreq != other.docFreq) return false;
+ if (position != other.position) return false;
+ if (term == null) {
+ if (other.term != null) return false;
+ } else if (!term.equals(other.term)) return false;
+ return true;
+ }
}
private class PhraseWeight extends Weight {
@@ -197,12 +229,12 @@ public class PhraseQuery extends Query {
return null;
}
}
- postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
+ postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue(), t);
}
// sort by increasing docFreq order
if (slop == 0) {
- ArrayUtil.quickSort(postingsFreqs);
+ ArrayUtil.mergeSort(postingsFreqs);
}
if (slop == 0) { // optimize exact case
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseQueue.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseQueue.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseQueue.java Sun May 22 21:45:19 2011
@@ -30,10 +30,16 @@ final class PhraseQueue extends Priority
if (pp1.position == pp2.position)
// same doc and pp.position, so decide by actual term positions.
// rely on: pp.position == tp.position - offset.
- return pp1.offset < pp2.offset;
- else
+ if (pp1.offset == pp2.offset) {
+ return pp1.ord < pp2.ord;
+ } else {
+ return pp1.offset < pp2.offset;
+ }
+ else {
return pp1.position < pp2.position;
- else
+ }
+ else {
return pp1.doc < pp2.doc;
+ }
}
}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseScorer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseScorer.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/PhraseScorer.java Sun May 22 21:45:19 2011
@@ -55,7 +55,7 @@ abstract class PhraseScorer extends Scor
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
for (int i = 0; i < postings.length; i++) {
- PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position);
+ PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
if (last != null) { // add next to end of list
last.next = pp;
} else {
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/QueryTermVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/QueryTermVector.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/QueryTermVector.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/QueryTermVector.java Sun May 22 21:45:19 2011
@@ -55,7 +55,12 @@ public class QueryTermVector implements
public QueryTermVector(String queryString, Analyzer analyzer) {
if (analyzer != null)
{
- TokenStream stream = analyzer.tokenStream("", new StringReader(queryString));
+ TokenStream stream;
+ try {
+ stream = analyzer.reusableTokenStream("", new StringReader(queryString));
+ } catch (IOException e1) {
+ stream = null;
+ }
if (stream != null)
{
List<BytesRef> terms = new ArrayList<BytesRef>();
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java Sun May 22 21:45:19 2011
@@ -18,7 +18,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.util.HashMap;
+import java.util.HashSet;
final class SloppyPhraseScorer extends PhraseScorer {
private int slop;
@@ -109,8 +109,14 @@ final class SloppyPhraseScorer extends P
/**
* Init PhrasePositions in place.
- * There is a one time initialization for this scorer:
+ * There is a one time initialization for this scorer (taking place at the first doc that matches all terms):
* <br>- Put in repeats[] each pp that has another pp with same position in the doc.
+ * This relies on that the position in PP is computed as (TP.position - offset) and
+ * so by adding offset we actually compare positions and identify that the two are
+ * the same term.
+ * An exclusion to this is two distinct terms in the same offset in query and same
+ * position in doc. This case is detected by comparing just the (query) offsets,
+ * and two such PPs are not considered "repeating".
* <br>- Also mark each such pp by pp.repeats = true.
* <br>Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient.
* In particular, this allows to score queries with no repetitions with no overhead due to this computation.
@@ -145,23 +151,26 @@ final class SloppyPhraseScorer extends P
if (!checkedRepeats) {
checkedRepeats = true;
// check for repeats
- HashMap<PhrasePositions, Object> m = null;
+ HashSet<PhrasePositions> m = null;
for (PhrasePositions pp = first; pp != null; pp = pp.next) {
int tpPos = pp.position + pp.offset;
for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) {
+ if (pp.offset == pp2.offset) {
+ continue; // not a repetition: the two PPs are originally in same offset in the query!
+ }
int tpPos2 = pp2.position + pp2.offset;
if (tpPos2 == tpPos) {
if (m == null)
- m = new HashMap<PhrasePositions, Object>();
+ m = new HashSet<PhrasePositions>();
pp.repeats = true;
pp2.repeats = true;
- m.put(pp,null);
- m.put(pp2,null);
+ m.add(pp);
+ m.add(pp2);
}
}
}
if (m!=null)
- repeats = m.keySet().toArray(new PhrasePositions[0]);
+ repeats = m.toArray(new PhrasePositions[0]);
}
// with repeats must advance some repeating pp's so they all start with differing tp's
@@ -204,11 +213,16 @@ final class SloppyPhraseScorer extends P
int tpPos = pp.position + pp.offset;
for (int i = 0; i < repeats.length; i++) {
PhrasePositions pp2 = repeats[i];
- if (pp2 == pp)
+ if (pp2 == pp) {
continue;
+ }
+ if (pp.offset == pp2.offset) {
+ continue; // not a repetition: the two PPs are originally in same offset in the query!
+ }
int tpPos2 = pp2.position + pp2.offset;
- if (tpPos2 == tpPos)
+ if (tpPos2 == tpPos) {
return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset.
+ }
}
return null;
}
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/TopTermsRewrite.java Sun May 22 21:45:19 2011
@@ -134,7 +134,7 @@ public abstract class TopTermsRewrite<Q
final Term placeholderTerm = new Term(query.field);
final Q q = getTopLevelQuery();
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
- ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp);
+ ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
for (final ScoreTerm st : scoreTerms) {
final Term term = placeholderTerm.createTerm(st.bytes);
assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java Sun May 22 21:45:19 2011
@@ -190,7 +190,7 @@ public class NearSpansOrdered extends Sp
/** Advance the subSpans to the same document */
private boolean toSameDoc() throws IOException {
- ArrayUtil.quickSort(subSpansByDoc, spanDocComparator);
+ ArrayUtil.mergeSort(subSpansByDoc, spanDocComparator);
int firstIndex = 0;
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/Directory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/Directory.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/Directory.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/Directory.java Sun May 22 21:45:19 2011
@@ -65,10 +65,6 @@ public abstract class Directory implemen
public abstract long fileModified(String name)
throws IOException;
- /** Set the modified time of an existing file to now. */
- public abstract void touchFile(String name)
- throws IOException;
-
/** Removes an existing file in the directory. */
public abstract void deleteFile(String name)
throws IOException;
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/FSDirectory.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/FSDirectory.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/FSDirectory.java Sun May 22 21:45:19 2011
@@ -272,14 +272,6 @@ public abstract class FSDirectory extend
return file.lastModified();
}
- /** Set the modified time of an existing file to now. */
- @Override
- public void touchFile(String name) {
- ensureOpen();
- File file = new File(directory, name);
- file.setLastModified(System.currentTimeMillis());
- }
-
/** Returns the length in bytes of a file in the directory. */
@Override
public long fileLength(String name) throws IOException {
Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java Sun May 22 21:45:19 2011
@@ -115,11 +115,6 @@ public class FileSwitchDirectory extends
}
@Override
- public void touchFile(String name) throws IOException {
- getDirectory(name).touchFile(name);
- }
-
- @Override
public void deleteFile(String name) throws IOException {
getDirectory(name).deleteFile(name);
}