You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/02/21 15:13:40 UTC
svn commit: r1072973 [5/11] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/ant/
dev-tools/idea/lucene/contrib/demo/
dev-tools/idea/lucene/contrib/highlighter/ dev-tools/idea/lucene/contrib...
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java Mon Feb 21 14:13:28 2011
@@ -121,9 +121,13 @@ class BufferedDeletesStream {
// Current gen, for the merged segment:
public final long gen;
- ApplyDeletesResult(boolean anyDeletes, long gen) {
+ // If non-null, contains segments that are 100% deleted
+ public final SegmentInfos allDeleted;
+
+ ApplyDeletesResult(boolean anyDeletes, long gen, SegmentInfos allDeleted) {
this.anyDeletes = anyDeletes;
this.gen = gen;
+ this.allDeleted = allDeleted;
}
}
@@ -154,14 +158,14 @@ class BufferedDeletesStream {
final long t0 = System.currentTimeMillis();
if (infos.size() == 0) {
- return new ApplyDeletesResult(false, nextGen++);
+ return new ApplyDeletesResult(false, nextGen++, null);
}
assert checkDeleteStats();
if (!any()) {
message("applyDeletes: no deletes; skipping");
- return new ApplyDeletesResult(false, nextGen++);
+ return new ApplyDeletesResult(false, nextGen++, null);
}
if (infoStream != null) {
@@ -178,6 +182,8 @@ class BufferedDeletesStream {
int infosIDX = infos2.size()-1;
int delIDX = deletes.size()-1;
+ SegmentInfos allDeleted = null;
+
while (infosIDX >= 0) {
//System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);
@@ -199,6 +205,7 @@ class BufferedDeletesStream {
assert readerPool.infoIsLive(info);
SegmentReader reader = readerPool.get(info, false);
int delCount = 0;
+ final boolean segAllDeletes;
try {
if (coalescedDeletes != null) {
//System.out.println(" del coalesced");
@@ -209,13 +216,21 @@ class BufferedDeletesStream {
// Don't delete by Term here; DocumentsWriter
// already did that on flush:
delCount += applyQueryDeletes(packet.queriesIterable(), reader);
+ segAllDeletes = reader.numDocs() == 0;
} finally {
readerPool.release(reader);
}
anyNewDeletes |= delCount > 0;
+ if (segAllDeletes) {
+ if (allDeleted == null) {
+ allDeleted = new SegmentInfos();
+ }
+ allDeleted.add(info);
+ }
+
if (infoStream != null) {
- message("seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount);
+ message("seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
}
if (coalescedDeletes == null) {
@@ -234,16 +249,25 @@ class BufferedDeletesStream {
assert readerPool.infoIsLive(info);
SegmentReader reader = readerPool.get(info, false);
int delCount = 0;
+ final boolean segAllDeletes;
try {
delCount += applyTermDeletes(coalescedDeletes.termsIterable(), reader);
delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader);
+ segAllDeletes = reader.numDocs() == 0;
} finally {
readerPool.release(reader);
}
anyNewDeletes |= delCount > 0;
+ if (segAllDeletes) {
+ if (allDeleted == null) {
+ allDeleted = new SegmentInfos();
+ }
+ allDeleted.add(info);
+ }
+
if (infoStream != null) {
- message("seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount);
+ message("seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
}
}
info.setBufferedDeletesGen(nextGen);
@@ -258,7 +282,7 @@ class BufferedDeletesStream {
}
// assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;
- return new ApplyDeletesResult(anyNewDeletes, nextGen++);
+ return new ApplyDeletesResult(anyNewDeletes, nextGen++, allDeleted);
}
public synchronized long getNextGen() {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Mon Feb 21 14:13:28 2011
@@ -146,7 +146,6 @@ class DirectoryReader extends IndexReade
this.readOnly = true;
this.applyAllDeletes = applyAllDeletes; // saved for reopen
- segmentInfos = (SegmentInfos) infos.clone();// make sure we clone otherwise we share mutable state with IW
this.termInfosIndexDivisor = termInfosIndexDivisor;
if (codecs == null) {
this.codecs = CodecProvider.getDefault();
@@ -159,23 +158,33 @@ class DirectoryReader extends IndexReade
// us, which ensures infos will not change; so there's
// no need to process segments in reverse order
final int numSegments = infos.size();
- SegmentReader[] readers = new SegmentReader[numSegments];
+
+ List<SegmentReader> readers = new ArrayList<SegmentReader>();
final Directory dir = writer.getDirectory();
+ segmentInfos = (SegmentInfos) infos.clone();
+ int infosUpto = 0;
for (int i=0;i<numSegments;i++) {
boolean success = false;
try {
final SegmentInfo info = infos.info(i);
assert info.dir == dir;
- readers[i] = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
- readers[i].readerFinishedListeners = readerFinishedListeners;
+ final SegmentReader reader = writer.readerPool.getReadOnlyClone(info, true, termInfosIndexDivisor);
+ if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
+ reader.readerFinishedListeners = readerFinishedListeners;
+ readers.add(reader);
+ infosUpto++;
+ } else {
+ reader.close();
+ segmentInfos.remove(infosUpto);
+ }
success = true;
} finally {
if (!success) {
// Close all readers we had opened:
- for(i--;i>=0;i--) {
+ for(SegmentReader reader : readers) {
try {
- readers[i].close();
+ reader.close();
} catch (Throwable ignore) {
// keep going - we want to clean up as much as possible
}
@@ -186,7 +195,7 @@ class DirectoryReader extends IndexReade
this.writer = writer;
- initialize(readers);
+ initialize(readers.toArray(new SegmentReader[readers.size()]));
}
/** This constructor is only used for {@link #reopen()} */
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Mon Feb 21 14:13:28 2011
@@ -646,8 +646,16 @@ final class DocumentsWriter {
newSegment.setDelCount(delCount);
newSegment.advanceDelGen();
final String delFileName = newSegment.getDelFileName();
+ if (infoStream != null) {
+ message("flush: write " + delCount + " deletes to " + delFileName);
+ }
boolean success2 = false;
try {
+ // TODO: in the NRT case it'd be better to hand
+ // this del vector over to the
+ // shortly-to-be-opened SegmentReader and let it
+ // carry the changes; there's no reason to use
+ // filesystem as intermediary here.
flushState.deletedDocs.write(directory, delFileName);
success2 = true;
} finally {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java Mon Feb 21 14:13:28 2011
@@ -1008,8 +1008,8 @@ public abstract class IndexReader implem
/** Expert: Resets the normalization factor for the named field of the named
* document. The norm represents the product of the field's {@link
- * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
- * int) length normalization}. Thus, to preserve the length normalization
+ * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its
+ * length normalization. Thus, to preserve the length normalization
* values when resetting this, one should base the new value upon the old.
*
* <b>NOTE:</b> If this field does not store norms, then
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java Mon Feb 21 14:13:28 2011
@@ -388,8 +388,7 @@ public class IndexWriter implements Clos
private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
- /** Forcefully clear changes for the specified segments,
- * and remove from the pool. This is called on successful merge. */
+ /** Forcefully clear changes for the specified segments. This is called on successful merge. */
synchronized void clear(SegmentInfos infos) throws IOException {
if (infos == null) {
for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
@@ -397,8 +396,9 @@ public class IndexWriter implements Clos
}
} else {
for (final SegmentInfo info: infos) {
- if (readerMap.containsKey(info)) {
- readerMap.get(info).hasChanges = false;
+ final SegmentReader r = readerMap.get(info);
+ if (r != null) {
+ r.hasChanges = false;
}
}
}
@@ -407,8 +407,8 @@ public class IndexWriter implements Clos
// used only by asserts
public synchronized boolean infoIsLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info);
- assert idx != -1;
- assert segmentInfos.get(idx) == info;
+ assert idx != -1: "info=" + info + " isn't in pool";
+ assert segmentInfos.get(idx) == info: "info=" + info + " doesn't match live info in segmentInfos";
return true;
}
@@ -478,6 +478,21 @@ public class IndexWriter implements Clos
return false;
}
+
+ public synchronized void drop(SegmentInfos infos) throws IOException {
+ for(SegmentInfo info : infos) {
+ drop(info);
+ }
+ }
+
+ public synchronized void drop(SegmentInfo info) throws IOException {
+ final SegmentReader sr = readerMap.get(info);
+ if (sr != null) {
+ sr.hasChanges = false;
+ readerMap.remove(info);
+ sr.close();
+ }
+ }
/** Remove all our references to readers, and commits
* any pending changes. */
@@ -516,19 +531,18 @@ public class IndexWriter implements Clos
* Commit all segment reader in the pool.
* @throws IOException
*/
- synchronized void commit() throws IOException {
+ synchronized void commit(SegmentInfos infos) throws IOException {
// We invoke deleter.checkpoint below, so we must be
// sync'd on IW:
assert Thread.holdsLock(IndexWriter.this);
- for (Map.Entry<SegmentInfo,SegmentReader> ent : readerMap.entrySet()) {
+ for (SegmentInfo info : infos) {
- SegmentReader sr = ent.getValue();
- if (sr.hasChanges) {
- assert infoIsLive(sr.getSegmentInfo());
+ final SegmentReader sr = readerMap.get(info);
+ if (sr != null && sr.hasChanges) {
+ assert infoIsLive(info);
sr.doCommit(null);
-
// Must checkpoint w/ deleter, because this
// segment reader will have created new _X_N.del
// file.
@@ -2558,6 +2572,24 @@ public class IndexWriter implements Clos
if (result.anyDeletes) {
checkpoint();
}
+ if (!keepFullyDeletedSegments && result.allDeleted != null) {
+ if (infoStream != null) {
+ message("drop 100% deleted segments: " + result.allDeleted);
+ }
+ for(SegmentInfo info : result.allDeleted) {
+ // If a merge has already registered for this
+ // segment, we leave it in the readerPool; the
+ // merge will skip merging it and will then drop
+ // it once it's done:
+ if (!mergingSegments.contains(info)) {
+ segmentInfos.remove(info);
+ if (readerPool != null) {
+ readerPool.drop(info);
+ }
+ }
+ }
+ checkpoint();
+ }
bufferedDeletesStream.prune(segmentInfos);
assert !bufferedDeletesStream.any();
flushControl.clearDeletes();
@@ -2634,9 +2666,13 @@ public class IndexWriter implements Clos
SegmentInfo info = sourceSegments.info(i);
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
int docCount = info.docCount;
- SegmentReader previousReader = merge.readersClone[i];
+ final SegmentReader previousReader = merge.readerClones.get(i);
+ if (previousReader == null) {
+ // Reader was skipped because it was 100% deletions
+ continue;
+ }
final Bits prevDelDocs = previousReader.getDeletedDocs();
- SegmentReader currentReader = merge.readers[i];
+ final SegmentReader currentReader = merge.readers.get(i);
final Bits currentDelDocs = currentReader.getDeletedDocs();
if (previousReader.hasDeletions()) {
@@ -2719,18 +2755,21 @@ public class IndexWriter implements Clos
return false;
}
- ensureValidMerge(merge);
-
commitMergedDeletes(merge, mergedReader);
// If the doc store we are using has been closed and
// is in now compound format (but wasn't when we
// started), then we will switch to the compound
// format as well:
- setMergeDocStoreIsCompoundFile(merge);
assert !segmentInfos.contains(merge.info);
+ final boolean allDeleted = mergedReader.numDocs() == 0;
+
+ if (infoStream != null && allDeleted) {
+ message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
+ }
+
final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
int segIdx = 0;
int newSegIdx = 0;
@@ -2739,7 +2778,7 @@ public class IndexWriter implements Clos
while(segIdx < curSegCount) {
final SegmentInfo info = segmentInfos.info(segIdx++);
if (mergedAway.contains(info)) {
- if (!inserted) {
+ if (!inserted && (!allDeleted || keepFullyDeletedSegments)) {
segmentInfos.set(segIdx-1, merge.info);
inserted = true;
newSegIdx++;
@@ -2748,7 +2787,20 @@ public class IndexWriter implements Clos
segmentInfos.set(newSegIdx++, info);
}
}
- assert newSegIdx == curSegCount - merge.segments.size() + 1;
+
+ // Either we found place to insert segment, or, we did
+ // not, but only because all segments we merged became
+ // deleted while we are merging, in which case it should
+ // be the case that the new segment is also all deleted:
+ if (!inserted) {
+ assert allDeleted;
+ if (keepFullyDeletedSegments) {
+ segmentInfos.add(0, merge.info);
+ } else {
+ readerPool.drop(merge.info);
+ }
+ }
+
segmentInfos.subList(newSegIdx, segmentInfos.size()).clear();
if (infoStream != null) {
@@ -2770,7 +2822,6 @@ public class IndexWriter implements Clos
// cascade the optimize:
segmentsToOptimize.add(merge.info);
}
-
return true;
}
@@ -2913,8 +2964,9 @@ public class IndexWriter implements Clos
// is running (while synchronized) to avoid race
// condition where two conflicting merges from different
// threads, start
- for(int i=0;i<count;i++)
+ for(int i=0;i<count;i++) {
mergingSegments.add(merge.segments.info(i));
+ }
// Merge is now registered
merge.registerDone = true;
@@ -2966,10 +3018,28 @@ public class IndexWriter implements Clos
// Lock order: IW -> BD
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
+
if (result.anyDeletes) {
checkpoint();
}
+ if (!keepFullyDeletedSegments && result.allDeleted != null) {
+ if (infoStream != null) {
+ message("drop 100% deleted segments: " + result.allDeleted);
+ }
+ for(SegmentInfo info : result.allDeleted) {
+ segmentInfos.remove(info);
+ if (merge.segments.contains(info)) {
+ mergingSegments.remove(info);
+ merge.segments.remove(info);
+ }
+ }
+ if (readerPool != null) {
+ readerPool.drop(result.allDeleted);
+ }
+ checkpoint();
+ }
+
merge.info.setBufferedDeletesGen(result.gen);
// Lock order: IW -> BD
@@ -3023,8 +3093,9 @@ public class IndexWriter implements Clos
if (merge.registerDone) {
final SegmentInfos sourceSegments = merge.segments;
final int end = sourceSegments.size();
- for(int i=0;i<end;i++)
+ for(int i=0;i<end;i++) {
mergingSegments.remove(sourceSegments.info(i));
+ }
mergingSegments.remove(merge.info);
merge.registerDone = false;
}
@@ -3032,47 +3103,30 @@ public class IndexWriter implements Clos
runningMerges.remove(merge);
}
- private synchronized void setMergeDocStoreIsCompoundFile(MergePolicy.OneMerge merge) {
- final String mergeDocStoreSegment = merge.info.getDocStoreSegment();
- if (mergeDocStoreSegment != null && !merge.info.getDocStoreIsCompoundFile()) {
- final int size = segmentInfos.size();
- for(int i=0;i<size;i++) {
- final SegmentInfo info = segmentInfos.info(i);
- final String docStoreSegment = info.getDocStoreSegment();
- if (docStoreSegment != null &&
- docStoreSegment.equals(mergeDocStoreSegment) &&
- info.getDocStoreIsCompoundFile()) {
- merge.info.setDocStoreIsCompoundFile(true);
- break;
- }
- }
- }
- }
-
private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
- final int numSegments = merge.segments.size();
+ final int numSegments = merge.readers.size();
if (suppressExceptions) {
// Suppress any new exceptions so we throw the
// original cause
boolean anyChanges = false;
for (int i=0;i<numSegments;i++) {
- if (merge.readers[i] != null) {
+ if (merge.readers.get(i) != null) {
try {
- anyChanges |= readerPool.release(merge.readers[i], false);
+ anyChanges |= readerPool.release(merge.readers.get(i), false);
} catch (Throwable t) {
}
- merge.readers[i] = null;
+ merge.readers.set(i, null);
}
- if (merge.readersClone[i] != null) {
+ if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
try {
- merge.readersClone[i].close();
+ merge.readerClones.get(i).close();
} catch (Throwable t) {
}
// This was a private clone and we had the
// only reference
- assert merge.readersClone[i].getRefCount() == 0: "refCount should be 0 but is " + merge.readersClone[i].getRefCount();
- merge.readersClone[i] = null;
+ assert merge.readerClones.get(i).getRefCount() == 0: "refCount should be 0 but is " + merge.readerClones.get(i).getRefCount();
+ merge.readerClones.set(i, null);
}
}
if (anyChanges) {
@@ -3080,16 +3134,16 @@ public class IndexWriter implements Clos
}
} else {
for (int i=0;i<numSegments;i++) {
- if (merge.readers[i] != null) {
- readerPool.release(merge.readers[i], true);
- merge.readers[i] = null;
+ if (merge.readers.get(i) != null) {
+ readerPool.release(merge.readers.get(i), true);
+ merge.readers.set(i, null);
}
- if (merge.readersClone[i] != null) {
- merge.readersClone[i].close();
+ if (i < merge.readerClones.size() && merge.readerClones.get(i) != null) {
+ merge.readerClones.get(i).close();
// This was a private clone and we had the only reference
- assert merge.readersClone[i].getRefCount() == 0;
- merge.readersClone[i] = null;
+ assert merge.readerClones.get(i).getRefCount() == 0;
+ merge.readerClones.set(i, null);
}
}
}
@@ -3108,7 +3162,6 @@ public class IndexWriter implements Clos
int mergedDocCount = 0;
SegmentInfos sourceSegments = merge.segments;
- final int numSegments = sourceSegments.size();
SegmentMerger merger = new SegmentMerger(directory, termIndexInterval, mergedName, merge,
codecs, payloadProcessorProvider,
@@ -3118,36 +3171,43 @@ public class IndexWriter implements Clos
message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
}
+ merge.readers = new ArrayList<SegmentReader>();
+ merge.readerClones = new ArrayList<SegmentReader>();
+
merge.info.setHasVectors(merger.fieldInfos().hasVectors());
- merge.readers = new SegmentReader[numSegments];
- merge.readersClone = new SegmentReader[numSegments];
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
try {
int totDocCount = 0;
+ int segUpto = 0;
+ while(segUpto < sourceSegments.size()) {
- for (int i = 0; i < numSegments; i++) {
- final SegmentInfo info = sourceSegments.info(i);
+ final SegmentInfo info = sourceSegments.info(segUpto);
// Hold onto the "live" reader; we will use this to
// commit merged deletes
- SegmentReader reader = merge.readers[i] = readerPool.get(info, true,
- MERGE_READ_BUFFER_SIZE,
- -config.getReaderTermsIndexDivisor());
+ final SegmentReader reader = readerPool.get(info, true,
+ MERGE_READ_BUFFER_SIZE,
+ -config.getReaderTermsIndexDivisor());
+ merge.readers.add(reader);
// We clone the segment readers because other
// deletes may come in while we're merging so we
// need readers that will not change
- SegmentReader clone = merge.readersClone[i] = (SegmentReader) reader.clone(true);
- merger.add(clone);
+ final SegmentReader clone = (SegmentReader) reader.clone(true);
+ merge.readerClones.add(clone);
+ if (reader.numDocs() > 0) {
+ merger.add(clone);
+ }
totDocCount += clone.numDocs();
+ segUpto++;
}
if (infoStream != null) {
- message("merge: total "+totDocCount+" docs");
+ message("merge: total " + totDocCount + " docs");
}
merge.checkAborted(directory);
@@ -3160,11 +3220,11 @@ public class IndexWriter implements Clos
if (infoStream != null) {
message("merge segmentCodecs=" + merger.getSegmentCodecs());
- message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + numSegments);
+ message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size());
}
- anyNonBulkMerges |= merger.getMatchedSubReaderCount() != numSegments;
+ anyNonBulkMerges |= merger.getMatchedSubReaderCount() != merge.readers.size();
- assert mergedDocCount == totDocCount;
+ assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount;
// Very important to do this before opening the reader
// because codec must know if prox was written for
@@ -3347,6 +3407,10 @@ public class IndexWriter implements Clos
keepFullyDeletedSegments = true;
}
+ boolean getKeepFullyDeletedSegments() {
+ return keepFullyDeletedSegments;
+ }
+
// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
Collection<String> files = toSync.files(directory, false);
@@ -3402,12 +3466,8 @@ public class IndexWriter implements Clos
if (infoStream != null)
message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
- readerPool.commit();
-
+ readerPool.commit(segmentInfos);
toSync = (SegmentInfos) segmentInfos.clone();
- if (!keepFullyDeletedSegments) {
- toSync.pruneDeletedSegments();
- }
assert filesExist(toSync);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Mon Feb 21 14:13:28 2011
@@ -546,9 +546,10 @@ public abstract class LogMergePolicy ext
if (size < 1) {
size = 1;
}
- levels.add(new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i));
+ final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
+ levels.add(infoLevel);
if (verbose()) {
- message("seg " + info.name + " level=" + levels.get(i).level + " size=" + size);
+ message("seg " + info.name + " level=" + infoLevel.level + " size=" + size);
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/MergePolicy.java Mon Feb 21 14:13:28 2011
@@ -72,8 +72,8 @@ public abstract class MergePolicy implem
long mergeGen; // used by IndexWriter
boolean isExternal; // used by IndexWriter
int maxNumSegmentsOptimize; // used by IndexWriter
- SegmentReader[] readers; // used by IndexWriter
- SegmentReader[] readersClone; // used by IndexWriter
+ List<SegmentReader> readers; // used by IndexWriter
+ List<SegmentReader> readerClones; // used by IndexWriter
public final SegmentInfos segments;
boolean aborted;
Throwable error;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java Mon Feb 21 14:13:28 2011
@@ -74,7 +74,7 @@ final class NormsWriterPerField extends
assert norms.length == upto;
norms = ArrayUtil.grow(norms, 1+upto);
}
- final float norm = similarity.computeNorm(fieldInfo.name, fieldState);
+ final float norm = similarity.computeNorm(fieldState);
norms[upto] = similarity.encodeNormValue(norm);
docIDs[upto] = docState.docID;
upto++;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/OrdTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/OrdTermState.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/OrdTermState.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/OrdTermState.java Mon Feb 21 14:13:28 2011
@@ -30,4 +30,9 @@ public class OrdTermState extends TermSt
assert other instanceof OrdTermState : "can not copy from " + other.getClass().getName();
this.ord = ((OrdTermState) other).ord;
}
+
+ @Override
+ public String toString() {
+ return "OrdTermState ord=" + ord;
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Payload.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Payload.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Payload.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Payload.java Mon Feb 21 14:13:28 2011
@@ -17,8 +17,6 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.Serializable;
-
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.ArrayUtil;
@@ -34,7 +32,7 @@ import org.apache.lucene.util.ArrayUtil;
* to retrieve the payloads from the index.<br>
*
*/
-public class Payload implements Serializable, Cloneable {
+public class Payload implements Cloneable {
/** the byte array containing the payload data */
protected byte[] data;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Mon Feb 21 14:13:28 2011
@@ -58,20 +58,6 @@ public class SegmentWriteState {
* tweaking this is rarely useful.*/
public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC
- /** Expert: The fraction of TermDocs entries stored in skip tables,
- * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
- * smaller indexes, greater acceleration, but fewer accelerable cases, while
- * smaller values result in bigger indexes, less acceleration and more
- * accelerable cases. More detailed experiments would be useful here. */
- public final int skipInterval = 16;
-
- /** Expert: The maximum number of skip levels. Smaller values result in
- * slightly smaller indexes, but slower skipping in big posting lists.
- */
- public final int maxSkipLevels = 10;
-
-
-
public SegmentWriteState(PrintStream infoStream, Directory directory, String segmentName, FieldInfos fieldInfos,
int numDocs, int termIndexInterval, SegmentCodecs segmentCodecs, BufferedDeletes segDeletes, AtomicLong bytesUsed) {
this.infoStream = infoStream;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Term.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Term.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Term.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/Term.java Mon Feb 21 14:13:28 2011
@@ -30,7 +30,7 @@ import org.apache.lucene.util.StringHelp
Note that terms may represent more than words from text fields, but also
things like dates, email addresses, urls, etc. */
-public final class Term implements Comparable<Term>, java.io.Serializable {
+public final class Term implements Comparable<Term> {
String field;
BytesRef bytes;
@@ -199,11 +199,4 @@ public final class Term implements Compa
@Override
public final String toString() { return field + ":" + bytes.utf8ToString(); }
-
- private void readObject(java.io.ObjectInputStream in)
- throws java.io.IOException, ClassNotFoundException
- {
- in.defaultReadObject();
- field = StringHelper.intern(field);
- }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermState.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermState.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermState.java Mon Feb 21 14:13:28 2011
@@ -44,4 +44,9 @@ public abstract class TermState implemen
throw new RuntimeException(cnse);
}
}
-}
\ No newline at end of file
+
+ @Override
+ public String toString() {
+ return "TermState";
+ }
+}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorOffsetInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorOffsetInfo.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorOffsetInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/TermVectorOffsetInfo.java Mon Feb 21 14:13:28 2011
@@ -1,7 +1,5 @@
package org.apache.lucene.index;
-import java.io.Serializable;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -24,7 +22,7 @@ import java.io.Serializable;
* offset information. This offset information is the character offset as set during the Analysis phase (and thus may not be the actual offset in the
* original content).
*/
-public class TermVectorOffsetInfo implements Serializable {
+public class TermVectorOffsetInfo {
/**
* Convenience declaration when creating a {@link org.apache.lucene.index.TermPositionVector} that stores only position information.
*/
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java Mon Feb 21 14:13:28 2011
@@ -51,6 +51,6 @@ public class BlockTermState extends OrdT
@Override
public String toString() {
- return super.toString() + "ord=" + ord + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termCount=" + termCount + " blockFP=" + blockFilePointer;
+ return "ord=" + ord + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termCount=" + termCount + " blockFP=" + blockFilePointer;
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Mon Feb 21 14:13:28 2011
@@ -67,9 +67,6 @@ public class BlockTermsReader extends Fi
private final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();
- // Comparator that orders our terms
- private final Comparator<BytesRef> termComp;
-
// Caches the most recently looked-up field + terms:
private final DoubleBarrelLRUCache<FieldAndTerm,BlockTermState> termsCache;
@@ -112,13 +109,12 @@ public class BlockTermsReader extends Fi
//private String segment;
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, String segment, PostingsReaderBase postingsReader, int readBufferSize,
- Comparator<BytesRef> termComp, int termsCacheSize, String codecId)
+ int termsCacheSize, String codecId)
throws IOException {
this.postingsReader = postingsReader;
termsCache = new DoubleBarrelLRUCache<FieldAndTerm,BlockTermState>(termsCacheSize);
- this.termComp = termComp;
//this.segment = segment;
in = dir.openInput(IndexFileNames.segmentFileName(segment, codecId, BlockTermsWriter.TERMS_EXTENSION),
readBufferSize);
@@ -266,7 +262,7 @@ public class BlockTermsReader extends Fi
@Override
public Comparator<BytesRef> getComparator() {
- return termComp;
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
@@ -348,23 +344,29 @@ public class BlockTermsReader extends Fi
@Override
public Comparator<BytesRef> getComparator() {
- return termComp;
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
}
+ // TODO: we may want an alternate mode here which is
+ // "if you are about to return NOT_FOUND I won't use
+ // the terms data from that"; eg FuzzyTermsEnum will
+ // (usually) just immediately call seek again if we
+ // return NOT_FOUND so it's a waste for us to fill in
+ // the term that was actually NOT_FOUND
@Override
public SeekStatus seek(final BytesRef target, final boolean useCache) throws IOException {
if (indexEnum == null) {
throw new IllegalStateException("terms index was not loaded");
}
-
- //System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
+
/*
+ System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
if (didIndexNext) {
if (nextIndexTerm == null) {
- //System.out.println(" nextIndexTerm=null");
+ System.out.println(" nextIndexTerm=null");
} else {
- //System.out.println(" nextIndexTerm=" + nextIndexTerm.utf8ToString());
+ System.out.println(" nextIndexTerm=" + nextIndexTerm.utf8ToString());
}
}
*/
@@ -392,7 +394,7 @@ public class BlockTermsReader extends Fi
// is after current term but before next index term:
if (indexIsCurrent) {
- final int cmp = termComp.compare(term, target);
+ final int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(term, target);
if (cmp == 0) {
// Already at the requested term
@@ -410,7 +412,7 @@ public class BlockTermsReader extends Fi
didIndexNext = true;
}
- if (nextIndexTerm == null || termComp.compare(target, nextIndexTerm) < 0) {
+ if (nextIndexTerm == null || BytesRef.getUTF8SortedAsUnicodeComparator().compare(target, nextIndexTerm) < 0) {
// Optimization: requested term is within the
// same term block we are now in; skip seeking
// (but do scanning):
@@ -440,48 +442,175 @@ public class BlockTermsReader extends Fi
state.ord = indexEnum.ord()-1;
}
- // NOTE: the first _next() after an index seek is
- // a bit wasteful, since it redundantly reads some
- // suffix bytes into the buffer. We could avoid storing
- // those bytes in the primary file, but then when
- // next()ing over an index term we'd have to
- // special case it:
term.copy(indexEnum.term());
//System.out.println(" seek: term=" + term.utf8ToString());
} else {
- ////System.out.println(" skip seek");
+ //System.out.println(" skip seek");
+ if (state.termCount == state.blockTermCount && !nextBlock()) {
+ indexIsCurrent = false;
+ return SeekStatus.END;
+ }
}
seekPending = false;
- // Now scan:
- while (_next() != null) {
- final int cmp = termComp.compare(term, target);
- if (cmp == 0) {
- // Match!
- if (useCache) {
- // Store in cache
- decodeMetaData();
- termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone());
+ int common = 0;
+
+ // Scan within block. We could do this by calling
+ // _next() and testing the resulting term, but this
+ // is wasteful. Instead, we first confirm the
+ // target matches the common prefix of this block,
+ // and then we scan the term bytes directly from the
+ // termSuffixesreader's byte[], saving a copy into
+ // the BytesRef term per term. Only when we return
+ // do we then copy the bytes into the term.
+
+ while(true) {
+
+ // First, see if target term matches common prefix
+ // in this block:
+ if (common < termBlockPrefix) {
+ final int cmp = (term.bytes[common]&0xFF) - (target.bytes[target.offset + common]&0xFF);
+ if (cmp < 0) {
+
+ // TODO: maybe we should store common prefix
+ // in block header? (instead of relying on
+ // last term of previous block)
+
+ // Target's prefix is after the common block
+ // prefix, so term cannot be in this block
+ // but it could be in next block. We
+ // must scan to end-of-block to set common
+ // prefix for next block:
+ if (state.termCount < state.blockTermCount) {
+ while(state.termCount < state.blockTermCount-1) {
+ state.termCount++;
+ state.ord++;
+ termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
+ }
+ final int suffix = termSuffixesReader.readVInt();
+ term.length = termBlockPrefix + suffix;
+ if (term.bytes.length < term.length) {
+ term.grow(term.length);
+ }
+ termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+ }
+ state.ord++;
+
+ if (!nextBlock()) {
+ indexIsCurrent = false;
+ return SeekStatus.END;
+ }
+ common = 0;
+
+ } else if (cmp > 0) {
+ // Target's prefix is before the common prefix
+ // of this block, so we position to start of
+ // block and return NOT_FOUND:
+ assert state.termCount == 0;
+
+ final int suffix = termSuffixesReader.readVInt();
+ term.length = termBlockPrefix + suffix;
+ if (term.bytes.length < term.length) {
+ term.grow(term.length);
+ }
+ termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+ return SeekStatus.NOT_FOUND;
+ } else {
+ common++;
+ }
+
+ continue;
+ }
+
+ // Test every term in this block
+ while (true) {
+ state.termCount++;
+ state.ord++;
+
+ final int suffix = termSuffixesReader.readVInt();
+
+ // We know the prefix matches, so just compare the new suffix:
+ final int termLen = termBlockPrefix + suffix;
+ int bytePos = termSuffixesReader.getPosition();
+
+ boolean next = false;
+ final int limit = target.offset + (termLen < target.length ? termLen : target.length);
+ int targetPos = target.offset + termBlockPrefix;
+ while(targetPos < limit) {
+ final int cmp = (termSuffixes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
+ if (cmp < 0) {
+ // Current term is still before the target;
+ // keep scanning
+ next = true;
+ break;
+ } else if (cmp > 0) {
+ // Done! Current term is after target. Stop
+ // here, fill in real term, return NOT_FOUND.
+ term.length = termBlockPrefix + suffix;
+ if (term.bytes.length < term.length) {
+ term.grow(term.length);
+ }
+ termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+ //System.out.println(" NOT_FOUND");
+ return SeekStatus.NOT_FOUND;
+ }
+ }
+
+ if (!next && target.length <= termLen) {
+ term.length = termBlockPrefix + suffix;
+ if (term.bytes.length < term.length) {
+ term.grow(term.length);
+ }
+ termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+
+ if (target.length == termLen) {
+ // Done! Exact match. Stop here, fill in
+ // real term, return FOUND.
+ //System.out.println(" FOUND");
+
+ if (useCache) {
+ // Store in cache
+ decodeMetaData();
+ //System.out.println(" cache! state=" + state);
+ termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone());
+ }
+
+ return SeekStatus.FOUND;
+ } else {
+ //System.out.println(" NOT_FOUND");
+ return SeekStatus.NOT_FOUND;
+ }
+ }
+
+ if (state.termCount == state.blockTermCount) {
+ // Must pre-fill term for next block's common prefix
+ term.length = termBlockPrefix + suffix;
+ if (term.bytes.length < term.length) {
+ term.grow(term.length);
+ }
+ termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
+ break;
+ } else {
+ termSuffixesReader.skipBytes(suffix);
}
- //System.out.println(" FOUND");
- return SeekStatus.FOUND;
- } else if (cmp > 0) {
- //System.out.println(" NOT_FOUND term=" + term.utf8ToString());
- return SeekStatus.NOT_FOUND;
}
-
+
// The purpose of the terms dict index is to seek
// the enum to the closest index term before the
// term we are looking for. So, we should never
// cross another index term (besides the first
// one) while we are scanning:
+
assert indexIsCurrent;
- }
- indexIsCurrent = false;
- //System.out.println(" END");
- return SeekStatus.END;
+ if (!nextBlock()) {
+ //System.out.println(" END");
+ indexIsCurrent = false;
+ return SeekStatus.END;
+ }
+ common = 0;
+ }
}
@Override
@@ -521,12 +650,10 @@ public class BlockTermsReader extends Fi
decode all metadata up to the current term. */
private BytesRef _next() throws IOException {
//System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" + state.termCount + " (vs " + state.blockTermCount + ")");
- if (state.termCount == state.blockTermCount) {
- if (!nextBlock()) {
- //System.out.println(" eof");
- indexIsCurrent = false;
- return null;
- }
+ if (state.termCount == state.blockTermCount && !nextBlock()) {
+ //System.out.println(" eof");
+ indexIsCurrent = false;
+ return null;
}
// TODO: cutover to something better for these ints! simple64?
@@ -695,7 +822,7 @@ public class BlockTermsReader extends Fi
}
//System.out.println(" termSuffixes len=" + len);
in.readBytes(termSuffixes, 0, len);
- termSuffixesReader.reset(termSuffixes);
+ termSuffixesReader.reset(termSuffixes, 0, len);
// docFreq, totalTermFreq
len = in.readVInt();
@@ -704,7 +831,7 @@ public class BlockTermsReader extends Fi
}
//System.out.println(" freq bytes len=" + len);
in.readBytes(docFreqBytes, 0, len);
- freqReader.reset(docFreqBytes);
+ freqReader.reset(docFreqBytes, 0, len);
metaDataUpto = 0;
state.termCount = 0;
@@ -723,23 +850,32 @@ public class BlockTermsReader extends Fi
if (!seekPending) {
// lazily catch up on metadata decode:
final int limit = state.termCount;
+ // We must set/incr state.termCount because
+ // postings impl can look at this
state.termCount = metaDataUpto;
+ // TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
- //System.out.println(" decode");
+ //System.out.println(" decode mdUpto=" + metaDataUpto);
// TODO: we could make "tiers" of metadata, ie,
// decode docFreq/totalTF but don't decode postings
// metadata; this way caller could get
// docFreq/totalTF w/o paying decode cost for
// postings
+
+ // TODO: if docFreq were bulk decoded we could
+ // just skipN here:
state.docFreq = freqReader.readVInt();
+ //System.out.println(" dF=" + state.docFreq);
if (!fieldInfo.omitTermFreqAndPositions) {
state.totalTermFreq = state.docFreq + freqReader.readVLong();
+ //System.out.println(" totTF=" + state.totalTermFreq);
}
+
postingsReader.nextTerm(fieldInfo, state);
metaDataUpto++;
state.termCount++;
}
- } else {
+ //} else {
//System.out.println(" skip! seekPending");
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java Mon Feb 21 14:13:28 2011
@@ -63,24 +63,23 @@ public class BlockTermsWriter extends Fi
FieldInfo currentField;
private final TermsIndexWriterBase termsIndexWriter;
private final List<TermsWriter> fields = new ArrayList<TermsWriter>();
- private final Comparator<BytesRef> termComp;
- private final String segment;
+
+ //private final String segment;
public BlockTermsWriter(
TermsIndexWriterBase termsIndexWriter,
SegmentWriteState state,
- PostingsWriterBase postingsWriter,
- Comparator<BytesRef> termComp) throws IOException
+ PostingsWriterBase postingsWriter)
+ throws IOException
{
final String termsFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_EXTENSION);
this.termsIndexWriter = termsIndexWriter;
- this.termComp = termComp;
out = state.directory.createOutput(termsFileName);
fieldInfos = state.fieldInfos;
writeHeader(out);
currentField = null;
this.postingsWriter = postingsWriter;
- segment = state.segmentName;
+ //segment = state.segmentName;
//System.out.println("BTW.init seg=" + state.segmentName);
@@ -161,7 +160,6 @@ public class BlockTermsWriter extends Fi
private long numTerms;
private final TermsIndexWriterBase.FieldWriter fieldIndexWriter;
long sumTotalTermFreq;
- private final BytesRef lastTerm = new BytesRef();
private TermEntry[] pendingTerms;
@@ -185,12 +183,12 @@ public class BlockTermsWriter extends Fi
@Override
public Comparator<BytesRef> getComparator() {
- return termComp;
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public PostingsConsumer startTerm(BytesRef text) throws IOException {
- //System.out.println("BTW.startTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text);
+ //System.out.println("BTW.startTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment);
postingsWriter.startTerm();
return postingsWriter;
}
@@ -201,7 +199,7 @@ public class BlockTermsWriter extends Fi
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
assert stats.docFreq > 0;
- //System.out.println("BTW.finishTerm seg=" + segment + " term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " df=" + stats.docFreq);
+ //System.out.println("BTW.finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);
final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);
@@ -213,6 +211,7 @@ public class BlockTermsWriter extends Fi
flushBlock();
}
fieldIndexWriter.add(text, stats, out.getFilePointer());
+ //System.out.println(" index term!");
}
if (pendingTerms.length == pendingCount) {
@@ -265,7 +264,7 @@ public class BlockTermsWriter extends Fi
private final RAMOutputStream bytesWriter = new RAMOutputStream();
private void flushBlock() throws IOException {
- //System.out.println("BTW.flushBlock pendingCount=" + pendingCount);
+ //System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
// First pass: compute common prefix for all terms
// in the block, against term before first term in
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/CodecProvider.java Mon Feb 21 14:13:28 2011
@@ -24,11 +24,6 @@ import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
-import org.apache.lucene.index.codecs.preflex.PreFlexCodec;
-import org.apache.lucene.index.codecs.pulsing.PulsingCodec;
-import org.apache.lucene.index.codecs.simpletext.SimpleTextCodec;
-import org.apache.lucene.index.codecs.standard.StandardCodec;
-
/** Holds a set of codecs, keyed by name. You subclass
* this, instantiate it, and register your codecs, then
* pass this instance to IndexReader/IndexWriter (via
@@ -97,7 +92,7 @@ public class CodecProvider {
return infosReader;
}
- static private CodecProvider defaultCodecs = new DefaultCodecProvider();
+ static private CodecProvider defaultCodecs = new CoreCodecProvider();
public static CodecProvider getDefault() {
return defaultCodecs;
@@ -185,12 +180,3 @@ public class CodecProvider {
setDefaultFieldCodec(other.getDefaultFieldCodec());
}
}
-
-class DefaultCodecProvider extends CodecProvider {
- DefaultCodecProvider() {
- register(new StandardCodec());
- register(new PreFlexCodec());
- register(new PulsingCodec(1));
- register(new SimpleTextCodec());
- }
-}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Mon Feb 21 14:13:28 2011
@@ -152,36 +152,17 @@ public abstract class FixedIntBlockIndex
@Override
public void read(final DataInput indexIn, final boolean absolute) throws IOException {
if (absolute) {
- fp = indexIn.readVLong();
upto = indexIn.readVInt();
- } else {
- final long delta = indexIn.readVLong();
- if (delta == 0) {
- // same block
- upto += indexIn.readVInt();
- } else {
- // new block
- fp += delta;
- upto = indexIn.readVInt();
- }
- }
- assert upto < blockSize;
- }
-
- @Override
- public void read(final IntIndexInput.Reader indexIn, final boolean absolute) throws IOException {
- if (absolute) {
fp = indexIn.readVLong();
- upto = indexIn.next();
} else {
- final long delta = indexIn.readVLong();
- if (delta == 0) {
+ final int uptoDelta = indexIn.readVInt();
+ if ((uptoDelta & 1) == 1) {
// same block
- upto += indexIn.next();
+ upto += uptoDelta >>> 1;
} else {
// new block
- fp += delta;
- upto = indexIn.next();
+ upto = uptoDelta >>> 1;
+ fp += indexIn.readVLong();
}
}
assert upto < blockSize;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexOutput.java Mon Feb 21 14:13:28 2011
@@ -77,36 +77,17 @@ public abstract class FixedIntBlockIndex
@Override
public void write(IndexOutput indexOut, boolean absolute) throws IOException {
if (absolute) {
- indexOut.writeVLong(fp);
- indexOut.writeVInt(upto);
- } else if (fp == lastFP) {
- // same block
- indexOut.writeVLong(0);
- assert upto >= lastUpto;
- indexOut.writeVInt(upto - lastUpto);
- } else {
- // new block
- indexOut.writeVLong(fp - lastFP);
indexOut.writeVInt(upto);
- }
- lastUpto = upto;
- lastFP = fp;
- }
-
- @Override
- public void write(IntIndexOutput indexOut, boolean absolute) throws IOException {
- if (absolute) {
indexOut.writeVLong(fp);
- indexOut.write(upto);
} else if (fp == lastFP) {
// same block
- indexOut.writeVLong(0);
assert upto >= lastUpto;
- indexOut.write(upto - lastUpto);
+ int uptoDelta = upto - lastUpto;
+ indexOut.writeVInt(uptoDelta << 1 | 1);
} else {
// new block
+ indexOut.writeVInt(upto << 1);
indexOut.writeVLong(fp - lastFP);
- indexOut.write(upto);
}
lastUpto = upto;
lastFP = fp;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Mon Feb 21 14:13:28 2011
@@ -171,17 +171,17 @@ public abstract class VariableIntBlockIn
@Override
public void read(final DataInput indexIn, final boolean absolute) throws IOException {
if (absolute) {
+ upto = indexIn.readVInt();
fp = indexIn.readVLong();
- upto = indexIn.readByte()&0xFF;
} else {
- final long delta = indexIn.readVLong();
- if (delta == 0) {
+ final int uptoDelta = indexIn.readVInt();
+ if ((uptoDelta & 1) == 1) {
// same block
- upto = indexIn.readByte()&0xFF;
+ upto += uptoDelta >>> 1;
} else {
// new block
- fp += delta;
- upto = indexIn.readByte()&0xFF;
+ upto = uptoDelta >>> 1;
+ fp += indexIn.readVLong();
}
}
// TODO: we can't do this assert because non-causal
@@ -190,24 +190,6 @@ public abstract class VariableIntBlockIn
}
@Override
- public void read(final IntIndexInput.Reader indexIn, final boolean absolute) throws IOException {
- if (absolute) {
- fp = indexIn.readVLong();
- upto = indexIn.next()&0xFF;
- } else {
- final long delta = indexIn.readVLong();
- if (delta == 0) {
- // same block
- upto = indexIn.next()&0xFF;
- } else {
- // new block
- fp += delta;
- upto = indexIn.next()&0xFF;
- }
- }
- }
-
- @Override
public String toString() {
return "VarIntBlock.Index fp=" + fp + " upto=" + upto + " maxBlock=" + maxBlockSize;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexOutput.java Mon Feb 21 14:13:28 2011
@@ -42,16 +42,14 @@ public abstract class VariableIntBlockIn
private int upto;
- private static final int MAX_BLOCK_SIZE = 1 << 8;
+ // TODO what Var-Var codecs exist in practice... and what are there blocksizes like?
+ // if its less than 128 we should set that as max and use byte?
- /** NOTE: maxBlockSize plus the max non-causal lookahead
- * of your codec must be less than 256. EG Simple9
+ /** NOTE: maxBlockSize must be the maximum block size
+ * plus the max non-causal lookahead of your codec. EG Simple9
* requires lookahead=1 because on seeing the Nth value
* it knows it must now encode the N-1 values before it. */
protected VariableIntBlockIndexOutput(IndexOutput out, int maxBlockSize) throws IOException {
- if (maxBlockSize > MAX_BLOCK_SIZE) {
- throw new IllegalArgumentException("maxBlockSize must be <= " + MAX_BLOCK_SIZE + "; got " + maxBlockSize);
- }
this.out = out;
out.writeInt(maxBlockSize);
}
@@ -88,37 +86,17 @@ public abstract class VariableIntBlockIn
public void write(IndexOutput indexOut, boolean absolute) throws IOException {
assert upto >= 0;
if (absolute) {
+ indexOut.writeVInt(upto);
indexOut.writeVLong(fp);
- indexOut.writeByte((byte) upto);
- } else if (fp == lastFP) {
- // same block
- indexOut.writeVLong(0);
- assert upto >= lastUpto;
- indexOut.writeByte((byte) upto);
- } else {
- // new block
- indexOut.writeVLong(fp - lastFP);
- indexOut.writeByte((byte) upto);
- }
- lastUpto = upto;
- lastFP = fp;
- }
-
- @Override
- public void write(IntIndexOutput indexOut, boolean absolute) throws IOException {
- assert upto >= 0;
- if (absolute) {
- indexOut.writeVLong(fp);
- indexOut.write(upto);
} else if (fp == lastFP) {
// same block
- indexOut.writeVLong(0);
assert upto >= lastUpto;
- indexOut.write(upto);
+ int uptoDelta = upto - lastUpto;
+ indexOut.writeVInt(uptoDelta << 1 | 1);
} else {
// new block
+ indexOut.writeVInt(upto << 1);
indexOut.writeVLong(fp - lastFP);
- indexOut.write(upto);
}
lastUpto = upto;
lastFP = fp;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/SegmentTermDocs.java Mon Feb 21 14:13:28 2011
@@ -209,7 +209,8 @@ public class SegmentTermDocs {
/** Optimized implementation. */
public boolean skipTo(int target) throws IOException {
- if (df >= skipInterval) { // optimized case
+ // don't skip if the target is close (within skipInterval docs away)
+ if ((target - skipInterval) >= doc && df >= skipInterval) { // optimized case
if (skipListReader == null)
skipListReader = new DefaultSkipListReader((IndexInput) freqStream.clone(), maxSkipLevels, skipInterval); // lazily clone
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java Mon Feb 21 14:13:28 2011
@@ -89,7 +89,7 @@ public class PulsingCodec extends Codec
// Terms dict
success = false;
try {
- FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter);
success = true;
return ret;
} finally {
@@ -136,7 +136,6 @@ public class PulsingCodec extends Codec
state.dir, state.fieldInfos, state.segmentInfo.name,
pulsingReader,
state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
StandardCodec.TERMS_CACHE_SIZE,
state.codecId);
success = true;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Mon Feb 21 14:13:28 2011
@@ -144,7 +144,7 @@ public class PulsingPostingsReaderImpl e
//System.out.println(" count=" + count + " threshold=" + maxPositions);
if (count <= maxPositions) {
- //System.out.println(" inlined");
+ //System.out.println(" inlined pos=" + termState.inlinedBytesReader.getPosition());
// Inlined into terms dict -- just read the byte[] blob in,
// but don't decode it now (we only decode when a DocsEnum
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java Mon Feb 21 14:13:28 2011
@@ -41,8 +41,6 @@ public abstract class IntIndexInput impl
public abstract void read(DataInput indexIn, boolean absolute) throws IOException;
- public abstract void read(IntIndexInput.Reader indexIn, boolean absolute) throws IOException;
-
/** Seeks primary stream to the last read offset */
public abstract void seek(IntIndexInput.Reader stream) throws IOException;
@@ -57,18 +55,6 @@ public abstract class IntIndexInput impl
/** Reads next single int */
public abstract int next() throws IOException;
- /** Encodes as 1 or 2 ints, and can only use 61 of the 64
- * long bits. */
- public long readVLong() throws IOException {
- final int v = next();
- if ((v & 1) == 0) {
- return v >> 1;
- } else {
- final long v2 = next();
- return (v2 << 30) | (v >> 1);
- }
- }
-
/** Reads next chunk of ints */
private IntsRef bulkResult;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexOutput.java Mon Feb 21 14:13:28 2011
@@ -38,23 +38,6 @@ public abstract class IntIndexOutput imp
* >= 0. */
public abstract void write(int v) throws IOException;
- public static final long MAX_SINGLE_INT_VLONG = Integer.MAX_VALUE - (1<<30);
- public static final long MAX_VLONG = Long.MAX_VALUE - (1L<<62) - (1L<<61);
-
- /** Encodes as 1 or 2 ints, and can only use 61 of the 64
- * long bits. */
- public void writeVLong(long v) throws IOException {
- assert v >= 0: "v=" + v;
- assert v < MAX_VLONG: "v=" + v;
- // we cannot pass a negative int
- if (v <= MAX_SINGLE_INT_VLONG) {
- write(((int) v)<<1);
- } else {
- write(((int) ((v & MAX_SINGLE_INT_VLONG))<<1) | 1);
- write(((int) (v >> 30)));
- }
- }
-
public abstract static class Index {
/** Internally records the current location */
@@ -66,8 +49,6 @@ public abstract class IntIndexOutput imp
/** Writes "location" of current output pointer of primary
* output to different output (out) */
public abstract void write(IndexOutput indexOut, boolean absolute) throws IOException;
-
- public abstract void write(IntIndexOutput indexOut, boolean absolute) throws IOException;
}
/** If you are indexing the primary output file, call
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java Mon Feb 21 14:13:28 2011
@@ -56,6 +56,7 @@ public class SepPostingsReaderImpl exten
int skipInterval;
int maxSkipLevels;
+ int skipMinimum;
public SepPostingsReaderImpl(Directory dir, SegmentInfo segmentInfo, int readBufferSize, IntStreamFactory intFactory, String codecId) throws IOException {
@@ -102,6 +103,7 @@ public class SepPostingsReaderImpl exten
SepPostingsWriterImpl.VERSION_START, SepPostingsWriterImpl.VERSION_START);
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
+ skipMinimum = termsIn.readInt();
}
@Override
@@ -231,7 +233,7 @@ public class SepPostingsReaderImpl exten
//System.out.println(" payloadFP=" + termState.payloadFP);
}
}
- if (termState.docFreq >= skipInterval) {
+ if (termState.docFreq >= skipMinimum) {
//System.out.println(" readSkip @ " + termState.bytesReader.pos);
if (isFirstTerm) {
termState.skipFP = termState.bytesReader.readVLong();
@@ -240,7 +242,7 @@ public class SepPostingsReaderImpl exten
}
//System.out.println(" skipFP=" + termState.skipFP);
} else if (isFirstTerm) {
- termState.skipFP = termState.bytesReader.readVLong();
+ termState.skipFP = 0;
}
}
@@ -344,7 +346,7 @@ public class SepPostingsReaderImpl exten
}
docFreq = termState.docFreq;
- // NOTE: unused if docFreq < skipInterval:
+ // NOTE: unused if docFreq < skipMinimum:
skipFP = termState.skipFP;
count = 0;
doc = 0;
@@ -420,13 +422,10 @@ public class SepPostingsReaderImpl exten
@Override
public int advance(int target) throws IOException {
- // TODO: jump right to next() if target is < X away
- // from where we are now?
-
- if (docFreq >= skipInterval) {
+ if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and its not too close
if (skipper == null) {
// This DocsEnum has never done any skipping
@@ -599,13 +598,10 @@ public class SepPostingsReaderImpl exten
public int advance(int target) throws IOException {
//System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
- // TODO: jump right to next() if target is < X away
- // from where we are now?
-
- if (docFreq >= skipInterval) {
+ if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
// There are enough docs in the posting to have
- // skip data
+ // skip data, and its not too close
if (skipper == null) {
//System.out.println(" create skipper");
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java Mon Feb 21 14:13:28 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@@ -63,8 +64,23 @@ public final class SepPostingsWriterImpl
IndexOutput termsOut;
final SepSkipListWriter skipListWriter;
- final int skipInterval;
- final int maxSkipLevels;
+ /** Expert: The fraction of TermDocs entries stored in skip tables,
+ * used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
+ * smaller indexes, greater acceleration, but fewer accelerable cases, while
+ * smaller values result in bigger indexes, less acceleration and more
+ * accelerable cases. More detailed experiments would be useful here. */
+ final int skipInterval = 16;
+
+ /**
+ * Expert: minimum docFreq to write any skip data at all
+ */
+ final int skipMinimum = skipInterval;
+
+ /** Expert: The maximum number of skip levels. Smaller values result in
+ * slightly smaller indexes, but slower skipping in big posting lists.
+ */
+ final int maxSkipLevels = 10;
+
final int totalNumDocs;
boolean storePayloads;
@@ -118,15 +134,11 @@ public final class SepPostingsWriterImpl
totalNumDocs = state.numDocs;
- // TODO: -- abstraction violation
- skipListWriter = new SepSkipListWriter(state.skipInterval,
- state.maxSkipLevels,
+ skipListWriter = new SepSkipListWriter(skipInterval,
+ maxSkipLevels,
state.numDocs,
freqOut, docOut,
posOut, payloadOut);
-
- skipInterval = state.skipInterval;
- maxSkipLevels = state.maxSkipLevels;
}
@Override
@@ -136,6 +148,7 @@ public final class SepPostingsWriterImpl
// TODO: -- just ask skipper to "start" here
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
+ termsOut.writeInt(skipMinimum); // write skipMinimum
}
@Override
@@ -264,7 +277,7 @@ public final class SepPostingsWriterImpl
}
}
- if (df >= skipInterval) {
+ if (df >= skipMinimum) {
//System.out.println(" skipFP=" + skipStart);
final long skipFP = skipOut.getFilePointer();
skipListWriter.writeSkip(skipOut);
@@ -276,12 +289,8 @@ public final class SepPostingsWriterImpl
}
lastSkipFP = skipFP;
} else if (isFirstTerm) {
- // TODO: this is somewhat wasteful; eg if no terms in
- // this block will use skip data, we don't need to
- // write this:
- final long skipFP = skipOut.getFilePointer();
- indexBytesWriter.writeVLong(skipFP);
- lastSkipFP = skipFP;
+ // lazily write an absolute delta if a term in this block requires skip data.
+ lastSkipFP = 0;
}
lastDocID = 0;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListReader.java Mon Feb 21 14:13:28 2011
@@ -197,7 +197,9 @@ class SepSkipListReader extends MultiLev
docIndex[level].read(skipStream, false);
if (!omitTF) {
posIndex[level].read(skipStream, false);
- payloadPointer[level] += skipStream.readVInt();
+ if (currentFieldStoresPayloads) {
+ payloadPointer[level] += skipStream.readVInt();
+ }
}
return delta;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/sep/SepSkipListWriter.java Mon Feb 21 14:13:28 2011
@@ -188,7 +188,9 @@ class SepSkipListWriter extends MultiLev
if (!omitTF) {
posIndex[level].mark();
posIndex[level].write(skipBuffer, false);
- skipBuffer.writeVInt((int) (curPayloadPointer - lastSkipPayloadPointer[level]));
+ if (curStorePayloads) {
+ skipBuffer.writeVInt((int) (curPayloadPointer - lastSkipPayloadPointer[level]));
+ }
}
lastSkipDoc[level] = curDoc;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java?rev=1072973&r1=1072972&r2=1072973&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java Mon Feb 21 14:13:28 2011
@@ -23,7 +23,6 @@ import java.util.Set;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
@@ -66,7 +65,7 @@ public class StandardCodec extends Codec
success = false;
try {
- FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
success = true;
return ret;
} finally {
@@ -109,7 +108,6 @@ public class StandardCodec extends Codec
state.segmentInfo.name,
postings,
state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
TERMS_CACHE_SIZE,
state.codecId);
success = true;