You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by go...@apache.org on 2004/04/20 15:47:58 UTC
cvs commit: jakarta-lucene/src/java/org/apache/lucene/index SegmentTermEnum.java SegmentTermDocs.java SegmentMerger.java TermInfosWriter.java
goller 2004/04/20 06:47:58
Modified: src/java/org/apache/lucene/index SegmentTermEnum.java
SegmentTermDocs.java SegmentMerger.java
TermInfosWriter.java
Log:
Hopefully corrected, or at least improved, version of skipTo.
Revision Changes Path
1.7 +26 -8 jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java
Index: SegmentTermEnum.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- SegmentTermEnum.java 19 Apr 2004 14:46:00 -0000 1.6
+++ SegmentTermEnum.java 20 Apr 2004 13:47:58 -0000 1.7
@@ -33,6 +33,7 @@
long indexPointer = 0;
int indexInterval;
int skipInterval;
+ private int formatM1SkipInterval;
Term prev;
private char[] buffer = {};
@@ -51,7 +52,7 @@
// back-compatible settings
indexInterval = 128;
- skipInterval = Integer.MAX_VALUE;
+ skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization
} else {
// we have a format version number
@@ -62,8 +63,17 @@
throw new IOException("Unknown format version:" + format);
size = input.readLong(); // read the size
-
- if (!isIndex) {
+
+ if(format == -1){
+ if (!isIndex) {
+ indexInterval = input.readInt();
+ formatM1SkipInterval = input.readInt();
+ }
+ // switch off the skipTo optimization for file formats prior to 1.4rc2 in order to avoid a bug in
+ // the skipTo implementation of those versions
+ skipInterval = Integer.MAX_VALUE;
+ }
+ else{
indexInterval = input.readInt();
skipInterval = input.readInt();
}
@@ -107,13 +117,21 @@
termInfo.docFreq = input.readVInt(); // read doc freq
termInfo.freqPointer += input.readVLong(); // read freq pointer
termInfo.proxPointer += input.readVLong(); // read prox pointer
-
- if (!isIndex) {
- if (termInfo.docFreq > skipInterval) {
- termInfo.skipOffset = input.readVInt();
+
+ if(format == -1){
+ // just read skipOffset in order to increment file pointer;
+ // value is never used since skipTo is switched off
+ if (!isIndex) {
+ if (termInfo.docFreq > formatM1SkipInterval) {
+ termInfo.skipOffset = input.readVInt();
+ }
}
}
-
+ else{
+ if (termInfo.docFreq >= skipInterval)
+ termInfo.skipOffset = input.readVInt();
+ }
+
if (isIndex)
indexPointer += input.readVLong(); // read index pointer
1.12 +3 -1 jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java
Index: SegmentTermDocs.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- SegmentTermDocs.java 19 Apr 2004 19:32:20 -0000 1.11
+++ SegmentTermDocs.java 20 Apr 2004 13:47:58 -0000 1.12
@@ -84,6 +84,8 @@
public void close() throws IOException {
freqStream.close();
+ if (skipStream != null)
+ skipStream.close();
}
public final int doc() { return doc; }
@@ -143,7 +145,7 @@
/** Optimized implementation. */
public boolean skipTo(int target) throws IOException {
- if (df > skipInterval) { // optimized case
+ if (df >= skipInterval) { // optimized case
if (skipStream == null)
skipStream = (InputStream) freqStream.clone(); // lazily clone
1.10 +3 -2 jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java
Index: SegmentMerger.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- SegmentMerger.java 29 Mar 2004 22:48:02 -0000 1.9
+++ SegmentMerger.java 20 Apr 2004 13:47:58 -0000 1.10
@@ -234,6 +234,7 @@
private OutputStream freqOutput = null;
private OutputStream proxOutput = null;
private TermInfosWriter termInfosWriter = null;
+ private int skipInterval;
private SegmentMergeQueue queue = null;
private final void mergeTerms() throws IOException {
@@ -242,6 +243,8 @@
proxOutput = directory.createFile(segment + ".prx");
termInfosWriter =
new TermInfosWriter(directory, segment, fieldInfos);
+ skipInterval = termInfosWriter.skipInterval;
+ queue = new SegmentMergeQueue(readers.size());
mergeTermInfos();
@@ -254,7 +257,6 @@
}
private final void mergeTermInfos() throws IOException {
- queue = new SegmentMergeQueue(readers.size());
int base = 0;
for (int i = 0; i < readers.size(); i++) {
IndexReader reader = (IndexReader) readers.elementAt(i);
@@ -327,7 +329,6 @@
*/
private final int appendPostings(SegmentMergeInfo[] smis, int n)
throws IOException {
- final int skipInterval = termInfosWriter.skipInterval;
int lastDoc = 0;
int df = 0; // number of docs w/ term
resetSkip();
1.6 +6 -10 jakarta-lucene/src/java/org/apache/lucene/index/TermInfosWriter.java
Index: TermInfosWriter.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermInfosWriter.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- TermInfosWriter.java 25 Mar 2004 13:49:49 -0000 1.5
+++ TermInfosWriter.java 20 Apr 2004 13:47:58 -0000 1.6
@@ -27,13 +27,13 @@
final class TermInfosWriter {
/** The file format version, a negative number. */
- public static final int FORMAT = -1;
+ public static final int FORMAT = -2;
private FieldInfos fieldInfos;
private OutputStream output;
private Term lastTerm = new Term("", "");
private TermInfo lastTi = new TermInfo();
- private int size = 0;
+ private long size = 0;
// TODO: the default values for these two parameters should be settable from
// IndexWriter. However, once that's done, folks will start setting them to
@@ -80,10 +80,8 @@
output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
output.writeInt(FORMAT); // write format
output.writeLong(0); // leave space for size
- if (!isIndex) {
- output.writeInt(indexInterval); // write indexInterval
- output.writeInt(skipInterval); // write skipInterval
- }
+ output.writeInt(indexInterval); // write indexInterval
+ output.writeInt(skipInterval); // write skipInterval
}
/** Adds a new <Term, TermInfo> pair to the set.
@@ -106,10 +104,8 @@
output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
output.writeVLong(ti.proxPointer - lastTi.proxPointer);
- if (!isIndex) {
- if (ti.docFreq > skipInterval) {
- output.writeVInt(ti.skipOffset);
- }
+ if (ti.docFreq >= skipInterval) {
+ output.writeVInt(ti.skipOffset);
}
if (isIndex) {
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org