You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by cu...@apache.org on 2004/10/08 17:58:49 UTC
cvs commit: jakarta-lucene/src/java/org/apache/lucene/index TermBuffer.java SegmentTermEnum.java TermInfosReader.java
cutting 2004/10/08 08:58:49
Modified: . CHANGES.txt
src/java/org/apache/lucene/index SegmentTermEnum.java
TermInfosReader.java
Added: src/java/org/apache/lucene/index TermBuffer.java
Log:
Optimize term dictionary lookup to allocate fewer terms.
Revision Changes Path
1.118 +6 -1 jakarta-lucene/CHANGES.txt
Index: CHANGES.txt
===================================================================
RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
retrieving revision 1.117
retrieving revision 1.118
diff -u -r1.117 -r1.118
--- CHANGES.txt 6 Oct 2004 12:15:05 -0000 1.117
+++ CHANGES.txt 8 Oct 2004 15:58:49 -0000 1.118
@@ -97,6 +97,11 @@
21. Add a serializable Parameter Class to standardize parameter enum
classes in BooleanClause and Field. (Christoph)
+22. Optimize term-dictionary lookup to allocate far fewer terms when
+ scanning for the matching term. This speeds searches involving
+ low-frequency terms, where the cost of dictionary lookup can be
+ significant. (cutting)
+
1.4.1
1.9 +25 -28 jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java
Index: SegmentTermEnum.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- SegmentTermEnum.java 16 Sep 2004 21:13:37 -0000 1.8
+++ SegmentTermEnum.java 8 Oct 2004 15:58:49 -0000 1.9
@@ -25,7 +25,10 @@
long size;
long position = -1;
- private Term term = new Term("", "");
+ private TermBuffer termBuffer = new TermBuffer();
+ private TermBuffer prevBuffer = new TermBuffer();
+ private TermBuffer scratch; // used for scanning
+
private TermInfo termInfo = new TermInfo();
private int format;
@@ -34,9 +37,6 @@
int indexInterval;
int skipInterval;
private int formatM1SkipInterval;
- Term prev;
-
- private char[] buffer = {};
SegmentTermEnum(IndexInput i, FieldInfos fis, boolean isi)
throws IOException {
@@ -89,7 +89,10 @@
clone.input = (IndexInput) input.clone();
clone.termInfo = new TermInfo(termInfo);
- if (term != null) clone.growBuffer(term.text.length());
+
+ clone.termBuffer = (TermBuffer)termBuffer.clone();
+ clone.prevBuffer = (TermBuffer)prevBuffer.clone();
+ clone.scratch = null;
return clone;
}
@@ -98,21 +101,20 @@
throws IOException {
input.seek(pointer);
position = p;
- term = t;
- prev = null;
+ termBuffer.set(t);
+ prevBuffer.reset();
termInfo.set(ti);
- growBuffer(term.text.length()); // copy term text into buffer
}
/** Increments the enumeration to the next element. True if one exists.*/
public final boolean next() throws IOException {
if (position++ >= size - 1) {
- term = null;
+ termBuffer.reset();
return false;
}
- prev = term;
- term = readTerm();
+ prevBuffer.set(termBuffer);
+ termBuffer.read(input, fieldInfos);
termInfo.docFreq = input.readVInt(); // read doc freq
termInfo.freqPointer += input.readVLong(); // read freq pointer
@@ -138,28 +140,23 @@
return true;
}
- private final Term readTerm() throws IOException {
- int start = input.readVInt();
- int length = input.readVInt();
- int totalLength = start + length;
- if (buffer.length < totalLength)
- growBuffer(totalLength);
-
- input.readChars(buffer, start, length);
- return new Term(fieldInfos.fieldName(input.readVInt()),
- new String(buffer, 0, totalLength), false);
- }
-
- private final void growBuffer(int length) {
- buffer = new char[length];
- for (int i = 0; i < term.text.length(); i++) // copy contents
- buffer[i] = term.text.charAt(i);
+ /** Optimized scan, without allocating new terms. */
+ final void scanTo(Term term) throws IOException {
+ if (scratch == null)
+ scratch = new TermBuffer();
+ scratch.set(term);
+ while (scratch.compareTo(termBuffer) > 0 && next()) {}
}
/** Returns the current Term in the enumeration.
Initially invalid, valid after next() called for the first time.*/
public final Term term() {
- return term;
+ return termBuffer.toTerm();
+ }
+
+ /** Returns the previous Term enumerated. Initially null.*/
+ final Term prev() {
+ return prevBuffer.toTerm();
}
/** Returns the current TermInfo in the enumeration.
1.11 +2 -2 jakarta-lucene/src/java/org/apache/lucene/index/TermInfosReader.java
Index: TermInfosReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermInfosReader.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- TermInfosReader.java 16 Sep 2004 21:13:37 -0000 1.10
+++ TermInfosReader.java 8 Oct 2004 15:58:49 -0000 1.11
@@ -129,7 +129,7 @@
// optimize sequential access: first try scanning cached enum w/o seeking
SegmentTermEnum enumerator = getEnum();
if (enumerator.term() != null // term is at or past current
- && ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0)
+ && ((enumerator.prev() != null && term.compareTo(enumerator.prev())> 0)
|| term.compareTo(enumerator.term()) >= 0)) {
int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
if (indexTerms.length == enumOffset // but before end of block
@@ -145,7 +145,7 @@
/** Scans within block for matching term. */
private final TermInfo scanEnum(Term term) throws IOException {
SegmentTermEnum enumerator = getEnum();
- while (term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
+ enumerator.scanTo(term);
if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0)
return enumerator.termInfo();
else
1.1 jakarta-lucene/src/java/org/apache/lucene/index/TermBuffer.java
Index: TermBuffer.java
===================================================================
package org.apache.lucene.index;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
/**
 * A reusable, mutable holder for a single term: a field name plus the term
 * text kept in a growable char array.  A buffer whose field is null is
 * "unset", and {@link #toTerm()} then returns null.
 *
 * <p>The buffer can be filled from a {@link Term}, from another buffer, or
 * from an {@link IndexInput}, and only materializes a {@link Term} object
 * when {@link #toTerm()} is called.  That Term is cached until the buffer
 * is next read into or reset.
 */
final class TermBuffer implements Cloneable {
  private static final char[] NO_CHARS = new char[0];

  private String field;                           // null while the buffer is unset
  private char[] text = NO_CHARS;                 // backing store; may be longer than textLength
  private int textLength;                         // number of valid chars in text
  private Term term;                              // cached

  /**
   * Orders two buffers by field name, then by term text (char by char, a
   * shorter text sorting before a longer text that it prefixes).
   *
   * <p>Both buffers must be set: if exactly one of the two fields is null
   * the field comparison throws NullPointerException.
   *
   * @param other the buffer to compare against
   * @return a negative, zero or positive value as this buffer sorts before,
   *         equal to, or after <code>other</code>
   */
  public final int compareTo(TermBuffer other) {
    if (field == other.field)                     // fields are interned
      return compareChars(text, textLength, other.text, other.textLength);

    int fieldOrder = field.compareTo(other.field);
    if (fieldOrder != 0)
      return fieldOrder;

    // Equal field names that are not the same instance: the identity test
    // above missed, so the text still has to decide the ordering.
    return compareChars(text, textLength, other.text, other.textLength);
  }

  /**
   * Compares the first <code>len1</code> chars of <code>v1</code> with the
   * first <code>len2</code> chars of <code>v2</code>.  The first differing
   * char decides; if one sequence is a prefix of the other, the shorter one
   * sorts first.
   */
  private static final int compareChars(char[] v1, int len1,
                                        char[] v2, int len2) {
    int end = Math.min(len1, len2);
    for (int k = 0; k < end; k++) {
      char c1 = v1[k];
      char c2 = v2[k];
      if (c1 != c2) {
        return c1 - c2;
      }
    }
    return len1 - len2;
  }

  /**
   * Sets the number of valid chars, enlarging the backing array when it is
   * too small.  When the array is replaced, the currently valid chars are
   * carried over so that a retained prefix survives the resize.
   */
  private final void setTextLength(int newLength) {
    if (text.length < newLength) {
      char[] newText = new char[newLength];
      System.arraycopy(text, 0, newText, 0, textLength);
      text = newText;
    }
    textLength = newLength;
  }

  /**
   * Replaces the contents of this buffer with the next term read from
   * <code>input</code>.  The encoding consumed here is: a VInt
   * <code>start</code>, a VInt <code>length</code>, <code>length</code>
   * chars that are stored at offset <code>start</code>, and a VInt field
   * number that is resolved to a name through <code>fieldInfos</code>.
   * Chars already in the buffer below <code>start</code> are left in place.
   *
   * @param input      source positioned at the start of a term entry
   * @param fieldInfos maps the stored field number to a field name
   * @throws IOException if reading from <code>input</code> fails
   */
  public final void read(IndexInput input, FieldInfos fieldInfos)
    throws IOException {
    this.term = null;                             // invalidate cache
    int start = input.readVInt();
    int length = input.readVInt();
    int totalLength = start + length;
    setTextLength(totalLength);
    input.readChars(this.text, start, length);
    this.field = fieldInfos.fieldName(input.readVInt());
  }

  /**
   * Copies the field and text of <code>term</code> into this buffer and
   * caches <code>term</code> itself as the result of {@link #toTerm()}.
   *
   * @param term the term to copy, or null to reset this buffer
   */
  public final void set(Term term) {
    if (term == null) {
      reset();
      return;
    }

    // copy text into the buffer
    setTextLength(term.text().length());
    term.text().getChars(0, term.text().length(), text, 0);

    this.field = term.field();
    this.term = term;
  }

  /**
   * Makes this buffer hold the same field, text and cached Term as
   * <code>other</code>.  The chars are copied, so the two buffers do not
   * share a backing array afterwards.
   */
  public final void set(TermBuffer other) {
    setTextLength(other.textLength);
    System.arraycopy(other.text, 0, text, 0, textLength);

    this.field = other.field;
    this.term = other.term;
  }

  /**
   * Marks this buffer as unset.  The backing array is kept for reuse; only
   * the field, the valid length and the cached Term are cleared.
   */
  public void reset() {
    this.field = null;
    this.textLength = 0;
    this.term = null;
  }

  /**
   * Returns the buffered term as a {@link Term}, creating and caching it on
   * first use.
   *
   * @return the term, or null if this buffer is unset
   */
  public Term toTerm() {
    if (field == null)                            // unset
      return null;

    if (term == null)
      // NOTE(review): the trailing 'false' presumably tells Term not to
      // intern the field again -- confirm against the Term constructor.
      term = new Term(field, new String(text, 0, textLength), false);

    return term;
  }

  /**
   * Returns a copy with its own backing array, so later changes to either
   * buffer's text do not affect the other.  The field reference and the
   * cached Term are shared with the original.
   */
  protected Object clone() {
    TermBuffer clone;
    try {
      clone = (TermBuffer)super.clone();
    } catch (CloneNotSupportedException e) {
      // Not expected, since this class implements Cloneable.  Fail loudly
      // with the cause attached rather than hitting a NullPointerException
      // on the uninitialized copy below.
      throw new RuntimeException("TermBuffer could not be cloned", e);
    }

    clone.text = new char[text.length];
    System.arraycopy(text, 0, clone.text, 0, textLength);

    return clone;
  }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org