You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by cu...@apache.org on 2004/02/24 21:41:16 UTC
cvs commit: jakarta-lucene/src/java/org/apache/lucene/search StringSortedHitQueue.java
cutting 2004/02/24 12:41:16
Modified: src/java/org/apache/lucene/search StringSortedHitQueue.java
Log:
Fixed problem with sorting.
Revision Changes Path
1.3 +62 -14 jakarta-lucene/src/java/org/apache/lucene/search/StringSortedHitQueue.java
Index: StringSortedHitQueue.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/StringSortedHitQueue.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- StringSortedHitQueue.java 24 Feb 2004 19:34:58 -0000 1.2
+++ StringSortedHitQueue.java 24 Feb 2004 20:41:16 -0000 1.3
@@ -26,8 +26,9 @@
/**
* Expert: A sorted hit queue for fields that contain string values.
* Hits are sorted into the queue by the values in the field and then by document number.
- * The internal cache contains integers - the strings are sorted and
- * then only their sequence number cached.
+ * Warning: The internal cache could be quite large, depending on the number of terms
+ * in the field! All the terms are kept in memory, as well as a sorted array of
+ * integers representing their relative position.
*
* <p>Created: Feb 2, 2004 9:26:33 AM
*
@@ -68,21 +69,24 @@
/**
* Returns a comparator for sorting hits according to a field containing strings.
* @param reader Index to use.
- * @param field Field containg string values.
+ * @param fieldname Field containing string values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
- static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
+ static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname)
throws IOException {
+ final String field = fieldname.intern();
return new ScoreDocLookupComparator() {
/** The sort information being used by this instance */
protected final int[] fieldOrder = generateSortIndex();
+ protected String[] terms;
private final int[] generateSortIndex()
throws IOException {
final int[] retArray = new int[reader.maxDoc()];
+ final String[] mterms = new String[reader.maxDoc()]; // guess length
TermEnum enumerator = reader.terms (new Term (field, ""));
TermDocs termDocs = reader.termDocs();
@@ -98,22 +102,43 @@
// following loop will automatically sort the
// terms in the correct order.
+ // if a given document has more than one term
+ // in the field, only the last one will be used.
+
+ int t = 0; // current term number
try {
- int t = 0; // current term number
do {
Term term = enumerator.term();
if (term.field() != field) break;
- t++;
+
+ // store term text
+ // we expect that there is at most one term per document
+ if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\"");
+ mterms[t] = term.text();
+
+ // store which documents use this term
termDocs.seek (enumerator);
while (termDocs.next()) {
retArray[termDocs.doc()] = t;
}
+
+ t++;
} while (enumerator.next());
+
} finally {
enumerator.close();
termDocs.close();
}
+ // if there are fewer terms than documents,
+ // trim off the dead array space
+ if (t < mterms.length) {
+ terms = new String[t];
+ System.arraycopy (mterms, 0, terms, 0, t);
+ } else {
+ terms = mterms;
+ }
+
return retArray;
}
@@ -138,11 +163,11 @@
}
public Object sortValue (final ScoreDoc i) {
- return new Integer(fieldOrder[i.doc]);
+ return terms[fieldOrder[i.doc]];
}
public int sortType() {
- return SortField.INT;
+ return SortField.STRING;
}
};
}
@@ -152,20 +177,23 @@
* Returns a comparator for sorting hits according to a field containing strings using the given enumerator
* to collect term values.
* @param reader Index to use.
- * @param field Field containg string values.
+ * @param fieldname Field containing string values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
- static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
+ static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname)
throws IOException {
+ final String field = fieldname.intern();
return new ScoreDocLookupComparator() {
protected final int[] fieldOrder = generateSortIndex();
+ protected String[] terms;
private final int[] generateSortIndex()
throws IOException {
final int[] retArray = new int[reader.maxDoc()];
+ final String[] mterms = new String[reader.maxDoc()]; // guess length
// NOTE: the contract for TermEnum says the
// terms will be in natural order (which is
@@ -175,22 +203,42 @@
// following loop will automatically sort the
// terms in the correct order.
+ // if a given document has more than one term
+ // in the field, only the last one will be used.
+
TermDocs termDocs = reader.termDocs();
+ int t = 0; // current term number
try {
- int t = 0; // current term number
do {
Term term = enumerator.term();
if (term.field() != field) break;
- t++;
+
+ // store term text
+ // we expect that there is at most one term per document
+ if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\"");
+ mterms[t] = term.text();
+
+ // store which documents use this term
termDocs.seek (enumerator);
while (termDocs.next()) {
retArray[termDocs.doc()] = t;
}
+
+ t++;
} while (enumerator.next());
} finally {
termDocs.close();
}
+ // if there are fewer terms than documents,
+ // trim off the dead array space
+ if (t < mterms.length) {
+ terms = new String[t];
+ System.arraycopy (mterms, 0, terms, 0, t);
+ } else {
+ terms = mterms;
+ }
+
return retArray;
}
@@ -215,11 +263,11 @@
}
public Object sortValue (final ScoreDoc i) {
- return new Integer(fieldOrder[i.doc]);
+ return terms[fieldOrder[i.doc]];
}
public int sortType() {
- return SortField.INT;
+ return SortField.STRING;
}
};
}
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org