You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by cu...@apache.org on 2004/02/24 21:41:16 UTC
cvs commit: jakarta-lucene/src/java/org/apache/lucene/search StringSortedHitQueue.java

cutting     2004/02/24 12:41:16

  Modified:    src/java/org/apache/lucene/search StringSortedHitQueue.java
  Log:
  Fixed problem with sorting.
  
  Revision  Changes    Path
  1.3       +62 -14    jakarta-lucene/src/java/org/apache/lucene/search/StringSortedHitQueue.java
  
  Index: StringSortedHitQueue.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/StringSortedHitQueue.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- StringSortedHitQueue.java	24 Feb 2004 19:34:58 -0000	1.2
  +++ StringSortedHitQueue.java	24 Feb 2004 20:41:16 -0000	1.3
  @@ -26,8 +26,9 @@
   /**
    * Expert: A sorted hit queue for fields that contain string values.
    * Hits are sorted into the queue by the values in the field and then by document number.
  - * The internal cache contains integers - the strings are sorted and
  - * then only their sequence number cached.
  + * Warning: The internal cache could be quite large, depending on the number of terms
  + * in the field!  All the terms are kept in memory, as well as a sorted array of
  + * integers representing their relative position.
    *
    * <p>Created: Feb 2, 2004 9:26:33 AM
    *
  @@ -68,21 +69,24 @@
   	/**
   	 * Returns a comparator for sorting hits according to a field containing strings.
   	 * @param reader  Index to use.
  -	 * @param field  Field containg string values.
  +	 * @param fieldname  Field containing string values.
   	 * @return  Comparator for sorting hits.
   	 * @throws IOException If an error occurs reading the index.
   	 */
  -	static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
  +	static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname)
   	throws IOException {
  +		final String field = fieldname.intern();
   		return new ScoreDocLookupComparator() {
   
   			/** The sort information being used by this instance */
   			protected final int[] fieldOrder = generateSortIndex();
  +			protected String[] terms;
   
   			private final int[] generateSortIndex()
   			throws IOException {
   
   				final int[] retArray = new int[reader.maxDoc()];
  +				final String[] mterms = new String[reader.maxDoc()];   // guess length
   
   				TermEnum enumerator = reader.terms (new Term (field, ""));
   				TermDocs termDocs = reader.termDocs();
  @@ -98,22 +102,43 @@
   				// following loop will automatically sort the
   				// terms in the correct order.
   
  +				// if a given document has more than one term
  +				// in the field, only the last one will be used.
  +
  +				int t = 0;  // current term number
   				try {
  -					int t = 0;  // current term number
   					do {
   						Term term = enumerator.term();
   						if (term.field() != field) break;
  -						t++;
  +
  +						// store term text
  +						// we expect that there is at most one term per document
  +						if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\"");
  +						mterms[t] = term.text();
  +
  +						// store which documents use this term
   						termDocs.seek (enumerator);
   						while (termDocs.next()) {
   							retArray[termDocs.doc()] = t;
   						}
  +
  +						t++;
   					} while (enumerator.next());
  +
   				} finally {
   					enumerator.close();
   					termDocs.close();
   				}
   
  +				// if there are fewer terms than documents,
  +				// trim off the dead array space
  +				if (t < mterms.length) {
  +					terms = new String[t];
  +					System.arraycopy (mterms, 0, terms, 0, t);
  +				} else {
  +					terms = mterms;
  +				}
  +
   				return retArray;
   			}
   
  @@ -138,11 +163,11 @@
   			}
   
   			public Object sortValue (final ScoreDoc i) {
  -				return new Integer(fieldOrder[i.doc]);
  +				return terms[fieldOrder[i.doc]];
   			}
   
   			public int sortType() {
  -				return SortField.INT;
  +				return SortField.STRING;
   			}
   		};
   	}
  @@ -152,20 +177,23 @@
   	 * Returns a comparator for sorting hits according to a field containing strings using the given enumerator
   	 * to collect term values.
   	 * @param reader  Index to use.
  -	 * @param field  Field containg string values.
  +	 * @param fieldname  Field containing string values.
   	 * @return  Comparator for sorting hits.
   	 * @throws IOException If an error occurs reading the index.
   	 */
  -	static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
  +	static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname)
   	throws IOException {
  +		final String field = fieldname.intern();
   		return new ScoreDocLookupComparator() {
   
   			protected final int[] fieldOrder = generateSortIndex();
  +			protected String[] terms;
   
   			private final int[] generateSortIndex()
   			throws IOException {
   
   				final int[] retArray = new int[reader.maxDoc()];
  +				final String[] mterms = new String[reader.maxDoc()];  // guess length
   
   				// NOTE: the contract for TermEnum says the
   				// terms will be in natural order (which is
  @@ -175,22 +203,42 @@
   				// following loop will automatically sort the
   				// terms in the correct order.
   
  +				// if a given document has more than one term
  +				// in the field, only the last one will be used.
  +
   				TermDocs termDocs = reader.termDocs();
  +				int t = 0;  // current term number
   				try {
  -					int t = 0;  // current term number
   					do {
   						Term term = enumerator.term();
   						if (term.field() != field) break;
  -						t++;
  +
  +						// store term text
  +						// we expect that there is at most one term per document
  +						if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\"");
  +						mterms[t] = term.text();
  +
  +						// store which documents use this term
   						termDocs.seek (enumerator);
   						while (termDocs.next()) {
   							retArray[termDocs.doc()] = t;
   						}
  +
  +						t++;
   					} while (enumerator.next());
   				} finally {
   					termDocs.close();
   				}
   
  +				// if there are fewer terms than documents,
  +				// trim off the dead array space
  +				if (t < mterms.length) {
  +					terms = new String[t];
  +					System.arraycopy (mterms, 0, terms, 0, t);
  +				} else {
  +					terms = mterms;
  +				}
  +
   				return retArray;
   			}
   
  @@ -215,11 +263,11 @@
   			}
   
   			public Object sortValue (final ScoreDoc i) {
  -				return new Integer(fieldOrder[i.doc]);
  +				return terms[fieldOrder[i.doc]];
   			}
   
   			public int sortType() {
  -				return SortField.INT;
  +				return SortField.STRING;
   			}
   		};
   	}
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org