You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2008/06/02 14:29:02 UTC
svn commit: r662413 - in /lucene/java/trunk: CHANGES.txt
contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
Author: gsingers
Date: Mon Jun 2 05:29:02 2008
New Revision: 662413
URL: http://svn.apache.org/viewvc?rev=662413&view=rev
Log:
LUCENE-1295: Made method public and added retrieveInterestingTerms variation
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=662413&r1=662412&r2=662413&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Jun 2 05:29:02 2008
@@ -175,6 +175,8 @@
and DocIdSetIterator-based filters. Backwards-compatibility with old
BitSet-based filters is ensured. (Paul Elschot via Michael Busch)
+15. LUCENE-1295: Added new method to MoreLikeThis for retrieving interesting terms and made retrieveTerms(int) public. (Grant Ingersoll)
+
Optimizations
1. LUCENE-705: When building a compound file, use
Modified: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java?rev=662413&r1=662412&r2=662413&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (original)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java Mon Jun 2 05:29:02 2008
@@ -731,7 +731,7 @@
*
* @param docNum the id of the lucene document from which to find terms
*/
- private PriorityQueue retrieveTerms(int docNum) throws IOException {
+ public PriorityQueue retrieveTerms(int docNum) throws IOException {
Map termFreqMap = new HashMap();
for (int i = 0; i < fieldNames.length; i++) {
String fieldName = fieldNames[i];
@@ -871,7 +871,24 @@
return createQueue(words);
}
- /**
+ /**
+ * @see #retrieveInterestingTerms(java.io.Reader)
+ */
+ public String [] retrieveInterestingTerms(int docNum) throws IOException{
+ ArrayList al = new ArrayList( maxQueryTerms);
+ PriorityQueue pq = retrieveTerms(docNum);
+ Object cur;
+ int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
+ // we just want to return the top words
+ while (((cur = pq.pop()) != null) && lim-- > 0) {
+ Object[] ar = (Object[]) cur;
+ al.add( ar[ 0]); // the 1st entry is the interesting word
+ }
+ String[] res = new String[ al.size()];
+ return (String[]) al.toArray( res);
+ }
+
+ /**
* Convenience routine to make it easy to return the most interesting words in a document.
* More advanced users will call {@link #retrieveTerms(java.io.Reader) retrieveTerms()} directly.
* @param r the source document