You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2008/06/02 14:29:02 UTC
svn commit: r662413 - in /lucene/java/trunk: CHANGES.txt contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java

Author: gsingers
Date: Mon Jun  2 05:29:02 2008
New Revision: 662413

URL: http://svn.apache.org/viewvc?rev=662413&view=rev
Log:
LUCENE-1295: Made method public and added retrieveInterestingTerms variation

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=662413&r1=662412&r2=662413&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Jun  2 05:29:02 2008
@@ -175,6 +175,8 @@
     and DocIdSetIterator-based filters. Backwards-compatibility with old 
     BitSet-based filters is ensured. (Paul Elschot via Michael Busch)
 
+15. LUCENE-1295: Added retrieveInterestingTerms(int) to MoreLikeThis for retrieving the interesting terms of an indexed document, and made retrieveTerms(int) public. (Grant Ingersoll)
+
 Optimizations
 
  1. LUCENE-705: When building a compound file, use

Modified: lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java?rev=662413&r1=662412&r2=662413&view=diff
==============================================================================
--- lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (original)
+++ lucene/java/trunk/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java Mon Jun  2 05:29:02 2008
@@ -731,7 +731,7 @@
      *
      * @param docNum the id of the lucene document from which to find terms
      */
-    private PriorityQueue retrieveTerms(int docNum) throws IOException {
+    public PriorityQueue retrieveTerms(int docNum) throws IOException {
         Map termFreqMap = new HashMap();
         for (int i = 0; i < fieldNames.length; i++) {
             String fieldName = fieldNames[i];
@@ -871,7 +871,24 @@
         return createQueue(words);
     }
 
-	/**
+  /**
+   * Convenience routine returning the interesting terms of an indexed document, capped at maxQueryTerms.
+   * @param docNum the id of the lucene document from which to find terms
+   * @return the terms in the order they are popped from the queue built by {@link #retrieveTerms(int)}
+   * @see #retrieveInterestingTerms(java.io.Reader)
+   */
+  public String [] retrieveInterestingTerms(int docNum) throws IOException{
+    ArrayList al = new ArrayList( maxQueryTerms);
+    PriorityQueue pq = retrieveTerms(docNum);
+    Object cur;
+    // retrieveTerms returns all words but we only want the top ones; test the limit first so nothing is popped just to be discarded
+    for (int lim = maxQueryTerms; lim > 0 && (cur = pq.pop()) != null; lim--) {
+      al.add(((Object[]) cur)[0]); // the 1st entry is the interesting word
+    }
+    return (String[]) al.toArray(new String[al.size()]);
+  }
+
+  /**
 	 * Convenience routine to make it easy to return the most interesting words in a document.
 	 * More advanced users will call {@link #retrieveTerms(java.io.Reader) retrieveTerms()} directly.
 	 * @param r the source document