You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2009/08/24 17:56:32 UTC

svn commit: r807289 - in /lucene/solr/trunk: CHANGES.txt src/common/org/apache/solr/common/params/TermsParams.java src/java/org/apache/solr/handler/component/TermsComponent.java src/test/org/apache/solr/handler/component/TermsComponentTest.java

Author: yonik
Date: Mon Aug 24 15:56:32 2009
New Revision: 807289

URL: http://svn.apache.org/viewvc?rev=807289&view=rev
Log:
SOLR-1156: Sort TermsComponent results by frequency

Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java
    lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java
    lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=807289&r1=807288&r2=807289&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Mon Aug 24 15:56:32 2009
@@ -280,6 +280,8 @@
     high precision date subtraction, add sub() for subtracting other arguments.
     (yonik)
 
+73. SOLR-1156: Sort TermsComponent results by frequency (Matt Weber via yonik)
+
 Optimizations
 ----------------------
  1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the

Modified: lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java?rev=807289&r1=807288&r2=807289&view=diff
==============================================================================
--- lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java (original)
+++ lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java Mon Aug 24 15:56:32 2009
@@ -82,5 +82,13 @@
    * For instance, the index form of numeric numbers is not human readable.  The default is false.
    */
   public static final String TERMS_RAW = TERMS_PREFIX + "raw";
+
+  /**
+   * Optional.  How to sort the returned terms: "count" (by frequency, highest first) or "index" (index order).  Defaults to "count".
+   */
+  public static final String TERMS_SORT = TERMS_PREFIX + "sort";
+  
+  public static final String TERMS_SORT_COUNT = "count";
+  public static final String TERMS_SORT_INDEX = "index";
 }
 

Modified: lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java?rev=807289&r1=807288&r2=807289&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java Mon Aug 24 15:56:32 2009
@@ -26,6 +26,8 @@
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.StrField;
+import org.apache.solr.request.SimpleFacets.CountPair;
+import org.apache.solr.util.BoundedTreeSet;
 
 import java.io.IOException;
 
@@ -55,6 +57,8 @@
         String upperStr = params.get(TermsParams.TERMS_UPPER);
         boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
         boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
+        boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
+                          params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
         int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin
         int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax
         if (freqmax<0) {
@@ -77,6 +81,7 @@
           
           TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go
           int i = 0;
+          BoundedTreeSet<CountPair<String, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null); 
           NamedList fieldTerms = new NamedList();
           terms.add(field, fieldTerms);
           Term lowerTestTerm = termEnum.term();
@@ -87,7 +92,7 @@
             termEnum.next();
           }
 
-          while (i<limit) {
+          while (i<limit || sort) {
 
             Term theTerm = termEnum.term();
 
@@ -111,14 +116,29 @@
             if (docFreq >= freqmin && docFreq <= freqmax) {
               // add the term to the list
               String label = raw ? indexedText : ft.indexedToReadable(indexedText);
-              fieldTerms.add(label, docFreq);
-              i++;
+              if (sort) {
+                queue.add(new CountPair<String, Integer>(label, docFreq));
+              } else {
+                fieldTerms.add(label, docFreq);
+                i++;
+              }
             }
 
             termEnum.next();
           }
 
           termEnum.close();
+          
+          if (sort) {
+            for (CountPair<String, Integer> item : queue) {
+              if (i < limit) {
+                fieldTerms.add(item.key, item.val);
+                i++;
+              } else {
+                break;
+              }
+            }
+          }
         }
       } else {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");

Modified: lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java?rev=807289&r1=807288&r2=807289&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java Mon Aug 24 15:56:32 2009
@@ -62,6 +62,13 @@
     assertU(adoc("id", "15", "standardfilt", "d"));
     assertU(adoc("id", "16", "standardfilt", "d"));
 
+    assertU(adoc("id", "17", "standardfilt", "snake"));
+    assertU(adoc("id", "18", "standardfilt", "spider"));
+    assertU(adoc("id", "19", "standardfilt", "shark"));
+    assertU(adoc("id", "20", "standardfilt", "snake"));
+    assertU(adoc("id", "21", "standardfilt", "snake"));
+    assertU(adoc("id", "22", "standardfilt", "shark"));
+    
     assertU("commit", commit());
   }
 
@@ -203,6 +210,72 @@
     assertTrue("value is null and it shouldn't be", value != null);
   }
 
+  public void testSortCount() throws Exception {
+    SolrCore core = h.getCore();
+    TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
+    assertTrue("tc is null and it shouldn't be", tc != null);
+
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.add(TermsParams.TERMS, "true");
+    params.add(TermsParams.TERMS_FIELD, "standardfilt");
+    params.add(TermsParams.TERMS_LOWER,  "s");
+    params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
+    params.add(TermsParams.TERMS_PREFIX_STR, "s");
+    params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT);
+    
+    SolrRequestHandler handler;
+    SolrQueryResponse rsp;
+    NamedList values;
+    NamedList terms;
+    handler = core.getRequestHandler("/terms");
+    assertTrue("handler is null and it shouldn't be", handler != null);
+    rsp = new SolrQueryResponse();
+    rsp.add("responseHeader", new SimpleOrderedMap());
+    handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+    values = rsp.getValues();
+    terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
+    assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
+    assertTrue("Item 0 name is not 'snake'", terms.getName(0).equals("snake"));
+    assertTrue("Item 0 frequency is not '3'", (Integer) terms.getVal(0) == 3);
+    assertTrue("Item 1 name is not 'shark'", terms.getName(1).equals("shark"));
+    assertTrue("Item 1 frequency is not '2'", (Integer) terms.getVal(1) == 2);
+    assertTrue("Item 2 name is not 'spider'", terms.getName(2).equals("spider"));
+    assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1);    
+  }
+
+  public void testSortIndex() throws Exception {
+    SolrCore core = h.getCore();
+    TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
+    assertTrue("tc is null and it shouldn't be", tc != null);
+
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.add(TermsParams.TERMS, "true");
+    params.add(TermsParams.TERMS_FIELD, "standardfilt");
+    params.add(TermsParams.TERMS_LOWER,  "s");
+    params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
+    params.add(TermsParams.TERMS_PREFIX_STR, "s");
+    params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
+    
+    SolrRequestHandler handler;
+    SolrQueryResponse rsp;
+    NamedList values;
+    NamedList terms;
+    handler = core.getRequestHandler("/terms");
+    assertTrue("handler is null and it shouldn't be", handler != null);
+    rsp = new SolrQueryResponse();
+    rsp.add("responseHeader", new SimpleOrderedMap());
+    handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+    values = rsp.getValues();
+    terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
+    assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
+    assertTrue("Item 0 name is not 'shark' it is " + terms.getName(0), terms.getName(0).equals("shark"));
+    assertTrue("Item 0 frequency is not '2'", (Integer) terms.getVal(0) == 2);
+    assertTrue("Item 1 name is not 'snake', it is " + terms.getName(1), terms.getName(1).equals("snake"));
+    assertTrue("Item 1 frequency is not '3'", (Integer) terms.getVal(1) == 3);
+    assertTrue("Item 2 name is not 'spider', it is " + terms.getName(2), terms.getName(2).equals("spider"));
+    assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1);    
+  }
+  
   public void testPastUpper() throws Exception {
     SolrCore core = h.getCore();
     TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
@@ -412,7 +485,7 @@
     handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
     values = rsp.getValues();
     terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
-    assertTrue("terms Size: " + terms.size() + " is not: " + 1, terms.size() == 1);
+    assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
     Integer d = (Integer) terms.get("d");
     assertTrue(d + " does not equal: " + 3, d == 3);