You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2009/08/24 17:56:32 UTC
svn commit: r807289 - in /lucene/solr/trunk: CHANGES.txt
src/common/org/apache/solr/common/params/TermsParams.java
src/java/org/apache/solr/handler/component/TermsComponent.java
src/test/org/apache/solr/handler/component/TermsComponentTest.java
Author: yonik
Date: Mon Aug 24 15:56:32 2009
New Revision: 807289
URL: http://svn.apache.org/viewvc?rev=807289&view=rev
Log:
SOLR-1156: Sort TermsComponent results by frequency
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java
lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java
lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=807289&r1=807288&r2=807289&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Mon Aug 24 15:56:32 2009
@@ -280,6 +280,8 @@
high precision date subtraction, add sub() for subtracting other arguments.
(yonik)
+73. SOLR-1156: Sort TermsComponent results by frequency (Matt Weber via yonik)
+
Optimizations
----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the
Modified: lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java?rev=807289&r1=807288&r2=807289&view=diff
==============================================================================
--- lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java (original)
+++ lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java Mon Aug 24 15:56:32 2009
@@ -82,5 +82,13 @@
* For instance, the index form of numeric numbers is not human readable. The default is false.
*/
public static final String TERMS_RAW = TERMS_PREFIX + "raw";
+
+ /**
+ * Optional. The sort order for returned terms: "count" (by frequency) or "index" (index order). Defaults to "count".
+ */
+ public static final String TERMS_SORT = TERMS_PREFIX + "sort";
+
+ public static final String TERMS_SORT_COUNT = "count";
+ public static final String TERMS_SORT_INDEX = "index";
}
Modified: lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java?rev=807289&r1=807288&r2=807289&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java Mon Aug 24 15:56:32 2009
@@ -26,6 +26,8 @@
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.StrField;
+import org.apache.solr.request.SimpleFacets.CountPair;
+import org.apache.solr.util.BoundedTreeSet;
import java.io.IOException;
@@ -55,6 +57,8 @@
String upperStr = params.get(TermsParams.TERMS_UPPER);
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
+ boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
+ params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin
int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax
if (freqmax<0) {
@@ -77,6 +81,7 @@
TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go
int i = 0;
+ BoundedTreeSet<CountPair<String, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null);
NamedList fieldTerms = new NamedList();
terms.add(field, fieldTerms);
Term lowerTestTerm = termEnum.term();
@@ -87,7 +92,7 @@
termEnum.next();
}
- while (i<limit) {
+ while (i<limit || sort) {
Term theTerm = termEnum.term();
@@ -111,14 +116,29 @@
if (docFreq >= freqmin && docFreq <= freqmax) {
// add the term to the list
String label = raw ? indexedText : ft.indexedToReadable(indexedText);
- fieldTerms.add(label, docFreq);
- i++;
+ if (sort) {
+ queue.add(new CountPair<String, Integer>(label, docFreq));
+ } else {
+ fieldTerms.add(label, docFreq);
+ i++;
+ }
}
termEnum.next();
}
termEnum.close();
+
+ if (sort) {
+ for (CountPair<String, Integer> item : queue) {
+ if (i < limit) {
+ fieldTerms.add(item.key, item.val);
+ i++;
+ } else {
+ break;
+ }
+ }
+ }
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
Modified: lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java?rev=807289&r1=807288&r2=807289&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java Mon Aug 24 15:56:32 2009
@@ -62,6 +62,13 @@
assertU(adoc("id", "15", "standardfilt", "d"));
assertU(adoc("id", "16", "standardfilt", "d"));
+ assertU(adoc("id", "17", "standardfilt", "snake"));
+ assertU(adoc("id", "18", "standardfilt", "spider"));
+ assertU(adoc("id", "19", "standardfilt", "shark"));
+ assertU(adoc("id", "20", "standardfilt", "snake"));
+ assertU(adoc("id", "21", "standardfilt", "snake"));
+ assertU(adoc("id", "22", "standardfilt", "shark"));
+
assertU("commit", commit());
}
@@ -203,6 +210,72 @@
assertTrue("value is null and it shouldn't be", value != null);
}
+ public void testSortCount() throws Exception {
+ SolrCore core = h.getCore();
+ TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
+ assertTrue("tc is null and it shouldn't be", tc != null);
+
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add(TermsParams.TERMS, "true");
+ params.add(TermsParams.TERMS_FIELD, "standardfilt");
+ params.add(TermsParams.TERMS_LOWER, "s");
+ params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
+ params.add(TermsParams.TERMS_PREFIX_STR, "s");
+ params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT);
+
+ SolrRequestHandler handler;
+ SolrQueryResponse rsp;
+ NamedList values;
+ NamedList terms;
+ handler = core.getRequestHandler("/terms");
+ assertTrue("handler is null and it shouldn't be", handler != null);
+ rsp = new SolrQueryResponse();
+ rsp.add("responseHeader", new SimpleOrderedMap());
+ handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+ values = rsp.getValues();
+ terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
+ assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
+ assertTrue("Item 0 name is not 'snake'", terms.getName(0).equals("snake"));
+ assertTrue("Item 0 frequency is not '3'", (Integer) terms.getVal(0) == 3);
+ assertTrue("Item 1 name is not 'shark'", terms.getName(1).equals("shark"));
+ assertTrue("Item 1 frequency is not '2'", (Integer) terms.getVal(1) == 2);
+ assertTrue("Item 2 name is not 'spider'", terms.getName(2).equals("spider"));
+ assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1);
+ }
+
+ public void testSortIndex() throws Exception {
+ SolrCore core = h.getCore();
+ TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
+ assertTrue("tc is null and it shouldn't be", tc != null);
+
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add(TermsParams.TERMS, "true");
+ params.add(TermsParams.TERMS_FIELD, "standardfilt");
+ params.add(TermsParams.TERMS_LOWER, "s");
+ params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
+ params.add(TermsParams.TERMS_PREFIX_STR, "s");
+ params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
+
+ SolrRequestHandler handler;
+ SolrQueryResponse rsp;
+ NamedList values;
+ NamedList terms;
+ handler = core.getRequestHandler("/terms");
+ assertTrue("handler is null and it shouldn't be", handler != null);
+ rsp = new SolrQueryResponse();
+ rsp.add("responseHeader", new SimpleOrderedMap());
+ handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+ values = rsp.getValues();
+ terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
+ assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
+ assertTrue("Item 0 name is not 'shark' it is " + terms.getName(0), terms.getName(0).equals("shark"));
+ assertTrue("Item 0 frequency is not '2'", (Integer) terms.getVal(0) == 2);
+ assertTrue("Item 1 name is not 'snake', it is " + terms.getName(1), terms.getName(1).equals("snake"));
+ assertTrue("Item 1 frequency is not '3'", (Integer) terms.getVal(1) == 3);
+ assertTrue("Item 2 name is not 'spider', it is " + terms.getName(2), terms.getName(2).equals("spider"));
+ assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1);
+ }
+
public void testPastUpper() throws Exception {
SolrCore core = h.getCore();
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
@@ -412,7 +485,7 @@
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues();
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
- assertTrue("terms Size: " + terms.size() + " is not: " + 1, terms.size() == 1);
+ assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
Integer d = (Integer) terms.get("d");
assertTrue(d + " does not equal: " + 3, d == 3);