You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2010/06/22 23:25:40 UTC
svn commit: r957036 - in /lucene/dev/branches/branch_3x/solr: CHANGES.txt
src/java/org/apache/solr/request/SimpleFacets.java
src/java/org/apache/solr/request/UnInvertedField.java
src/java/org/apache/solr/search/SolrIndexSearcher.java
Author: yonik
Date: Tue Jun 22 21:25:40 2010
New Revision: 957036
URL: http://svn.apache.org/viewvc?rev=957036&view=rev
Log:
SOLR-1968: reuse termsenum,docsenum when generating cached sets (3x backport)
Modified:
lucene/dev/branches/branch_3x/solr/CHANGES.txt
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=957036&r1=957035&r2=957036&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Tue Jun 22 21:25:40 2010
@@ -153,6 +153,11 @@ Optimizations
* SOLR-1874: Optimize PatternReplaceFilter for better performance. (rmuir, uschindler)
+* SOLR-1968: speed up initial filter cache population for facet.method=enum and
+ also big terms for multi-valued facet.method=fc. The resulting speedup
+ for the first facet request is anywhere from 30% to 32x, depending on how many
+ terms are in the field and how many documents match per term. (yonik)
+
Bug Fixes
----------------------
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java?rev=957036&r1=957035&r2=957036&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java Tue Jun 22 21:25:40 2010
@@ -471,6 +471,9 @@ public class SimpleFacets {
String startTerm = prefix==null ? "" : ft.toInternal(prefix);
TermEnum te = r.terms(new Term(field,startTerm));
TermDocs td = r.termDocs();
+ SolrIndexSearcher.TermDocsState tdState = new SolrIndexSearcher.TermDocsState();
+ tdState.tenum = te;
+ tdState.tdocs = td;
if (docs.size() >= mincount) {
do {
@@ -491,7 +494,7 @@ public class SimpleFacets {
if (df >= minDfFilterCache) {
// use the filter cache
- c = searcher.numDocs(new TermQuery(t), docs);
+ c = docs.intersectionSize( searcher.getPositiveDocSet(new TermQuery(t), tdState) );
} else {
// iterate over TermDocs to calculate the intersection
td.seek(te);
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=957036&r1=957035&r2=957036&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java Tue Jun 22 21:25:40 2010
@@ -193,6 +193,10 @@ public class UnInvertedField {
NumberedTermEnum te = ti.getEnumerator(reader);
+ SolrIndexSearcher.TermDocsState tdState = new SolrIndexSearcher.TermDocsState();
+ tdState.tenum = te.tenum;
+ tdState.tdocs = te.termDocs;
+
// threshold, over which we use set intersections instead of counting
// to (1) save memory, and (2) speed up faceting.
// Add 2 for testing purposes so that there will always be some terms under
@@ -243,7 +247,7 @@ public class UnInvertedField {
topTerm.termNum = termNum;
bigTerms.put(topTerm.termNum, topTerm);
- DocSet set = searcher.getDocSet(new TermQuery(topTerm.term));
+ DocSet set = searcher.getPositiveDocSet(new TermQuery(topTerm.term), tdState);
maxTermCounts[termNum] = set.size();
te.next();
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=957036&r1=957035&r2=957036&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Tue Jun 22 21:25:40 2010
@@ -21,6 +21,7 @@ import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -565,6 +566,18 @@ public class SolrIndexSearcher extends I
return answer;
}
+ // only handle positive (non negative) queries
+ /**
+  * Returns the DocSet for <code>q</code>, consulting the filter cache first when one is
+  * configured. On a cache miss the set is built by the three-argument getDocSetNC (called
+  * with a null filter and the caller's <code>tdState</code>) and is then stored in the
+  * filter cache under <code>q</code>.
+  * NOTE(review): the three-argument getDocSetNC added in this commit reads postings from
+  * tdState.tenum's current term and never references the query, so <code>q</code> is
+  * presumably expected to be a TermQuery on that same term (both callers in this commit
+  * pass one) -- confirm before adding new callers.
+  * @lucene.internal
+  */
+ public DocSet getPositiveDocSet(Query q, TermDocsState tdState) throws IOException {
+ DocSet answer;
+ if (filterCache != null) {
+ answer = (DocSet)filterCache.get(q);
+ if (answer!=null) return answer; // cache hit: tdState is not touched
+ }
+ answer = getDocSetNC(q, null, tdState); // cache miss: build from the shared enumerators
+ if (filterCache != null) filterCache.put(q,answer);
+ return answer;
+ }
private static Query matchAllDocsQuery = new MatchAllDocsQuery();
@@ -659,6 +672,56 @@ public class SolrIndexSearcher extends I
}
}
+ /**
+  * Mutable holder for a TermEnum / TermDocs pair that a caller hands to
+  * getPositiveDocSet so the same enumerators can be reused across many terms
+  * (SOLR-1968) instead of being re-created per term.
+  * @lucene.internal
+  */
+ public static class TermDocsState {
+ public TermEnum tenum; // dereferenced unconditionally by getDocSetNC (docFreq(), seek target), so it must be set
+ public TermDocs tdocs; // may be left null; getDocSetNC then fills it in from reader.termDocs()
+ }
+
+ /*
+  * query must be positive
+  *
+  * Builds, without consulting any cache, the set of documents listed for the term that
+  * tdState.tenum currently points at. Results are collected in an int array while they
+  * fit within min(maxDoc()/64, docFreq) entries; once a batch would overflow that array an
+  * OpenBitSet sized to maxDoc() is used instead and a BitDocSet is returned.
+  *
+  * NOTE(review): neither the query nor the filter parameter is referenced in this body;
+  * the result depends only on tdState. Callers presumably must keep the query and
+  * tdState.tenum in agreement -- confirm.
+  */
+ protected DocSet getDocSetNC(Query query, DocSet filter, TermDocsState tdState) throws IOException {
+ int smallSetSize = maxDoc()>>6; // maxDoc/64: upper bound on the int-array representation
+ int largestPossible = tdState.tenum.docFreq(); // docFreq of the current term caps the result size
+ int[] docs = new int[Math.min(smallSetSize, largestPossible)];
+ OpenBitSet obs = null; // allocated lazily, only if the array overflows
+ int upto=0; // number of ids stored in docs
+ int numBits = 0; // number of ids recorded in obs
+
+ if (tdState.tdocs == null) {
+ tdState.tdocs = reader.termDocs(); // created once and left in tdState for later calls
+ }
+
+ tdState.tdocs.seek(tdState.tenum);
+
+ int[] arr = new int[Math.min(largestPossible, 256)]; // bulk-read buffer, at most 256 ids per read
+ int[] freq = new int[arr.length]; // filled by read() but never consulted here
+
+ for(;;) {
+ int num = tdState.tdocs.read(arr, freq);
+ if (num==0) break; // a read of 0 ends the loop
+ if (upto + num > docs.length) { // this batch does not fit in the array: set bits instead
+ if (obs == null) obs = new OpenBitSet(maxDoc());
+ for (int i = 0; i<num; i++) {
+ obs.fastSet(arr[i]);
+ }
+ numBits += num;
+ } else { // batch fits (possibly even after obs exists); array contents are merged below
+ System.arraycopy(arr, 0, docs, upto, num);
+ upto += num;
+ }
+ }
+
+ if (obs != null) { // spilled to a bitset: fold the ids still held in the array into it
+ for (int i=0; i<upto; i++) {
+ obs.fastSet(docs[i]);
+ }
+ numBits += upto;
+ return new BitDocSet(obs, numBits);
+ }
+
+ return new SortedIntDocSet(docs, upto); // everything fit in the array; ids are in the order read() delivered them
+ }
+
/**
* Returns the set of document ids matching both the query and the filter.