You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2010/06/22 23:25:40 UTC

svn commit: r957036 - in /lucene/dev/branches/branch_3x/solr: CHANGES.txt src/java/org/apache/solr/request/SimpleFacets.java src/java/org/apache/solr/request/UnInvertedField.java src/java/org/apache/solr/search/SolrIndexSearcher.java

Author: yonik
Date: Tue Jun 22 21:25:40 2010
New Revision: 957036

URL: http://svn.apache.org/viewvc?rev=957036&view=rev
Log:
SOLR-1968: reuse termsenum,docsenum when generating cached sets (3x backport)

Modified:
    lucene/dev/branches/branch_3x/solr/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java

Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=957036&r1=957035&r2=957036&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Tue Jun 22 21:25:40 2010
@@ -153,6 +153,11 @@ Optimizations
 
 * SOLR-1874: Optimize PatternReplaceFilter for better performance. (rmuir, uschindler)
 
+* SOLR-1968: speed up initial filter cache population for facet.method=enum and
+  also big terms for multi-valued facet.method=fc.  The resulting speedup
+  for the first facet request is anywhere from 30% to 32x, depending on how many
+  terms are in the field and how many documents match per term.  (yonik)
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java?rev=957036&r1=957035&r2=957036&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/SimpleFacets.java Tue Jun 22 21:25:40 2010
@@ -471,6 +471,9 @@ public class SimpleFacets {
     String startTerm = prefix==null ? "" : ft.toInternal(prefix);
     TermEnum te = r.terms(new Term(field,startTerm));
     TermDocs td = r.termDocs();
+    SolrIndexSearcher.TermDocsState tdState = new SolrIndexSearcher.TermDocsState();
+    tdState.tenum = te;
+    tdState.tdocs = td;
 
     if (docs.size() >= mincount) { 
     do {
@@ -491,7 +494,7 @@ public class SimpleFacets {
 
         if (df >= minDfFilterCache) {
           // use the filter cache
-          c = searcher.numDocs(new TermQuery(t), docs);
+          c = docs.intersectionSize( searcher.getPositiveDocSet(new TermQuery(t), tdState) );
         } else {
           // iterate over TermDocs to calculate the intersection
           td.seek(te);

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=957036&r1=957035&r2=957036&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/request/UnInvertedField.java Tue Jun 22 21:25:40 2010
@@ -193,6 +193,10 @@ public class UnInvertedField {
 
     NumberedTermEnum te = ti.getEnumerator(reader);
 
+    SolrIndexSearcher.TermDocsState tdState = new SolrIndexSearcher.TermDocsState();
+    tdState.tenum = te.tenum;
+    tdState.tdocs = te.termDocs;
+
     // threshold, over which we use set intersections instead of counting
     // to (1) save memory, and (2) speed up faceting.
     // Add 2 for testing purposes so that there will always be some terms under
@@ -243,7 +247,7 @@ public class UnInvertedField {
         topTerm.termNum = termNum;
         bigTerms.put(topTerm.termNum, topTerm);
 
-        DocSet set = searcher.getDocSet(new TermQuery(topTerm.term));
+        DocSet set = searcher.getPositiveDocSet(new TermQuery(topTerm.term), tdState);
         maxTermCounts[termNum] = set.size();
 
         te.next();

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=957036&r1=957035&r2=957036&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Tue Jun 22 21:25:40 2010
@@ -21,6 +21,7 @@ import org.apache.lucene.document.*;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.search.*;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -565,6 +566,18 @@ public class SolrIndexSearcher extends I
     return answer;
   }
 
+    // Only handles positive (non-negative) queries: returns the filterCache entry for q, else builds the set via getDocSetNC.
+  /** @lucene.internal */
+  public DocSet getPositiveDocSet(Query q, TermDocsState tdState) throws IOException {
+    DocSet answer;
+    if (filterCache != null) {
+      answer = (DocSet)filterCache.get(q);
+      if (answer!=null) return answer;  // cache hit: tdState is not touched
+    }
+    answer = getDocSetNC(q, null, tdState);  // cache miss or no cache: build from tdState, passing no filter
+    if (filterCache != null) filterCache.put(q,answer);  // remember the result under q
+    return answer;
+  }
 
   private static Query matchAllDocsQuery = new MatchAllDocsQuery();
 
@@ -659,6 +672,56 @@ public class SolrIndexSearcher extends I
     }
   }
 
+  /** @lucene.internal */
+  public static class TermDocsState {  // mutable holder for the TermEnum/TermDocs pair passed to getPositiveDocSet / getDocSetNC
+    public TermEnum tenum;  // set by the caller; getDocSetNC reads docFreq() from it and seeks tdocs to it
+    public TermDocs tdocs;  // seeked and read by getDocSetNC, which creates it from the reader if left null
+  }
+
+  // query must be positive. NOTE(review): query and filter are never read below; the ids come from whatever term tdState.tenum is on -- presumably the query's term, confirm against callers.
+  protected DocSet getDocSetNC(Query query, DocSet filter, TermDocsState tdState) throws IOException {
+    int smallSetSize = maxDoc()>>6;  // maxDoc/64: cap on the length of the int[] representation
+    int largestPossible = tdState.tenum.docFreq();  // used as the upper bound on matches; tenum is dereferenced unchecked, so it must be non-null
+    int[] docs = new int[Math.min(smallSetSize, largestPossible)];
+    OpenBitSet obs = null;  // allocated only once a batch no longer fits in docs[]
+    int upto=0;  // number of ids currently stored in docs[]
+    int numBits = 0;  // running count of ids added to obs; handed to BitDocSet at the end
+
+    if (tdState.tdocs == null) {
+      tdState.tdocs = reader.termDocs();  // create lazily and leave it in tdState
+    }
+
+    tdState.tdocs.seek(tdState.tenum);
+
+    int[] arr = new int[Math.min(largestPossible, 256)];  // bulk-read buffer, at most 256 ids per read() call
+    int[] freq = new int[arr.length];  // filled by read() but never consulted here
+
+    for(;;) {
+      int num = tdState.tdocs.read(arr, freq);
+      if (num==0) break;  // read() delivered nothing: stop
+      if (upto + num > docs.length) {  // batch does not fit in the remaining docs[] space: route it to the bitset
+        if (obs == null) obs = new OpenBitSet(maxDoc());
+        for (int i = 0; i<num; i++) {
+          obs.fastSet(arr[i]);
+        }
+        numBits += num;
+      } else {  // batch fits: append it to docs[]
+        System.arraycopy(arr, 0, docs, upto, num);
+        upto += num;
+      }
+    }
+
+    if (obs != null) {  // bitset in use: fold the ids buffered in docs[] into it as well
+      for (int i=0; i<upto; i++) {
+        obs.fastSet(docs[i]);
+      }
+      numBits += upto;
+      return new BitDocSet(obs, numBits);
+    }
+
+    return new SortedIntDocSet(docs, upto);  // everything fit in docs[]; only the first upto entries are filled
+  }
+
 
   /**
    * Returns the set of document ids matching both the query and the filter.