You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/07/22 19:54:37 UTC

svn commit: r1692304 - in /lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet: FacetField.java FacetFieldProcessorNumeric.java FacetProcessor.java FacetQuery.java FacetRange.java UnInvertedField.java

Author: yonik
Date: Wed Jul 22 17:54:36 2015
New Revision: 1692304

URL: http://svn.apache.org/r1692304
Log:
SOLR-7446: simplify missing bucket handling

Modified:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java?rev=1692304&r1=1692303&r2=1692304&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java Wed Jul 22 17:54:36 2015
@@ -53,7 +53,6 @@ import org.apache.solr.common.util.Simpl
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.schema.TrieField;
-import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.HashDocSet;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -174,7 +173,6 @@ abstract class FacetFieldProcessor exten
   SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets.
 
   SpecialSlotAcc allBucketsAcc;  // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
-  SpecialSlotAcc missingAcc;     // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
 
 
   FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
@@ -502,7 +500,6 @@ abstract class FacetFieldProcessorFCBase
   int maxSlots;
 
   int allBucketsSlot = -1;  // slot for the primary Accs (countAcc, collectAcc)
-  int missingSlot = -1;
 
   public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) {
     super(fcontext, freq, sf);
@@ -538,9 +535,6 @@ abstract class FacetFieldProcessorFCBase
     if (freq.allBuckets) {
       allBucketsSlot = maxSlots++;
     }
-    if (freq.missing) {
-      missingSlot = maxSlots++;
-    }
 
     createCollectAcc(nDocs, maxSlots);
 
@@ -548,11 +542,6 @@ abstract class FacetFieldProcessorFCBase
       allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
     }
 
-    if (freq.missing) {
-      // TODO: optimize case when missingSlot can be contiguous with other slots
-      missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
-    }
-
     collectDocs();
 
     return findTopSlots();
@@ -587,7 +576,7 @@ abstract class FacetFieldProcessorFCBase
     };
 
     Slot bottom = null;
-    for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
+    for (int i = 0; i < nTerms; i++) {
       // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
       if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) {
         continue;
@@ -672,29 +661,8 @@ abstract class FacetFieldProcessorFCBase
 
     if (freq.missing) {
       SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
-      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
+      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
       res.add("missing", missingBucket);
-
-      /*** TODO - OPTIMIZE
-      DocSet missingDocSet = null;
-      if (startTermIndex == -1) {
-        fillBucket(missingBucket, countAcc.getCount(0), null);
-      } else {
-        missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
-        // an extra slot was added to the end for this missing bucket
-        countAcc.incrementCount(nTerms, missingDocSet.size());
-        collect(missingDocSet, nTerms);
-        addStats(missingBucket, nTerms);
-      }
-
-      if (freq.getSubFacets().size() > 0) {
-        // TODO: we can do better than this!
-        if (missingDocSet == null) {
-          missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
-        }
-        processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
-      }
-       ***/
     }
 
     return res;
@@ -751,9 +719,6 @@ class FacetFieldProcessorDV extends Face
       endTermIndex = (int)si.getValueCount();
     }
 
-    // optimize collecting the "missing" bucket when startTermindex is 0 (since the "missing" ord is -1)
-    startTermIndex = startTermIndex==0 && freq.missing ? -1 : startTermIndex;
-
     nTerms = endTermIndex - startTermIndex;
   }
 
@@ -809,6 +774,7 @@ class FacetFieldProcessorDV extends Face
     int doc;
     while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
       int segOrd = singleDv.getOrd(doc);
+      if (segOrd < 0) continue;
       collect(doc, segOrd, toGlobal);
     }
   }
@@ -817,11 +783,8 @@ class FacetFieldProcessorDV extends Face
     int doc;
     while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
       multiDv.setDocument(doc);
-      int segOrd = (int)multiDv.nextOrd();
-      collect(doc, segOrd, toGlobal); // collect anything the first time (even -1 for missing)
-      if (segOrd < 0) continue;
       for(;;) {
-        segOrd = (int)multiDv.nextOrd();
+        int segOrd = (int)multiDv.nextOrd();
         if (segOrd < 0) break;
         collect(doc, segOrd, toGlobal);
       }
@@ -837,8 +800,7 @@ class FacetFieldProcessorDV extends Face
       if (collectAcc != null) {
         collectAcc.collect(doc, arrIdx);
       }
-      // since this can be called for missing, we need to ensure it's currently not.
-      if (allBucketsAcc != null && ord >= 0) {
+      if (allBucketsAcc != null) {
         allBucketsAcc.collect(doc, arrIdx);
       }
     }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java?rev=1692304&r1=1692303&r2=1692304&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java Wed Jul 22 17:54:36 2015
@@ -32,6 +32,7 @@ import org.apache.lucene.util.PriorityQu
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocSetCollector;
 
 class FacetFieldProcessorNumeric extends FacetFieldProcessor {
   static int MAXIMUM_STARTING_TABLE_SIZE=1024;  // must be a power of two, non-final to support setting by tests
@@ -137,10 +138,8 @@ class FacetFieldProcessorNumeric extends
     super(fcontext, freq, sf);
   }
 
-  int missingSlot = -1;
   int allBucketsSlot = -1;
 
-
   @Override
   public void process() throws IOException {
     super.process();
@@ -148,18 +147,14 @@ class FacetFieldProcessorNumeric extends
   }
 
   private void doRehash(LongCounts table) {
-    if (collectAcc == null && missingAcc == null && allBucketsAcc == null) return;
+    if (collectAcc == null && allBucketsAcc == null) return;
 
     // Our "count" acc is backed by the hash table and will already be rehashed
     // otherAccs don't need to be rehashed
 
     int newTableSize = table.numSlots();
     int numSlots = newTableSize;
-    final int oldMissingSlot = missingSlot;
     final int oldAllBucketsSlot = allBucketsSlot;
-    if (oldMissingSlot >= 0) {
-      missingSlot = numSlots++;
-    }
     if (oldAllBucketsSlot >= 0) {
       allBucketsSlot = numSlots++;
     }
@@ -178,9 +173,6 @@ class FacetFieldProcessorNumeric extends
         if (oldSlot < mapping.length) {
           return mapping[oldSlot];
         }
-        if (oldSlot == oldMissingSlot) {
-          return missingSlot;
-        }
         if (oldSlot == oldAllBucketsSlot) {
           return allBucketsSlot;
         }
@@ -192,9 +184,6 @@ class FacetFieldProcessorNumeric extends
     if (collectAcc != null) {
       collectAcc.resize(resizer);
     }
-    if (missingAcc != null) {
-      missingAcc.resize(resizer);
-    }
     if (allBucketsAcc != null) {
       allBucketsAcc.resize(resizer);
     }
@@ -225,9 +214,7 @@ class FacetFieldProcessorNumeric extends
 
     int numMissing = 0;
 
-    if (freq.missing) {
-      missingSlot = numSlots++;
-    }
+
     if (freq.allBuckets) {
       allBucketsSlot = numSlots++;
     }
@@ -302,11 +289,6 @@ class FacetFieldProcessorNumeric extends
       allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
     }
 
-    if (freq.missing) {
-      // TODO: optimize case when missingSlot can be contiguous with other slots
-      missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
-    }
-
     NumericDocValues values = null;
     Bits docsWithField = null;
 
@@ -335,11 +317,7 @@ class FacetFieldProcessorNumeric extends
 
       int segDoc = doc - segBase;
       long val = values.get(segDoc);
-      if (val == 0 && !docsWithField.get(segDoc)) {
-        if (missingAcc != null) {
-          missingAcc.collect(segDoc, -1);
-        }
-      } else {
+      if (val != 0 || docsWithField.get(segDoc)) {
         int slot = table.add(val);  // this can trigger a rehash rehash
 
         // countAcc.incrementCount(slot, 1);
@@ -428,7 +406,7 @@ class FacetFieldProcessorNumeric extends
       // TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway.
 
       SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
-      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
+      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
       res.add("missing", missingBucket);
     }
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java?rev=1692304&r1=1692303&r2=1692304&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java Wed Jul 22 17:54:36 2015
@@ -283,15 +283,16 @@ public class FacetProcessor<FacetRequest
   }
 
 
-  public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException {
+  public void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result) throws IOException {
     boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
 
     // TODO: always collect counts or not???
 
-    DocSet result = null;
     int count;
 
-    if (needDocSet) {
+    if (result != null) {
+      count = result.size();
+    } else if (needDocSet) {
       if (q == null) {
         result = fcontext.base;
         // result.incref(); // OFF-HEAP

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java?rev=1692304&r1=1692303&r2=1692304&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java Wed Jul 22 17:54:36 2015
@@ -54,7 +54,7 @@ class FacetQueryProcessor extends FacetP
   public void process() throws IOException {
     super.process();
     response = new SimpleOrderedMap<>();
-    fillBucket(response, freq.q);
+    fillBucket(response, freq.q, null);
   }
 
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java?rev=1692304&r1=1692303&r2=1692304&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java Wed Jul 22 17:54:36 2015
@@ -336,7 +336,7 @@ class FacetRangeProcessor extends FacetP
     }
 
     Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
-    fillBucket(bucket, rangeQ);
+    fillBucket(bucket, rangeQ, null);
 
     return bucket;
   }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java?rev=1692304&r1=1692303&r2=1692304&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java Wed Jul 22 17:54:36 2015
@@ -394,7 +394,7 @@ public class UnInvertedField extends Doc
 
 
   public void collectDocs(FacetFieldProcessorUIF processor) throws IOException {
-    if (processor.collectAcc==null && processor.missingAcc == null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
+    if (processor.collectAcc==null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
       getCounts(processor, processor.countAcc);
       return;
     }