You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/02/26 01:02:17 UTC
svn commit: r1449972 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/search/ core/src/java/org/apache/lucene/util/ facet/src/java/org/apache/lucene/facet/search/ facet/src/test/org/apache/lucene/facet/search/ test-framework/src/java/o...

Author: mikemccand
Date: Tue Feb 26 00:02:16 2013
New Revision: 1449972

URL: http://svn.apache.org/r1449972
Log:
LUCENE-4748: add DrillSideways utility class to facets module

Added:
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java   (with props)
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java   (with props)
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java   (with props)
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java
    lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java
    lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Feb 26 00:02:16 2013
@@ -76,6 +76,9 @@ Changes in backwards compatibility polic
   should override FacetsAccumualtor and return the relevant aggregator,
   for aggregating the association values. (Shai Erera)
   
+* LUCENE-4748: A FacetRequest on a non-existent field now returns an
+  empty FacetResult instead of skipping it.  (Shai Erera, Mike McCandless)
+
 Optimizations
 
 * LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate
@@ -179,6 +182,10 @@ New Features
 
 * LUCENE-4780: Add MonotonicAppendingLongBuffer: an append-only buffer for
   monotonically increasing values.  (Adrien Grand)
+
+* LUCENE-4748: Added DrillSideways utility class for computing both
+  drill-down and drill-sideways counts for a DrillDownQuery.  (Mike
+  McCandless)
  
 API Changes
 

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java Tue Feb 26 00:02:16 2013
@@ -516,7 +516,7 @@ public class FilteredQuery extends Query
       }
       
       final Bits filterAcceptDocs = docIdSet.bits();
-        // force if RA is requested
+      // force if RA is requested
       final boolean useRandomAccess = (filterAcceptDocs != null && (useRandomAccess(filterAcceptDocs, firstFilterDoc)));
       if (useRandomAccess) {
         // if we are using random access, we return the inner scorer, just with other acceptDocs

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java Tue Feb 26 00:02:16 2013
@@ -118,7 +118,7 @@ public final class FixedBitSet extends D
   }
 
   public void set(int index) {
-    assert index >= 0 && index < numBits;
+    assert index >= 0 && index < numBits: "index=" + index + " numBits=" + numBits;
     int wordNum = index >> 6;      // div 64
     int bit = index & 0x3f;     // mod 64
     long bitmask = 1L << bit;

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java Tue Feb 26 00:02:16 2013
@@ -18,8 +18,8 @@ package org.apache.lucene.facet.search;
  */
 
 import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
+import java.util.LinkedHashMap;
+import java.util.Map;
 
 import org.apache.lucene.facet.params.CategoryListParams;
 import org.apache.lucene.facet.params.FacetIndexingParams;
@@ -27,8 +27,11 @@ import org.apache.lucene.facet.taxonomy.
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.FilteredQuery;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
@@ -49,7 +52,7 @@ import org.apache.lucene.search.TermQuer
 public final class DrillDownQuery extends Query {
 
   /** Return a drill-down {@link Term} for a category. */
-  public static final Term term(FacetIndexingParams iParams, CategoryPath path) {
+  public static Term term(FacetIndexingParams iParams, CategoryPath path) {
     CategoryListParams clp = iParams.getCategoryListParams(path);
     char[] buffer = new char[path.fullPathLength()];
     iParams.drillDownTermText(path, buffer);
@@ -57,21 +60,37 @@ public final class DrillDownQuery extend
   }
   
   private final BooleanQuery query;
-  private final Set<String> drillDownDims = new HashSet<String>();
-
+  private final Map<String,Integer> drillDownDims = new LinkedHashMap<String,Integer>();
   private final FacetIndexingParams fip;
 
-  /* Used by clone() */
-  private DrillDownQuery(FacetIndexingParams fip, BooleanQuery query, Set<String> drillDownDims) {
+  /** Used by clone() */
+  DrillDownQuery(FacetIndexingParams fip, BooleanQuery query, Map<String,Integer> drillDownDims) {
     this.fip = fip;
     this.query = query.clone();
-    this.drillDownDims.addAll(drillDownDims);
+    this.drillDownDims.putAll(drillDownDims);
+  }
+
+  /** Used by DrillSideways */
+  DrillDownQuery(Filter filter, DrillDownQuery other) {
+    query = new BooleanQuery(true); // disable coord
+
+    BooleanClause[] clauses = other.query.getClauses();
+    if (clauses.length == other.drillDownDims.size()) {
+      throw new IllegalArgumentException("cannot apply filter unless baseQuery isn't null; pass ConstantScoreQuery instead");
+    }
+    assert clauses.length == 1+other.drillDownDims.size(): clauses.length + " vs " + (1+other.drillDownDims.size());
+    drillDownDims.putAll(other.drillDownDims);
+    query.add(new FilteredQuery(clauses[0].getQuery(), filter), Occur.MUST);
+    for(int i=1;i<clauses.length;i++) {
+      query.add(clauses[i].getQuery(), Occur.MUST);
+    }
+    fip = other.fip;
   }
 
   /**
-   * Creates a new {@link DrillDownQuery} without a base query, which means that
-   * you intend to perfor a pure browsing query (equivalent to using
-   * {@link MatchAllDocsQuery} as base.
+   * Creates a new {@link DrillDownQuery} without a base query, 
+   * to perform a pure browsing query (equivalent to using
+   * {@link MatchAllDocsQuery} as base).
    */
   public DrillDownQuery(FacetIndexingParams fip) {
     this(fip, null);
@@ -97,14 +116,14 @@ public final class DrillDownQuery extend
    */
   public void add(CategoryPath... paths) {
     Query q;
+    if (paths[0].length == 0) {
+      throw new IllegalArgumentException("all CategoryPaths must have length > 0");
+    }
     String dim = paths[0].components[0];
-    if (drillDownDims.contains(dim)) {
+    if (drillDownDims.containsKey(dim)) {
       throw new IllegalArgumentException("dimension '" + dim + "' was already added");
     }
     if (paths.length == 1) {
-      if (paths[0].length == 0) {
-        throw new IllegalArgumentException("all CategoryPaths must have length > 0");
-      }
       q = new TermQuery(term(fip, paths[0]));
     } else {
       BooleanQuery bq = new BooleanQuery(true); // disable coord
@@ -120,7 +139,7 @@ public final class DrillDownQuery extend
       }
       q = bq;
     }
-    drillDownDims.add(dim);
+    drillDownDims.put(dim, drillDownDims.size());
 
     final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(q);
     drillDownQuery.setBoost(0.0f);
@@ -162,5 +181,12 @@ public final class DrillDownQuery extend
   public String toString(String field) {
     return query.toString(field);
   }
-  
+
+  BooleanQuery getBooleanQuery() {
+    return query;
+  }
+
+  Map<String,Integer> getDims() {
+    return drillDownDims;
+  }
 }

Added: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java?rev=1449972&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (added)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java Tue Feb 26 00:02:16 2013
@@ -0,0 +1,242 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.TopScoreDocCollector;
+
+/**     
+ * Computes drill down and sideways counts for the provided
+ * {@link DrillDownQuery}.  Drill sideways counts include
+ * alternative values/aggregates for the drill-down
+ * dimensions so that a dimension does not disappear after
+ * the user drills down into it.
+ *
+ * <p> Use one of the static search
+ * methods to do the search, and then get the hits and facet
+ * results from the returned {@link DrillSidewaysResult}.
+ *
+ * <p><b>NOTE</b>: this allocates one {@link
+ * FacetsCollector} for each drill-down, plus one.  If your
+ * index has high number of facet labels then this will
+ * multiply your memory usage.
+ *
+ * @lucene.experimental
+ */
+
+public class DrillSideways {
+
+  protected final IndexSearcher searcher;
+  protected final TaxonomyReader taxoReader;
+
+  /** Create a new {@code DrillSideways} instance. */
+  public DrillSideways(IndexSearcher searcher, TaxonomyReader taxoReader) {
+    this.searcher = searcher;
+    this.taxoReader = taxoReader;
+  }
+
+  /**
+   * Search, collecting hits with a {@link Collector}, and
+   * computing drill down and sideways counts.
+   */
+  public DrillSidewaysResult search(DrillDownQuery query,
+                                    Collector hitCollector, FacetSearchParams fsp) throws IOException {
+
+    Map<String,Integer> drillDownDims = query.getDims();
+
+    if (drillDownDims.isEmpty()) {
+      throw new IllegalArgumentException("there must be at least one drill-down");
+    }
+
+    BooleanQuery ddq = query.getBooleanQuery();
+    BooleanClause[] clauses = ddq.getClauses();
+
+    for(FacetRequest fr :  fsp.facetRequests) {
+      if (fr.categoryPath.length == 0) {
+        throw new IllegalArgumentException("all FacetRequests must have CategoryPath with length > 0");
+      }
+    }
+
+    Query baseQuery;
+    int startClause;
+    if (clauses.length == drillDownDims.size()) {
+      // TODO: we could optimize this pure-browse case by
+      // making a custom scorer instead:
+      baseQuery = new MatchAllDocsQuery();
+      startClause = 0;
+    } else {
+      assert clauses.length == 1+drillDownDims.size();
+      baseQuery = clauses[0].getQuery();
+      startClause = 1;
+    }
+
+    Term[][] drillDownTerms = new Term[clauses.length-startClause][];
+    for(int i=startClause;i<clauses.length;i++) {
+      Query q = clauses[i].getQuery();
+      assert q instanceof ConstantScoreQuery;
+      q = ((ConstantScoreQuery) q).getQuery();
+      assert q instanceof TermQuery || q instanceof BooleanQuery;
+      if (q instanceof TermQuery) {
+        drillDownTerms[i-startClause] = new Term[] {((TermQuery) q).getTerm()};
+      } else {
+        BooleanQuery q2 = (BooleanQuery) q;
+        BooleanClause[] clauses2 = q2.getClauses();
+        drillDownTerms[i-startClause] = new Term[clauses2.length];
+        for(int j=0;j<clauses2.length;j++) {
+          assert clauses2[j].getQuery() instanceof TermQuery;
+          drillDownTerms[i-startClause][j] = ((TermQuery) clauses2[j].getQuery()).getTerm();
+        }
+      }
+    }
+
+    FacetsCollector drillDownCollector = FacetsCollector.create(getDrillDownAccumulator(fsp));
+
+    FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[drillDownDims.size()];
+
+    int idx = 0;
+    for(String dim : drillDownDims.keySet()) {
+      FacetRequest drillSidewaysRequest = null;
+      for(FacetRequest fr : fsp.facetRequests) {
+        assert fr.categoryPath.length > 0;
+        if (fr.categoryPath.components[0].equals(dim)) {
+          if (drillSidewaysRequest != null) {
+            throw new IllegalArgumentException("multiple FacetRequests for drill-sideways dimension \"" + dim + "\"");
+          }
+          drillSidewaysRequest = fr;
+        }
+      }
+      if (drillSidewaysRequest == null) {
+        throw new IllegalArgumentException("could not find FacetRequest for drill-sideways dimension \"" + dim + "\"");
+      }
+      drillSidewaysCollectors[idx++] = FacetsCollector.create(getDrillSidewaysAccumulator(dim, new FacetSearchParams(fsp.indexingParams, drillSidewaysRequest)));
+    }
+
+    DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
+
+    searcher.search(dsq, hitCollector);
+
+    List<FacetResult> drillDownResults = drillDownCollector.getFacetResults();
+
+    List<FacetResult> mergedResults = new ArrayList<FacetResult>();
+    for(int i=0;i<fsp.facetRequests.size();i++) {
+      FacetRequest fr = fsp.facetRequests.get(i);
+      assert fr.categoryPath.length > 0;
+      Integer dimIndex = drillDownDims.get(fr.categoryPath.components[0]);
+      if (dimIndex == null) {
+        // Pure drill down dim (the current query didn't
+        // drill down on this dim):
+        mergedResults.add(drillDownResults.get(i));
+      } else {
+        // Drill sideways dim:
+        List<FacetResult> sidewaysResult = drillSidewaysCollectors[dimIndex.intValue()].getFacetResults();
+
+        assert sidewaysResult.size() == 1: "size=" + sidewaysResult.size();
+        mergedResults.add(sidewaysResult.get(0));
+      }
+    }
+
+    return new DrillSidewaysResult(mergedResults, null);
+  }
+
+  /**
+   * Search, sorting by {@link Sort}, and computing
+   * drill down and sideways counts.
+   */
+  public DrillSidewaysResult search(DrillDownQuery query,
+                                    Filter filter, FieldDoc after, int topN, Sort sort, boolean doDocScores,
+                                    boolean doMaxScore, FacetSearchParams fsp) throws IOException {
+    if (filter != null) {
+      query = new DrillDownQuery(filter, query);
+    }
+    if (sort != null) {
+      final TopFieldCollector hitCollector = TopFieldCollector.create(sort,
+                                                                      Math.min(topN, searcher.getIndexReader().maxDoc()),
+                                                                      after,
+                                                                      true,
+                                                                      doDocScores,
+                                                                      doMaxScore,
+                                                                      true);
+      DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(query, hitCollector, fsp);
+      r.hits = hitCollector.topDocs();
+      return r;
+    } else {
+      return search(after, query, topN, fsp);
+    }
+  }
+
+  /**
+   * Search, sorting by score, and computing
+   * drill down and sideways counts.
+   */
+  public DrillSidewaysResult search(ScoreDoc after,
+                                    DrillDownQuery query, int topN, FacetSearchParams fsp) throws IOException {
+    TopScoreDocCollector hitCollector = TopScoreDocCollector.create(Math.min(topN, searcher.getIndexReader().maxDoc()), after, true);
+    DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(query, hitCollector, fsp);
+    r.hits = hitCollector.topDocs();
+    return r;
+  }
+
+  /** Override this to use a custom drill-down {@link
+   *  FacetsAccumulator}. */
+  protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) {
+    return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader);
+  }
+
+  /** Override this to use a custom drill-sideways {@link
+   *  FacetsAccumulator}. */
+  protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) {
+    return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader);
+  }
+
+  /** Represents the returned result from a drill sideways
+   *  search. */
+  public static class DrillSidewaysResult {
+    /** Combined drill down & sideways results. */
+    public final List<FacetResult> facetResults;
+
+    /** Hits. */
+    public TopDocs hits;
+
+    DrillSidewaysResult(List<FacetResult> facetResults, TopDocs hits) {
+      this.facetResults = facetResults;
+      this.hits = hits;
+    }
+  }
+}
+

Added: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java?rev=1449972&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java (added)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java Tue Feb 26 00:02:16 2013
@@ -0,0 +1,169 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.Bits;
+
+class DrillSidewaysQuery extends Query {
+  final Query baseQuery;
+  final Collector drillDownCollector;
+  final Collector[] drillSidewaysCollectors;
+  final Term[][] drillDownTerms;
+
+  DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Term[][] drillDownTerms) {
+    this.baseQuery = baseQuery;
+    this.drillDownCollector = drillDownCollector;
+    this.drillSidewaysCollectors = drillSidewaysCollectors;
+    this.drillDownTerms = drillDownTerms;
+  }
+
+  @Override
+  public String toString(String field) {
+    return "DrillSidewaysQuery";
+  }
+
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    Query newQuery = baseQuery;
+    while(true) {
+      Query rewrittenQuery = newQuery.rewrite(reader);
+      if (rewrittenQuery == newQuery) {
+        break;
+      }
+      newQuery = rewrittenQuery;
+    }
+    if (newQuery == baseQuery) {
+      return this;
+    } else {
+      return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
+    }
+  }
+  
+  @Override
+  public Weight createWeight(IndexSearcher searcher) throws IOException {
+    final Weight baseWeight = baseQuery.createWeight(searcher);
+
+    return new Weight() {
+      @Override
+      public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
+        return baseWeight.explain(context, doc);
+      }
+
+      @Override
+      public Query getQuery() {
+        return baseQuery;
+      }
+
+      @Override
+      public float getValueForNormalization() throws IOException {
+        return baseWeight.getValueForNormalization();
+      }
+
+      @Override
+      public void normalize(float norm, float topLevelBoost) {
+        baseWeight.normalize(norm, topLevelBoost);
+      }
+
+      @Override
+      public boolean scoresDocsOutOfOrder() {
+        // TODO: would be nice if AssertingIndexSearcher
+        // confirmed this for us
+        return false;
+      }
+
+      @Override
+      public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
+                           boolean topScorer, Bits acceptDocs) throws IOException {
+
+        DrillSidewaysScorer.DocsEnumsAndFreq[] dims = new DrillSidewaysScorer.DocsEnumsAndFreq[drillDownTerms.length];
+        TermsEnum termsEnum = null;
+        String lastField = null;
+        int nullCount = 0;
+        for(int dim=0;dim<dims.length;dim++) {
+          dims[dim] = new DrillSidewaysScorer.DocsEnumsAndFreq();
+          dims[dim].sidewaysCollector = drillSidewaysCollectors[dim];
+          String field = drillDownTerms[dim][0].field();
+          dims[dim].dim = drillDownTerms[dim][0].text();
+          if (lastField == null || !lastField.equals(field)) {
+            AtomicReader reader = context.reader();
+            Terms terms = reader.terms(field);
+            if (terms != null) {
+              termsEnum = terms.iterator(null);
+            }
+            lastField = field;
+          }
+          if (termsEnum == null) {
+            nullCount++;
+            continue;
+          }
+          dims[dim].docsEnums = new DocsEnum[drillDownTerms[dim].length];
+          for(int i=0;i<drillDownTerms[dim].length;i++) {
+            if (termsEnum.seekExact(drillDownTerms[dim][i].bytes(), false)) {
+              dims[dim].freq = Math.max(dims[dim].freq, termsEnum.docFreq());
+              dims[dim].docsEnums[i] = termsEnum.docs(null, null);
+            }
+          }
+        }
+
+        if (nullCount > 1) {
+          return null;
+        }
+
+        // Sort drill-downs by most restrictive first:
+        Arrays.sort(dims);
+
+        // TODO: it could be better if we take acceptDocs
+        // into account instead of baseScorer?
+        Scorer baseScorer = baseWeight.scorer(context, scoreDocsInOrder, false, acceptDocs);
+
+        if (baseScorer == null) {
+          return null;
+        }
+
+        return new DrillSidewaysScorer(this, context,
+                                       baseScorer,
+                                       drillDownCollector, dims);
+      }
+    };
+  }
+
+  @Override
+  public int hashCode() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    throw new UnsupportedOperationException();
+  }
+}

Added: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java?rev=1449972&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java (added)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java Tue Feb 26 00:02:16 2013
@@ -0,0 +1,634 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.FixedBitSet;
+
+class DrillSidewaysScorer extends Scorer {
+
+  //private static boolean DEBUG = false;
+
+  private final Collector drillDownCollector;
+
+  private final DocsEnumsAndFreq[] dims;
+
+  // DrillDown DocsEnums:
+  private final Scorer baseScorer;
+
+  private final AtomicReaderContext context;
+
+  private static final int CHUNK = 2048;
+  private static final int MASK = CHUNK-1;
+
+  private int collectDocID = -1;
+  private float collectScore;
+
+  DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
+                      DocsEnumsAndFreq[] dims) {
+    super(w);
+    this.dims = dims;
+    this.context = context;
+    this.baseScorer = baseScorer;
+    this.drillDownCollector = drillDownCollector;
+  }
+
+  @Override
+  public void score(Collector collector) throws IOException {
+    //if (DEBUG) {
+    //  System.out.println("\nscore: reader=" + context.reader());
+    //}
+    //System.out.println("score r=" + context.reader());
+    collector.setScorer(this);
+    drillDownCollector.setScorer(this);
+    drillDownCollector.setNextReader(context);
+    for(DocsEnumsAndFreq dim : dims) {
+      dim.sidewaysCollector.setScorer(this);
+      dim.sidewaysCollector.setNextReader(context);
+    }
+
+    // TODO: if we ever allow null baseScorer ... it will
+    // mean we DO score docs out of order ... hmm, or if we
+    // change up the order of the conjuntions below
+    assert baseScorer != null;
+
+    // Position all scorers to their first matching doc:
+    int baseDocID = baseScorer.nextDoc();
+
+    for(DocsEnumsAndFreq dim : dims) {
+      for(DocsEnum docsEnum : dim.docsEnums) {
+        if (docsEnum != null) {
+          docsEnum.nextDoc();
+        }
+      }
+    }
+
+    final int numDims = dims.length;
+
+    DocsEnum[][] docsEnums = new DocsEnum[numDims][];
+    Collector[] sidewaysCollectors = new Collector[numDims];
+    int maxFreq = 0;
+    for(int dim=0;dim<numDims;dim++) {
+      docsEnums[dim] = dims[dim].docsEnums;
+      sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
+      maxFreq = Math.max(maxFreq, dims[dim].freq);
+    }
+
+    // TODO: if we add cost API to Scorer, switch to that!
+    int estBaseHitCount = context.reader().maxDoc() / (1+baseDocID);
+
+    /*
+    System.out.println("\nbaseDocID=" + baseDocID + " est=" + estBaseHitCount);
+    System.out.println("  maxDoc=" + context.reader().maxDoc());
+    System.out.println("  maxFreq=" + maxFreq);
+    System.out.println("  dims[0].freq=" + dims[0].freq);
+    if (numDims > 1) {
+      System.out.println("  dims[1].freq=" + dims[1].freq);
+    }
+    */
+
+    if (estBaseHitCount < maxFreq/10) {
+      //System.out.println("baseAdvance");
+      doBaseAdvanceScoring(collector, docsEnums, sidewaysCollectors);
+    } else if (numDims > 1 && (dims[1].freq < estBaseHitCount/10)) {
+      //System.out.println("drillDownAdvance");
+      doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
+    } else {
+      //System.out.println("union");
+      doUnionScoring(collector, docsEnums, sidewaysCollectors);
+    }
+  }
+
+  /** Used when drill downs are highly constraining vs
+   *  baseQuery. */
+  private void doDrillDownAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
+    final int maxDoc = context.reader().maxDoc();
+    final int numDims = dims.length;
+
+    //if (DEBUG) {
+    //  System.out.println("  doDrillDownAdvanceScoring");
+    //}
+
+    // TODO: maybe a class like BS, instead of parallel arrays
+    int[] filledSlots = new int[CHUNK];
+    int[] docIDs = new int[CHUNK];
+    float[] scores = new float[CHUNK];
+    int[] missingDims = new int[CHUNK];
+    int[] counts = new int[CHUNK];
+
+    docIDs[0] = -1;
+    int nextChunkStart = CHUNK;
+
+    final FixedBitSet seen = new FixedBitSet(CHUNK);
+
+    while (true) {
+      //if (DEBUG) {
+      //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
+      //}
+
+      // First dim:
+      //if (DEBUG) {
+      //  System.out.println("  dim0");
+      //}
+      for(DocsEnum docsEnum : docsEnums[0]) {
+        if (docsEnum == null) {
+          continue;
+        }
+        int docID = docsEnum.docID();
+        while (docID < nextChunkStart) {
+          int slot = docID & MASK;
+
+          if (docIDs[slot] != docID) {
+            seen.set(slot);
+            // Mark slot as valid:
+            //if (DEBUG) {
+            //  System.out.println("    set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
+            //}
+            docIDs[slot] = docID;
+            missingDims[slot] = 1;
+            counts[slot] = 1;
+          }
+
+          docID = docsEnum.nextDoc();
+        }
+      }
+
+      // Second dim:
+      //if (DEBUG) {
+      //  System.out.println("  dim1");
+      //}
+      for(DocsEnum docsEnum : docsEnums[1]) {
+        if (docsEnum == null) {
+          continue;
+        }
+        int docID = docsEnum.docID();
+        while (docID < nextChunkStart) {
+          int slot = docID & MASK;
+
+          if (docIDs[slot] != docID) {
+            // Mark slot as valid:
+            seen.set(slot);
+            //if (DEBUG) {
+            //  System.out.println("    set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
+            //}
+            docIDs[slot] = docID;
+            missingDims[slot] = 0;
+            counts[slot] = 1;
+          } else {
+            // TODO: single-valued dims will always be true
+            // below; we could somehow specialize
+            if (missingDims[slot] >= 1) {
+              missingDims[slot] = 2;
+              counts[slot] = 2;
+              //if (DEBUG) {
+              //  System.out.println("    set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
+              //}
+            } else {
+              counts[slot] = 1;
+              //if (DEBUG) {
+              //  System.out.println("    set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
+              //}
+            }
+          }
+
+          docID = docsEnum.nextDoc();
+        }
+      }
+
+      // After this we can "upgrade" to conjunction, because
+      // any doc not seen by either dim 0 or dim 1 cannot be
+      // a hit or a near miss:
+
+      //if (DEBUG) {
+      //  System.out.println("  baseScorer");
+      //}
+
+      // Fold in baseScorer, using advance:
+      int filledCount = 0;
+      int slot0 = 0;
+      while (slot0 < CHUNK && (slot0 = seen.nextSetBit(slot0)) != -1) {
+        int ddDocID = docIDs[slot0];
+        assert ddDocID != -1;
+
+        int baseDocID = baseScorer.docID();
+        if (baseDocID < ddDocID) {
+          baseDocID = baseScorer.advance(ddDocID);
+        }
+        if (baseDocID == ddDocID) {
+          //if (DEBUG) {
+          //  System.out.println("    keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
+          //}
+          scores[slot0] = baseScorer.score();
+          filledSlots[filledCount++] = slot0;
+          counts[slot0]++;
+        } else {
+          //if (DEBUG) {
+          //  System.out.println("    no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
+          //}
+          docIDs[slot0] = -1;
+
+          // TODO: we could jump slot0 forward to the
+          // baseDocID ... but we'd need to set docIDs for
+          // intervening slots to -1
+        }
+        slot0++;
+      }
+      seen.clear(0, CHUNK);
+
+      if (filledCount == 0) {
+        if (nextChunkStart >= maxDoc) {
+          break;
+        }
+        nextChunkStart += CHUNK;
+        continue;
+      }
+      
+      // TODO: factor this out & share w/ union scorer,
+      // except we start from dim=2 instead:
+      for(int dim=2;dim<numDims;dim++) {
+        //if (DEBUG) {
+        //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
+        //}
+        for(DocsEnum docsEnum : docsEnums[dim]) {
+          if (docsEnum == null) {
+            continue;
+          }
+          int docID = docsEnum.docID();
+          while (docID < nextChunkStart) {
+            int slot = docID & MASK;
+            if (docIDs[slot] == docID && counts[slot] >= dim) {
+              // TODO: single-valued dims will always be true
+              // below; we could somehow specialize
+              if (missingDims[slot] >= dim) {
+                //if (DEBUG) {
+                //  System.out.println("    set docID=" + docID + " count=" + (dim+2));
+                //}
+                missingDims[slot] = dim+1;
+                counts[slot] = dim+2;
+              } else {
+                //if (DEBUG) {
+                //  System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
+                //}
+                counts[slot] = dim+1;
+              }
+            }
+            // TODO: sometimes use advance?
+            docID = docsEnum.nextDoc();
+          }
+        }
+      }
+
+      // Collect:
+      //if (DEBUG) {
+      //  System.out.println("  now collect: " + filledCount + " hits");
+      //}
+      for(int i=0;i<filledCount;i++) {
+        int slot = filledSlots[i];
+        collectDocID = docIDs[slot];
+        collectScore = scores[slot];
+        //if (DEBUG) {
+        //  System.out.println("    docID=" + docIDs[slot] + " count=" + counts[slot]);
+        //}
+        if (counts[slot] == 1+numDims) {
+          collectHit(collector, sidewaysCollectors);
+        } else if (counts[slot] == numDims) {
+          collectNearMiss(sidewaysCollectors, missingDims[slot]);
+        }
+      }
+
+      if (nextChunkStart >= maxDoc) {
+        break;
+      }
+
+      nextChunkStart += CHUNK;
+    }
+  }
+
+  /** Used when base query is highly constraining vs the
+   *  drilldowns; in this case we just .next() on base and
+   *  .advance() on the dims. */
+  private void doBaseAdvanceScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
+    //if (DEBUG) {
+    //  System.out.println("  doBaseAdvanceScoring");
+    //}
+    int docID = baseScorer.docID();
+
+    final int numDims = dims.length;
+
+    nextDoc: while (docID != NO_MORE_DOCS) {
+      int failedDim = -1;
+      for(int dim=0;dim<numDims;dim++) {
+        // TODO: should we sort this 2nd dimension of
+        // docsEnums from most frequent to least?
+        boolean found = false;
+        for(DocsEnum docsEnum : docsEnums[dim]) {
+          if (docsEnum == null) {
+            continue;
+          }
+          if (docsEnum.docID() < docID) {
+            docsEnum.advance(docID);
+          }
+          if (docsEnum.docID() == docID) {
+            found = true;
+            break;
+          }
+        }
+        if (!found) {
+          if (failedDim != -1) {
+            // More than one dim fails on this document, so
+            // it's neither a hit nor a near-miss; move to
+            // next doc:
+            docID = baseScorer.nextDoc();
+            continue nextDoc;
+          } else {
+            failedDim = dim;
+          }
+        }
+      }
+
+      collectDocID = docID;
+
+      // TODO: we could score on demand instead since we are
+      // daat here:
+      collectScore = baseScorer.score();
+
+      if (failedDim == -1) {
+        collectHit(collector, sidewaysCollectors);
+      } else {
+        collectNearMiss(sidewaysCollectors, failedDim);
+      }
+
+      docID = baseScorer.nextDoc();
+    }
+  }
+
+  private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
+    //if (DEBUG) {
+    //  System.out.println("      hit");
+    //}
+
+    collector.collect(collectDocID);
+    drillDownCollector.collect(collectDocID);
+
+    // TODO: we could "fix" faceting of the sideways counts
+    // to do this "union" (of the drill down hits) in the
+    // end instead:
+
+    // Tally sideways counts:
+    for(int dim=0;dim<sidewaysCollectors.length;dim++) {
+      sidewaysCollectors[dim].collect(collectDocID);
+    }
+  }
+
+  private void collectNearMiss(Collector[] sidewaysCollectors, int dim) throws IOException {
+    //if (DEBUG) {
+    //  System.out.println("      missingDim=" + dim);
+    //}
+    sidewaysCollectors[dim].collect(collectDocID);
+  }
+
+  private void doUnionScoring(Collector collector, DocsEnum[][] docsEnums, Collector[] sidewaysCollectors) throws IOException {
+    //if (DEBUG) {
+    //  System.out.println("  doUnionScoring");
+    //}
+
+    final int maxDoc = context.reader().maxDoc();
+    final int numDims = dims.length;
+
+    // TODO: maybe a class like BS, instead of parallel arrays
+    int[] filledSlots = new int[CHUNK];
+    int[] docIDs = new int[CHUNK];
+    float[] scores = new float[CHUNK];
+    int[] missingDims = new int[CHUNK];
+    int[] counts = new int[CHUNK];
+
+    docIDs[0] = -1;
+
+    // NOTE: this is basically a specialized version of
+    // BooleanScorer, to the minShouldMatch=N-1 case, but
+    // carefully tracking which dimension failed to match
+
+    int nextChunkStart = CHUNK;
+
+    while (true) {
+      //if (DEBUG) {
+      //  System.out.println("\ncycle nextChunkStart=" + nextChunkStart + " docIds[0]=" + docIDs[0]);
+      //}
+      int filledCount = 0;
+      int docID = baseScorer.docID();
+      //if (DEBUG) {
+      //  System.out.println("  base docID=" + docID);
+      //}
+      while (docID < nextChunkStart) {
+        int slot = docID & MASK;
+        //if (DEBUG) {
+        //  System.out.println("    docIDs[slot=" + slot + "]=" + docID + " id=" + context.reader().document(docID).get("id"));
+        //}
+
+        // Mark slot as valid:
+        assert docIDs[slot] != docID: "slot=" + slot + " docID=" + docID;
+        docIDs[slot] = docID;
+        scores[slot] = baseScorer.score();
+        filledSlots[filledCount++] = slot;
+        missingDims[slot] = 0;
+        counts[slot] = 1;
+
+        docID = baseScorer.nextDoc();
+      }
+
+      if (filledCount == 0) {
+        if (nextChunkStart >= maxDoc) {
+          break;
+        }
+        nextChunkStart += CHUNK;
+        continue;
+      }
+
+      // First drill-down dim, basically adds SHOULD onto
+      // the baseQuery:
+      //if (DEBUG) {
+      //  System.out.println("  dim=0 [" + dims[0].dim + "]");
+      //}
+      for(DocsEnum docsEnum : docsEnums[0]) {
+        if (docsEnum == null) {
+          continue;
+        }
+        docID = docsEnum.docID();
+        //if (DEBUG) {
+        //  System.out.println("    start docID=" + docID);
+        //}
+        while (docID < nextChunkStart) {
+          int slot = docID & MASK;
+          if (docIDs[slot] == docID) {
+            //if (DEBUG) {
+            //  System.out.println("      set docID=" + docID + " count=2");
+            //}
+            missingDims[slot] = 1;
+            counts[slot] = 2;
+          }
+          docID = docsEnum.nextDoc();
+        }
+      }
+
+      for(int dim=1;dim<numDims;dim++) {
+        //if (DEBUG) {
+        //  System.out.println("  dim=" + dim + " [" + dims[dim].dim + "]");
+        //}
+        for(DocsEnum docsEnum : docsEnums[dim]) {
+          if (docsEnum == null) {
+            continue;
+          }
+          docID = docsEnum.docID();
+          //if (DEBUG) {
+          //  System.out.println("    start docID=" + docID);
+          //}
+          while (docID < nextChunkStart) {
+            int slot = docID & MASK;
+            if (docIDs[slot] == docID && counts[slot] >= dim) {
+              // This doc is still in the running...
+              // TODO: single-valued dims will always be true
+              // below; we could somehow specialize
+              if (missingDims[slot] >= dim) {
+                //if (DEBUG) {
+                //  System.out.println("      set docID=" + docID + " count=" + (dim+2));
+                //}
+                missingDims[slot] = dim+1;
+                counts[slot] = dim+2;
+              } else {
+                //if (DEBUG) {
+                //  System.out.println("      set docID=" + docID + " missing count=" + (dim+1));
+                //}
+                counts[slot] = dim+1;
+              }
+            }
+            docID = docsEnum.nextDoc();
+          }
+
+          // TODO: sometimes use advance?
+
+          /*
+            int docBase = nextChunkStart - CHUNK;
+            for(int i=0;i<filledCount;i++) {
+              int slot = filledSlots[i];
+              docID = docBase + filledSlots[i];
+              if (docIDs[slot] == docID && counts[slot] >= dim) {
+                // This doc is still in the running...
+                int ddDocID = docsEnum.docID();
+                if (ddDocID < docID) {
+                  ddDocID = docsEnum.advance(docID);
+                }
+                if (ddDocID == docID) {
+                  if (missingDims[slot] >= dim && counts[slot] == allMatchCount) {
+                  //if (DEBUG) {
+                  //    System.out.println("    set docID=" + docID + " count=" + (dim+2));
+                   // }
+                    missingDims[slot] = dim+1;
+                    counts[slot] = dim+2;
+                  } else {
+                  //if (DEBUG) {
+                  //    System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
+                   // }
+                    counts[slot] = dim+1;
+                  }
+                }
+              }
+            }            
+          */
+        }
+      }
+
+      // Collect:
+      //if (DEBUG) {
+      //  System.out.println("  now collect: " + filledCount + " hits");
+      //}
+      for(int i=0;i<filledCount;i++) {
+        int slot = filledSlots[i];
+        collectDocID = docIDs[slot];
+        collectScore = scores[slot];
+        //if (DEBUG) {
+        //  System.out.println("    docID=" + docIDs[slot] + " count=" + counts[slot]);
+        //}
+        //System.out.println("  collect doc=" + collectDocID + " main.freq=" + (counts[slot]-1) + " main.doc=" + collectDocID + " exactCount=" + numDims);
+        if (counts[slot] == 1+numDims) {
+          //System.out.println("    hit");
+          collectHit(collector, sidewaysCollectors);
+        } else if (counts[slot] == numDims) {
+          //System.out.println("    sw");
+          collectNearMiss(sidewaysCollectors, missingDims[slot]);
+        }
+      }
+
+      if (nextChunkStart >= maxDoc) {
+        break;
+      }
+
+      nextChunkStart += CHUNK;
+    }
+  }
+
+  @Override
+  public int docID() {
+    return collectDocID;
+  }
+
+  @Override
+  public float score() {
+    return collectScore;
+  }
+
+  @Override
+  public int freq() {
+    return 1+dims.length;
+  }
+
+  @Override
+  public int nextDoc() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public int advance(int target) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Collection<ChildScorer> getChildren() {
+    return Collections.singletonList(new ChildScorer(baseScorer, "MUST"));
+  }
+
+  static class DocsEnumsAndFreq implements Comparable<DocsEnumsAndFreq> {
+    DocsEnum[] docsEnums;
+    // Max docFreq for all docsEnums for this dim:
+    int freq;
+    Collector sidewaysCollector;
+    String dim;
+
+    @Override
+    public int compareTo(DocsEnumsAndFreq other) {
+      return freq - other.freq;
+    }
+  }
+}

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java Tue Feb 26 00:02:16 2013
@@ -60,6 +60,26 @@ public class FacetsAccumulator {
   public FacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
     this(searchParams, indexReader, taxonomyReader, null);
   }
+
+  /**
+   * Creates an appropriate {@link FacetsAccumulator},
+   * returning {@link FacetsAccumulator} when all requests
+   * are {@link CountFacetRequest} and only one partition is
+   * in use, otherwise {@link StandardFacetsAccumulator}.
+   */
+  public static FacetsAccumulator create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
+    if (fsp.indexingParams.getPartitionSize() != Integer.MAX_VALUE) {
+      return new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
+    }
+    
+    for (FacetRequest fr : fsp.facetRequests) {
+      if (!(fr instanceof CountFacetRequest)) {
+        return new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
+      }
+    }
+    
+    return new FacetsAccumulator(fsp, indexReader, taxoReader);
+  }
   
   /**
    * Initializes the accumulator with the given parameters as well as
@@ -153,6 +173,12 @@ public class FacetsAccumulator {
     for (FacetRequest fr : searchParams.facetRequests) {
       int rootOrd = taxonomyReader.getOrdinal(fr.categoryPath);
       if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist
+        // Add empty FacetResult:
+        FacetResultNode root = new FacetResultNode();
+        root.ordinal = TaxonomyReader.INVALID_ORDINAL;
+        root.label = fr.categoryPath;
+        root.value = 0;
+        res.add(new FacetResult(fr, root, 0));
         continue;
       }
       CategoryListParams clp = searchParams.indexingParams.getCategoryListParams(fr.categoryPath);

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java Tue Feb 26 00:02:16 2013
@@ -163,21 +163,11 @@ public abstract class FacetsCollector ex
   }
   
   /**
-   * Creates a {@link FacetsCollector} with the default
-   * {@link FacetsAccumulator}.
+   * Creates a {@link FacetsCollector} using the {@link
+   * FacetsAccumulator} from {@link FacetsAccumulator#create}.
    */
   public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
-    if (fsp.indexingParams.getPartitionSize() != Integer.MAX_VALUE) {
-      return create(new StandardFacetsAccumulator(fsp, indexReader, taxoReader));
-    }
-    
-    for (FacetRequest fr : fsp.facetRequests) {
-      if (!(fr instanceof CountFacetRequest)) {
-        return create(new StandardFacetsAccumulator(fsp, indexReader, taxoReader));
-      }
-    }
-    
-    return create(new FacetsAccumulator(fsp, indexReader, taxoReader));
+    return create(FacetsAccumulator.create(fsp, indexReader, taxoReader));
   }
 
   /**

Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java Tue Feb 26 00:02:16 2013
@@ -197,7 +197,13 @@ public class StandardFacetsAccumulator e
         PartitionsFacetResultsHandler frHndlr = createFacetResultsHandler(fr);
         IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
         if (tmpResult == null) {
-          continue; // do not add a null to the list.
+          // Add empty FacetResult:
+          FacetResultNode root = new FacetResultNode();
+          root.ordinal = TaxonomyReader.INVALID_ORDINAL;
+          root.label = fr.categoryPath;
+          root.value = 0;
+          res.add(new FacetResult(fr, root, 0));
+          continue;
         }
         FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
         // final labeling if allowed (because labeling is a costly operation)

Added: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java?rev=1449972&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (added)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java Tue Feb 26 00:02:16 2013
@@ -0,0 +1,829 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.facet.FacetTestCase;
+import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util._TestUtil;
+
+public class TestDrillSideways extends FacetTestCase {
+
+  private DirectoryTaxonomyWriter taxoWriter;
+  private RandomIndexWriter writer;
+  private FacetFields facetFields;
+
+  private void add(String ... categoryPaths) throws IOException {
+    Document doc = new Document();
+    List<CategoryPath> paths = new ArrayList<CategoryPath>();
+    for(String categoryPath : categoryPaths) {
+      paths.add(new CategoryPath(categoryPath, '/'));
+    }
+    facetFields.addFields(doc, paths);
+    writer.addDocument(doc);
+  }
+
+  public void testBasic() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    writer = new RandomIndexWriter(random(), dir);
+
+    // Writes facet ords to a separate directory from the
+    // main index:
+    taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+    // Reused across documents, to add the necessary facet
+    // fields:
+    facetFields = new FacetFields(taxoWriter);
+
+    add("Author/Bob", "Publish Date/2010/10/15");
+    add("Author/Lisa", "Publish Date/2010/10/20");
+    add("Author/Lisa", "Publish Date/2012/1/1");
+    add("Author/Susan", "Publish Date/2012/1/7");
+    add("Author/Frank", "Publish Date/1999/5/5");
+
+    // NRT open
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    //System.out.println("searcher=" + searcher);
+
+    // NRT open
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    taxoWriter.close();
+
+    // Count both "Publish Date" and "Author" dimensions, in
+    // drill-down:
+    FacetSearchParams fsp = new FacetSearchParams(
+        new CountFacetRequest(new CategoryPath("Publish Date"), 10), 
+        new CountFacetRequest(new CategoryPath("Author"), 10));
+
+    // Simple case: drill-down on a single field; in this
+    // case the drill-sideways + drill-down counts ==
+    // drill-down of just the query: 
+    DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
+    ddq.add(new CategoryPath("Author", "Lisa"));
+    DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);
+
+    assertEquals(2, r.hits.totalHits);
+    assertEquals(2, r.facetResults.size());
+    // Publish Date is only drill-down, and Lisa published
+    // one in 2012 and one in 2010:
+    assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
+    // Author is drill-sideways + drill-down: Lisa
+    // (drill-down) published twice, and Frank/Susan/Bob
+    // published once:
+    assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));
+
+    // Same simple case, but no baseQuery (pure browse):
+    // drill-down on a single field; in this case the
+    // drill-sideways + drill-down counts == drill-down of
+    // just the query:
+    ddq = new DrillDownQuery(fsp.indexingParams);
+    ddq.add(new CategoryPath("Author", "Lisa"));
+    r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);
+
+    assertEquals(2, r.hits.totalHits);
+    assertEquals(2, r.facetResults.size());
+    // Publish Date is only drill-down, and Lisa published
+    // one in 2012 and one in 2010:
+    assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
+    // Author is drill-sideways + drill-down: Lisa
+    // (drill-down) published twice, and Frank/Susan/Bob
+    // published once:
+    assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));
+
+    // Another simple case: drill-down on on single fields
+    // but OR of two values
+    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
+    ddq.add(new CategoryPath("Author", "Lisa"), new CategoryPath("Author", "Bob"));
+    r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);
+    assertEquals(3, r.hits.totalHits);
+    assertEquals(2, r.facetResults.size());
+    // Publish Date is only drill-down: Lisa and Bob
+    // (drill-down) published twice in 2010 and once in 2012:
+    assertEquals("Publish Date: 2010=2 2012=1", toString(r.facetResults.get(0)));
+    // Author is drill-sideways + drill-down: Lisa
+    // (drill-down) published twice, and Frank/Susan/Bob
+    // published once:
+    assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));
+
+    // More interesting case: drill-down on two fields
+    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
+    ddq.add(new CategoryPath("Author", "Lisa"));
+    ddq.add(new CategoryPath("Publish Date", "2010"));
+    r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);
+    assertEquals(1, r.hits.totalHits);
+    assertEquals(2, r.facetResults.size());
+    // Publish Date is drill-sideways + drill-down: Lisa
+    // (drill-down) published once in 2010 and once in 2012:
+    assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
+    // Author is drill-sideways + drill-down:
+    // only Lisa & Bob published (once each) in 2010:
+    assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1)));
+
+    // Even more interesting case: drill down on two fields,
+    // but one of them is OR
+    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
+
+    // Drill down on Lisa or Bob:
+    ddq.add(new CategoryPath("Author", "Lisa"),
+            new CategoryPath("Author", "Bob"));
+    ddq.add(new CategoryPath("Publish Date", "2010"));
+    r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);
+    assertEquals(2, r.hits.totalHits);
+    assertEquals(2, r.facetResults.size());
+    // Publish Date is both drill-sideways + drill-down:
+    // Lisa or Bob published twice in 2010 and once in 2012:
+    assertEquals("Publish Date: 2010=2 2012=1", toString(r.facetResults.get(0)));
+    // Author is drill-sideways + drill-down:
+    // only Lisa & Bob published (once each) in 2010:
+    assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1)));
+
+    // Test drilling down on invalid field:
+    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
+    ddq.add(new CategoryPath("Foobar", "Baz"));
+    fsp = new FacetSearchParams(
+        new CountFacetRequest(new CategoryPath("Publish Date"), 10), 
+        new CountFacetRequest(new CategoryPath("Foobar"), 10));
+    r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);
+    assertEquals(0, r.hits.totalHits);
+    assertEquals(2, r.facetResults.size());
+    assertEquals("Publish Date:", toString(r.facetResults.get(0)));
+    assertEquals("Foobar:", toString(r.facetResults.get(1)));
+
+    // Test drilling down on valid term or'd with invalid term:
+    ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
+    ddq.add(new CategoryPath("Author", "Lisa"),
+            new CategoryPath("Author", "Tom"));
+    fsp = new FacetSearchParams(
+        new CountFacetRequest(new CategoryPath("Publish Date"), 10), 
+        new CountFacetRequest(new CategoryPath("Author"), 10));
+    r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);
+    assertEquals(2, r.hits.totalHits);
+    assertEquals(2, r.facetResults.size());
+    // Publish Date is only drill-down, and Lisa published
+    // one in 2012 and one in 2010:
+    assertEquals("Publish Date: 2012=1 2010=1", toString(r.facetResults.get(0)));
+    // Author is drill-sideways + drill-down: Lisa
+    // (drill-down) published twice, and Frank/Susan/Bob
+    // published once:
+    assertEquals("Author: Lisa=2 Frank=1 Susan=1 Bob=1", toString(r.facetResults.get(1)));
+
+    // Test main query gets null scorer:
+    fsp = new FacetSearchParams(
+        new CountFacetRequest(new CategoryPath("Publish Date"), 10), 
+        new CountFacetRequest(new CategoryPath("Author"), 10));
+    ddq = new DrillDownQuery(fsp.indexingParams, new TermQuery(new Term("foobar", "baz")));
+    ddq.add(new CategoryPath("Author", "Lisa"));
+    r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);
+
+    assertEquals(0, r.hits.totalHits);
+    assertEquals(2, r.facetResults.size());
+    assertEquals("Publish Date:", toString(r.facetResults.get(0)));
+    assertEquals("Author:", toString(r.facetResults.get(1)));
+
+    searcher.getIndexReader().close();
+    taxoReader.close();
+    dir.close();
+    taxoDir.close();
+  }
+
+  public void testSometimesInvalidDrillDown() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    writer = new RandomIndexWriter(random(), dir);
+
+    // Writes facet ords to a separate directory from the
+    // main index:
+    taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+
+    // Reused across documents, to add the necessary facet
+    // fields:
+    facetFields = new FacetFields(taxoWriter);
+
+    add("Author/Bob", "Publish Date/2010/10/15");
+    add("Author/Lisa", "Publish Date/2010/10/20");
+    writer.commit();
+    // 2nd segment has no Author:
+    add("Foobar/Lisa", "Publish Date/2012/1/1");
+
+    // NRT open
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    //System.out.println("searcher=" + searcher);
+
+    // NRT open
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    taxoWriter.close();
+
+    // Count both "Publish Date" and "Author" dimensions, in
+    // drill-down:
+    FacetSearchParams fsp = new FacetSearchParams(
+        new CountFacetRequest(new CategoryPath("Publish Date"), 10), 
+        new CountFacetRequest(new CategoryPath("Author"), 10));
+
+    DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
+    ddq.add(new CategoryPath("Author", "Lisa"));
+    DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(null, ddq, 10, fsp);
+
+    assertEquals(1, r.hits.totalHits);
+    assertEquals(2, r.facetResults.size());
+    // Publish Date is only drill-down, and Lisa published
+    // one in 2012 and one in 2010:
+    assertEquals("Publish Date: 2010=1", toString(r.facetResults.get(0)));
+    // Author is drill-sideways + drill-down: Lisa
+    // (drill-down) published once, and Bob
+    // published once:
+    assertEquals("Author: Lisa=1 Bob=1", toString(r.facetResults.get(1)));
+
+    searcher.getIndexReader().close();
+    taxoReader.close();
+    dir.close();
+    taxoDir.close();
+  }
+
+  private static class Doc implements Comparable<Doc> {
+    String id;
+    String contentToken;
+
+    // -1 if the doc is missing this dim, else the index
+    // -into the values for this dim:
+    int[] dims;
+
+    // 2nd value per dim for the doc (so we test
+    // multi-valued fields):
+    int[] dims2;
+    boolean deleted;
+
+    @Override
+    public int compareTo(Doc other) {
+      return id.compareTo(other.id);
+    }
+  }
+
+  private double aChance, bChance, cChance;
+
+  private String randomContentToken(boolean isQuery) {
+    double d = random().nextDouble();
+    if (isQuery) {
+      if (d < 0.33) {
+        return "a";
+      } else if (d < 0.66) {
+        return "b";
+      } else {
+        return "c";
+      }
+    } else {
+      if (d <= aChance) {
+        return "a";
+      } else if (d < aChance + bChance) {
+        return "b";
+      } else {
+        return "c";
+      }
+    }
+  }
+
+  public void testRandom() throws Exception {
+
+    while (aChance == 0.0) {
+      aChance = random().nextDouble();
+    }
+    while (bChance == 0.0) {
+      bChance = random().nextDouble();
+    }
+    while (cChance == 0.0) {
+      cChance = random().nextDouble();
+    }
+    /*
+    aChance = .01;
+    bChance = 0.5;
+    cChance = 1.0;
+    */
+    double sum = aChance + bChance + cChance;
+    aChance /= sum;
+    bChance /= sum;
+    cChance /= sum;
+
+    int numDims = _TestUtil.nextInt(random(), 2, 5);
+    //int numDims = 3;
+    int numDocs = atLeast(3000);
+    //int numDocs = 20;
+    if (VERBOSE) {
+      System.out.println("numDims=" + numDims + " numDocs=" + numDocs + " aChance=" + aChance + " bChance=" + bChance + " cChance=" + cChance);
+    }
+    String[][] dimValues = new String[numDims][];
+    int valueCount = 2;
+    for(int dim=0;dim<numDims;dim++) {
+      Set<String> values = new HashSet<String>();
+      while (values.size() < valueCount) {
+        String s = _TestUtil.randomRealisticUnicodeString(random());
+        //String s = _TestUtil.randomSimpleString(random());
+        if (s.length() > 0) {
+          values.add(s);
+        }
+      } 
+      dimValues[dim] = values.toArray(new String[values.size()]);
+      valueCount *= 2;
+    }
+
+    List<Doc> docs = new ArrayList<Doc>();
+    for(int i=0;i<numDocs;i++) {
+      Doc doc = new Doc();
+      doc.id = ""+i;
+      doc.contentToken = randomContentToken(false);
+      doc.dims = new int[numDims];
+      doc.dims2 = new int[numDims];
+      for(int dim=0;dim<numDims;dim++) {
+        if (random().nextInt(5) == 3) {
+          // This doc is missing this dim:
+          doc.dims[dim] = -1;
+        } else if (dimValues[dim].length <= 4) {
+          int dimUpto = 0;
+          doc.dims[dim] = dimValues[dim].length-1;
+          while (dimUpto < dimValues[dim].length) {
+            if (random().nextBoolean()) {
+              doc.dims[dim] = dimUpto;
+              break;
+            }
+            dimUpto++;
+          }
+        } else {
+          doc.dims[dim] = random().nextInt(dimValues[dim].length);
+        }
+
+        if (random().nextInt(5) == 3) {
+          // 2nd value:
+          doc.dims2[dim] = random().nextInt(dimValues[dim].length);
+        } else {
+          doc.dims2[dim] = -1;
+        }
+      }
+      docs.add(doc);
+    }
+
+    Directory d = newDirectory();
+    Directory td = newDirectory();
+
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setInfoStream(InfoStream.NO_OUTPUT);
+    RandomIndexWriter w = new RandomIndexWriter(random(), d, iwc);
+    DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
+    facetFields = new FacetFields(tw);
+
+    for(Doc rawDoc : docs) {
+      Document doc = new Document();
+      doc.add(newStringField("id", rawDoc.id, Field.Store.YES));
+      doc.add(newStringField("content", rawDoc.contentToken, Field.Store.NO));
+      List<CategoryPath> paths = new ArrayList<CategoryPath>();
+
+      if (VERBOSE) {
+        System.out.println("  doc id=" + rawDoc.id + " token=" + rawDoc.contentToken);
+      }
+      for(int dim=0;dim<numDims;dim++) {
+        int dimValue = rawDoc.dims[dim];
+        if (dimValue != -1) {
+          paths.add(new CategoryPath("dim" + dim, dimValues[dim][dimValue]));
+          doc.add(new StringField("dim" + dim, dimValues[dim][dimValue], Field.Store.YES));
+          if (VERBOSE) {
+            System.out.println("    dim" + dim + "=" + dimValues[dim][dimValue]);
+          }
+        }
+        int dimValue2 = rawDoc.dims2[dim];
+        if (dimValue2 != -1) {
+          paths.add(new CategoryPath("dim" + dim, dimValues[dim][dimValue2]));
+          doc.add(new StringField("dim" + dim, dimValues[dim][dimValue2], Field.Store.YES));
+          if (VERBOSE) {
+            System.out.println("      dim" + dim + "=" + dimValues[dim][dimValue2]);
+          }
+        }
+      }
+      if (!paths.isEmpty()) {
+        facetFields.addFields(doc, paths);
+      }
+      w.addDocument(doc);
+    }
+
+    if (random().nextBoolean()) {
+      // Randomly delete a few docs:
+      int numDel = _TestUtil.nextInt(random(), 1, (int) (numDocs*0.05));
+      if (VERBOSE) {
+        System.out.println("delete " + numDel);
+      }
+      int delCount = 0;
+      while (delCount < numDel) {
+        Doc doc = docs.get(random().nextInt(docs.size()));
+        if (!doc.deleted) {
+          if (VERBOSE) {
+            System.out.println("  delete id=" + doc.id);
+          }
+          doc.deleted = true;
+          w.deleteDocuments(new Term("id", doc.id));
+          delCount++;
+        }
+      }
+    }
+
+    if (random().nextBoolean()) {
+      if (VERBOSE) {
+        System.out.println("TEST: forceMerge(1)...");
+      }
+      w.forceMerge(1);
+    }
+    IndexReader r = w.getReader();
+    w.close();
+    if (VERBOSE) {
+      System.out.println("r.numDocs() = " + r.numDocs());
+    }
+
+    // NRT open
+    TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
+    tw.close();
+
+    List<FacetRequest> requests = new ArrayList<FacetRequest>();
+    for(int i=0;i<numDims;i++) {
+      requests.add(new CountFacetRequest(new CategoryPath("dim" + i), dimValues[numDims-1].length));
+    }
+
+    FacetSearchParams fsp = new FacetSearchParams(requests);
+    IndexSearcher s = new IndexSearcher(r);
+
+    int numIters = atLeast(10);
+
+    for(int iter=0;iter<numIters;iter++) {
+      String contentToken = random().nextInt(30) == 17 ? null : randomContentToken(true);
+      int numDrillDown = _TestUtil.nextInt(random(), 1, Math.min(4, numDims));
+      String[][] drillDowns = new String[numDims][];
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter + " baseQuery=" + contentToken + " numDrillDown=" + numDrillDown);
+      }
+      int count = 0;
+      while (count < numDrillDown) {
+        int dim = random().nextInt(numDims);
+        if (drillDowns[dim] == null) {
+          if (random().nextBoolean()) {
+            // Drill down on one value:
+            drillDowns[dim] = new String[] {dimValues[dim][random().nextInt(dimValues[dim].length)]};
+          } else {
+            int orCount = _TestUtil.nextInt(random(), 1, Math.min(5, dimValues[dim].length));
+            drillDowns[dim] = new String[orCount];
+            for(int i=0;i<orCount;i++) {
+              while (true) {
+                String value = dimValues[dim][random().nextInt(dimValues[dim].length)];
+                for(int j=0;j<i;j++) {
+                  if (value.equals(drillDowns[dim][j])) {
+                    value = null;
+                    break;
+                  }
+                }
+                if (value != null) {
+                  drillDowns[dim][i] = value;
+                  break;
+                }
+              }
+            }
+          }
+          if (VERBOSE) {
+            System.out.println("  dim" + dim + "=" + Arrays.toString(drillDowns[dim]));
+          }
+          count++;
+        }
+      }
+
+      Query baseQuery;
+      if (contentToken == null) {
+        baseQuery = new MatchAllDocsQuery();
+      } else {
+        baseQuery = new TermQuery(new Term("content", contentToken));
+      }
+
+      DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, baseQuery);
+
+      for(int dim=0;dim<numDims;dim++) {
+        if (drillDowns[dim] != null) {
+          CategoryPath[] paths = new CategoryPath[drillDowns[dim].length];
+          int upto = 0;
+          for(String value : drillDowns[dim]) {
+            paths[upto++] = new CategoryPath("dim" + dim, value);
+          }
+          ddq.add(paths);
+        }
+      }
+
+      Filter filter;
+      if (random().nextInt(7) == 6) {
+        if (VERBOSE) {
+          System.out.println("  only-even filter");
+        }
+        filter = new Filter() {
+            @Override
+            public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+              int maxDoc = context.reader().maxDoc();
+              final FixedBitSet bits = new FixedBitSet(maxDoc);
+              for(int docID=0;docID < maxDoc;docID++) {
+                // Keeps only the even ids:
+                if ((acceptDocs == null || acceptDocs.get(docID)) && ((Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0)) {
+                  bits.set(docID);
+                }
+              }
+              return bits;
+            }
+          };
+      } else {
+        filter = null;
+      }
+
+      // Verify docs are always collected in order:
+      new DrillSideways(s, tr).search(ddq,
+                           new Collector() {
+                             int lastDocID;
+
+                             @Override
+                             public void setScorer(Scorer s) {
+                             }
+
+                             @Override
+                             public void collect(int doc) {
+                               assert doc > lastDocID;
+                               lastDocID = doc;
+                             }
+
+                             @Override
+                             public void setNextReader(AtomicReaderContext context) {
+                               lastDocID = -1;
+                             }
+
+                             @Override
+                             public boolean acceptsDocsOutOfOrder() {
+                               return false;
+                             }
+                           }, fsp);
+
+      SimpleFacetResult expected = slowDrillSidewaysSearch(s, docs, contentToken, drillDowns, dimValues, filter);
+
+      Sort sort = new Sort(new SortField("id", SortField.Type.STRING));
+      DrillSidewaysResult actual = new DrillSideways(s, tr).search(ddq, filter, null, numDocs, sort, true, true, fsp);
+
+      TopDocs hits = s.search(baseQuery, numDocs);
+      Map<String,Float> scores = new HashMap<String,Float>();
+      for(ScoreDoc sd : hits.scoreDocs) {
+        scores.put(s.doc(sd.doc).get("id"), sd.score);
+      }
+      
+      verifyEquals(dimValues, s, expected, actual, scores);
+    }
+
+    tr.close();
+    r.close();
+    td.close();
+    d.close();
+  }
+
+  private static class Counters {
+    int[][] counts;
+
+    public Counters(String[][] dimValues) {
+      counts = new int[dimValues.length][];
+      for(int dim=0;dim<dimValues.length;dim++) {
+        counts[dim] = new int[dimValues[dim].length];
+      }
+    }
+
+    public void inc(int[] dims, int[] dims2) {
+      inc(dims, dims2, -1);
+    }
+
+    public void inc(int[] dims, int[] dims2, int onlyDim) {
+      assert dims.length == counts.length;
+      assert dims2.length == counts.length;
+      for(int dim=0;dim<dims.length;dim++) {
+        if (onlyDim == -1 || dim == onlyDim) {
+          if (dims[dim] != -1) {
+            counts[dim][dims[dim]]++;
+          }
+          if (dims2[dim] != -1 && dims2[dim] != dims[dim]) {
+            counts[dim][dims2[dim]]++;
+          }
+        }
+      }
+    }
+  }
+
+  private static class SimpleFacetResult {
+    List<Doc> hits;
+    int[][] counts;
+  }
+
+  private SimpleFacetResult slowDrillSidewaysSearch(IndexSearcher s, List<Doc> docs, String contentToken, String[][] drillDowns,
+                                                    String[][] dimValues, Filter onlyEven) throws Exception {
+    int numDims = dimValues.length;
+
+    List<Doc> hits = new ArrayList<Doc>();
+    Counters drillDownCounts = new Counters(dimValues);
+    Counters[] drillSidewaysCounts = new Counters[dimValues.length];
+    for(int dim=0;dim<numDims;dim++) {
+      drillSidewaysCounts[dim] = new Counters(dimValues);
+    }
+
+    if (VERBOSE) {
+      System.out.println("  compute expected");
+    }
+
+    nextDoc: for(Doc doc : docs) {
+      if (doc.deleted) {
+        continue;
+      }
+      if (onlyEven != null & (Integer.parseInt(doc.id) & 1) != 0) {
+        continue;
+      }
+      if (contentToken == null || doc.contentToken.equals(contentToken)) {
+        int failDim = -1;
+        for(int dim=0;dim<numDims;dim++) {
+          if (drillDowns[dim] != null) {
+            String docValue = doc.dims[dim] == -1 ? null : dimValues[dim][doc.dims[dim]];
+            String docValue2 = doc.dims2[dim] == -1 ? null : dimValues[dim][doc.dims2[dim]];
+            boolean matches = false;
+            for(String value : drillDowns[dim]) {
+              if (value.equals(docValue) || value.equals(docValue2)) {
+                matches = true;
+                break;
+              }
+            }
+            if (!matches) {
+              if (failDim == -1) {
+                // Doc could be a near-miss, if no other dim fails
+                failDim = dim;
+              } else {
+                // Doc isn't a hit nor a near-miss
+                continue nextDoc;
+              }
+            }
+          }
+        }
+
+        if (failDim == -1) {
+          if (VERBOSE) {
+            System.out.println("    exp: id=" + doc.id + " is a hit");
+          }
+          // Hit:
+          hits.add(doc);
+          drillDownCounts.inc(doc.dims, doc.dims2);
+          for(int dim=0;dim<dimValues.length;dim++) {
+            drillSidewaysCounts[dim].inc(doc.dims, doc.dims2);
+          }
+        } else {
+          if (VERBOSE) {
+            System.out.println("    exp: id=" + doc.id + " is a near-miss on dim=" + failDim);
+          }
+          drillSidewaysCounts[failDim].inc(doc.dims, doc.dims2, failDim);
+        }
+      }
+    }
+
+    Map<String,Integer> idToDocID = new HashMap<String,Integer>();
+    for(int i=0;i<s.getIndexReader().maxDoc();i++) {
+      idToDocID.put(s.doc(i).get("id"), i);
+    }
+
+    Collections.sort(hits);
+
+    SimpleFacetResult res = new SimpleFacetResult();
+    res.hits = hits;
+    res.counts = new int[numDims][];
+    for(int dim=0;dim<numDims;dim++) {
+      if (drillDowns[dim] != null) {
+        res.counts[dim] = drillSidewaysCounts[dim].counts[dim];
+      } else {
+        res.counts[dim] = drillDownCounts.counts[dim];
+      }
+    }
+
+    return res;
+  }
+
+  void verifyEquals(String[][] dimValues, IndexSearcher s, SimpleFacetResult expected, DrillSidewaysResult actual, Map<String,Float> scores) throws Exception {
+    if (VERBOSE) {
+      System.out.println("  verify totHits=" + expected.hits.size());
+    }
+    assertEquals(expected.hits.size(), actual.hits.totalHits);
+    assertEquals(expected.hits.size(), actual.hits.scoreDocs.length);
+    for(int i=0;i<expected.hits.size();i++) {
+      if (VERBOSE) {
+        System.out.println("    hit " + i + " expected=" + expected.hits.get(i).id);
+      }
+      assertEquals(expected.hits.get(i).id,
+                   s.doc(actual.hits.scoreDocs[i].doc).get("id"));
+      // Score should be IDENTICAL:
+      assertEquals(scores.get(expected.hits.get(i).id), actual.hits.scoreDocs[i].score, 0.0f);
+    }
+    assertEquals(expected.counts.length, actual.facetResults.size());
+    for(int dim=0;dim<expected.counts.length;dim++) {
+      if (VERBOSE) {
+        System.out.println("    dim" + dim);
+        System.out.println("      actual");
+      }
+      FacetResult fr = actual.facetResults.get(dim);
+      Map<String,Integer> actualValues = new HashMap<String,Integer>();
+      for(FacetResultNode childNode : fr.getFacetResultNode().subResults) {
+        actualValues.put(childNode.label.components[1], (int) childNode.value);
+        if (VERBOSE) {
+          System.out.println("        " + childNode.label.components[1] + ": " + (int) childNode.value);
+        }
+      }
+
+      if (VERBOSE) {
+        System.out.println("      expected");
+      }
+
+      int setCount = 0;
+      for(int i=0;i<dimValues[dim].length;i++) {
+        String value = dimValues[dim][i];
+        if (expected.counts[dim][i] != 0) {
+          if (VERBOSE) {
+            System.out.println("        " + value + ": " + expected.counts[dim][i]);
+          } 
+          assertTrue(actualValues.containsKey(value));
+          assertEquals(expected.counts[dim][i], actualValues.get(value).intValue());
+          setCount++;
+        } else {
+          assertFalse(actualValues.containsKey(value));
+        }
+      }
+
+      assertEquals(setCount, actualValues.size());
+    }
+  }
+
+  /** Just gathers counts of values under the dim. */
+  private String toString(FacetResult fr) {
+    StringBuilder b = new StringBuilder();
+    FacetResultNode node = fr.getFacetResultNode();
+    b.append(node.label);
+    b.append(":");
+    for(FacetResultNode childNode : node.subResults) {
+      b.append(' ');
+      b.append(childNode.label.components[1]);
+      b.append('=');
+      b.append((int) childNode.value);
+    }
+    return b.toString();
+  }
+}
+

Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java Tue Feb 26 00:02:16 2013
@@ -259,18 +259,30 @@ public class TestTopKInEachNodeResultHan
         assertEquals(0, node.subResults.size());
       }
       
-      fr = facetResults.get(6); // a/b, depth=0, K=2
+      fr = facetResults.get(6); // Doctor, depth=0, K=2
       hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
       assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode
       parentRes = fr.getFacetResultNode();
-      assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
+      assertEquals(0.0, parentRes.value, Double.MIN_VALUE);
       assertEquals(0, parentRes.subResults.size());
       hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
 
       // doctor, depth=1, K=2
-      assertFalse("Shouldn't have found anything for a FacetRequest " +
-          "of a facet that doesn't exist in the index.", hasDoctor);
-      assertEquals("Shouldn't have found more than seven request.", 7, facetResults.size());
+      assertTrue("Should have found an empty FacetResult " +
+          "for a facet that doesn't exist in the index.", hasDoctor);
+      assertEquals("Shouldn't have found more than 8 request.", 8, facetResults.size());
+
+      fr = facetResults.get(7); // a/b, depth=0, K=2
+      assertEquals(0, fr.getNumValidDescendants());
+      parentRes = fr.getFacetResultNode();
+      assertEquals(8.0, parentRes.value, Double.MIN_VALUE);
+      assertEquals(0, parentRes.subResults.size());
+      i = 0;
+      for (FacetResultNode node : parentRes.subResults) {
+        assertEquals(expectedValues3[i++], node.value, Double.MIN_VALUE);
+        assertEquals(0, node.subResults.size());
+      }
+
       ir.close();
       tr.close();
       iDir.close();

Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java Tue Feb 26 00:02:16 2013
@@ -203,8 +203,8 @@ public class TestTopKResultsHandler exte
       List<FacetResult> facetResults = fc.getFacetResults();
       
       assertEquals("Shouldn't have found anything for a FacetRequest "
-          + "of a facet that doesn't exist in the index.", 0, facetResults.size());
-
+          + "of a facet that doesn't exist in the index.", 1, facetResults.size());
+      assertEquals("Miau Hattulla", facetResults.get(0).getFacetResultNode().label.components[0]);
       closeAll();
     }
   }

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java?rev=1449972&r1=1449971&r2=1449972&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java Tue Feb 26 00:02:16 2013
@@ -98,6 +98,9 @@ public class AssertingIndexSearcher exte
 
       @Override
       public boolean scoresDocsOutOfOrder() {
+        // TODO: if this returns false, we should wrap
+        // Scorer with AssertingScorer that confirms docIDs
+        // are in order?
         return w.scoresDocsOutOfOrder();
       }
     };