You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC

svn commit: r1487777 [24/50] - in /lucene/dev/branches/security: ./ dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/ma...

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -48,7 +48,7 @@ public class MultiAssociationsFacetsAggr
    * Creates a new {@link MultiAssociationsFacetsAggregator} over the given
    * aggregators. The mapping is used by
    * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} to
-   * rollup the values of the speicfic category by the corresponding
+   * rollup the values of the specific category by the corresponding
    * {@link FacetsAggregator}. However, since each {@link FacetsAggregator}
    * handles the associations of a specific type, which could cover multiple
    * categories, the aggregation is done on the unique set of aggregators, which

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -7,7 +7,6 @@ import org.apache.lucene.facet.search.Fa
 import org.apache.lucene.facet.search.FacetRequest;
 import org.apache.lucene.facet.search.FacetsAggregator;
 import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.util.BytesRef;
 
@@ -32,6 +31,10 @@ import org.apache.lucene.util.BytesRef;
  * A {@link FacetsAggregator} which computes the weight of a category as the sum
  * of the float values associated with it in the result documents. Assumes that
  * the association encoded for each ordinal is {@link CategoryFloatAssociation}.
+ * <p>
+ * <b>NOTE:</b> this aggregator does not support
+ * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)}. It only
+ * aggregates the categories for which you added a {@link CategoryAssociation}.
  * 
  * @lucene.experimental
  */
@@ -77,22 +80,9 @@ public class SumFloatAssociationFacetsAg
     return false;
   }
 
-  private float rollupValues(int ordinal, int[] children, int[] siblings, float[] scores) {
-    float Value = 0f;
-    while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
-      float childValue = scores[ordinal];
-      childValue += rollupValues(children[ordinal], children, siblings, scores);
-      scores[ordinal] = childValue;
-      Value += childValue;
-      ordinal = siblings[ordinal];
-    }
-    return Value;
-  }
-
   @Override
   public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
-    float[] values = facetArrays.getFloatArray();
-    values[ordinal] += rollupValues(children[ordinal], children, siblings, values);
+    // NO-OP: this aggregator does no rollup values to the parents.
   }
   
 }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -7,7 +7,6 @@ import org.apache.lucene.facet.search.Fa
 import org.apache.lucene.facet.search.FacetRequest;
 import org.apache.lucene.facet.search.FacetsAggregator;
 import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.util.BytesRef;
 
@@ -30,8 +29,13 @@ import org.apache.lucene.util.BytesRef;
 
 /**
  * A {@link FacetsAggregator} which computes the weight of a category as the sum
- * of the integer values associated with it in the result documents. Assumes that
- * the association encoded for each ordinal is {@link CategoryIntAssociation}.
+ * of the integer values associated with it in the result documents. Assumes
+ * that the association encoded for each ordinal is
+ * {@link CategoryIntAssociation}.
+ * <p>
+ * <b>NOTE:</b> this aggregator does not support
+ * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)}. It only
+ * aggregates the categories for which you added a {@link CategoryAssociation}.
  */
 public class SumIntAssociationFacetsAggregator implements FacetsAggregator {
 
@@ -75,22 +79,9 @@ public class SumIntAssociationFacetsAggr
     return false;
   }
 
-  private float rollupValues(int ordinal, int[] children, int[] siblings, float[] scores) {
-    float Value = 0f;
-    while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
-      float childValue = scores[ordinal];
-      childValue += rollupValues(children[ordinal], children, siblings, scores);
-      scores[ordinal] = childValue;
-      Value += childValue;
-      ordinal = siblings[ordinal];
-    }
-    return Value;
-  }
-
   @Override
   public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
-    float[] values = facetArrays.getFloatArray();
-    values[ordinal] += rollupValues(children[ordinal], children, siblings, values);
+    // NO-OP: this aggregator does no rollup values to the parents.
   }
 
 }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/doc-files/userguide.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/doc-files/userguide.html?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/doc-files/userguide.html (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/doc-files/userguide.html Thu May 30 07:53:18 2013
@@ -261,7 +261,7 @@ Following is a code snippet for indexing
 found in package <code>org.apache.lucene.facet.example.simple.SimpleIndexer</code>.
 <pre class="prettyprint lang-java linenums">
 IndexWriter writer = ...
-TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
+TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir);
 ...
 Document doc = new Document();
 doc.add(new Field("title", titleText, Store.YES, Index.ANALYZED));

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java Thu May 30 07:53:18 2013
@@ -152,12 +152,15 @@ public class CountingListBuilder impleme
       if (op != OrdinalPolicy.NO_PARENTS) {
         // need to add parents too
         int parent = taxoWriter.getParent(ordinal);
-        while (parent > 0) {
-          ordinals.ints[ordinals.length++] = parent;
-          parent = taxoWriter.getParent(parent);
-        }
-        if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension
-          ordinals.length--;
+        if (parent > 0) {
+          // only do this if the category is not a dimension itself, otherwise, it was just discarded by the 'if' below
+          while (parent > 0) {
+            ordinals.ints[ordinals.length++] = parent;
+            parent = taxoWriter.getParent(parent);
+          }
+          if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension
+            ordinals.length--;
+          }
         }
       }
     }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java Thu May 30 07:53:18 2013
@@ -48,18 +48,6 @@ import org.apache.lucene.util.IntsRef;
  */
 public class FacetFields {
 
-  // The counting list is written in a payload, but we don't store it
-  // nor need norms.
-  private static final FieldType COUNTING_LIST_PAYLOAD_TYPE = new FieldType();
-  static {
-    COUNTING_LIST_PAYLOAD_TYPE.setIndexed(true);
-    COUNTING_LIST_PAYLOAD_TYPE.setTokenized(true);
-    COUNTING_LIST_PAYLOAD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
-    COUNTING_LIST_PAYLOAD_TYPE.setStored(false);
-    COUNTING_LIST_PAYLOAD_TYPE.setOmitNorms(true);
-    COUNTING_LIST_PAYLOAD_TYPE.freeze();
-  }
-  
   // The drill-down field is added with a TokenStream, hence why it's based on
   // TextField type. However in practice, it is added just like StringField.
   // Therefore we set its IndexOptions to DOCS_ONLY.

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java Thu May 30 07:53:18 2013
@@ -3,6 +3,7 @@ package org.apache.lucene.facet.sampling
 import java.io.IOException;
 
 import org.apache.lucene.facet.search.FacetResult;
+import org.apache.lucene.facet.search.FacetResultNode;
 import org.apache.lucene.facet.search.ScoredDocIDs;
 
 /*
@@ -23,22 +24,50 @@ import org.apache.lucene.facet.search.Sc
  */
 
 /**
- * Fixer of sample facet accumulation results
+ * Fixer of sample facet accumulation results.
  * 
  * @lucene.experimental
  */
-public interface SampleFixer {
+public abstract class SampleFixer {
   
   /**
    * Alter the input result, fixing it to account for the sampling. This
-   * implementation can compute accurate or estimated counts for the sampled facets. 
-   * For example, a faster correction could just multiply by a compensating factor.
+   * implementation can compute accurate or estimated counts for the sampled
+   * facets. For example, a faster correction could just multiply by a
+   * compensating factor.
    * 
    * @param origDocIds
    *          full set of matching documents.
    * @param fres
    *          sample result to be fixed.
-   * @throws IOException If there is a low-level I/O error.
+   * @throws IOException
+   *           If there is a low-level I/O error.
    */
-  public void fixResult(ScoredDocIDs origDocIds, FacetResult fres) throws IOException; 
+  public void fixResult(ScoredDocIDs origDocIds, FacetResult fres, double samplingRatio) throws IOException {
+    FacetResultNode topRes = fres.getFacetResultNode();
+    fixResultNode(topRes, origDocIds, samplingRatio);
+  }
+  
+  /**
+   * Fix result node count, and, recursively, fix all its children
+   * 
+   * @param facetResNode
+   *          result node to be fixed
+   * @param docIds
+   *          docids in effect
+   * @throws IOException
+   *           If there is a low-level I/O error.
+   */
+  protected void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) 
+      throws IOException {
+    singleNodeFix(facetResNode, docIds, samplingRatio);
+    for (FacetResultNode frn : facetResNode.subResults) {
+      fixResultNode(frn, docIds, samplingRatio);
+    }
+  }
+  
+  /** Fix the given node's value. */
+  protected abstract void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) 
+      throws IOException;
+  
 }
\ No newline at end of file

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java Thu May 30 07:53:18 2013
@@ -12,7 +12,6 @@ import org.apache.lucene.facet.search.Fa
 import org.apache.lucene.facet.search.FacetResultNode;
 import org.apache.lucene.facet.search.ScoredDocIDs;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.index.IndexReader;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -111,16 +110,6 @@ public abstract class Sampler {
       throws IOException;
 
   /**
-   * Get a fixer of sample facet accumulation results. Default implementation
-   * returns a <code>TakmiSampleFixer</code> which is adequate only for
-   * counting. For any other accumulator, provide a different fixer.
-   */
-  public SampleFixer getSampleFixer(IndexReader indexReader, TaxonomyReader taxonomyReader,
-      FacetSearchParams searchParams) {
-    return new TakmiSampleFixer(indexReader, taxonomyReader, searchParams);
-  }
-  
-  /**
    * Result of sample computation
    */
   public final static class SampleResult {

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java Thu May 30 07:53:18 2013
@@ -79,7 +79,11 @@ public class SamplingAccumulator extends
   public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
     // Replacing the original searchParams with the over-sampled
     FacetSearchParams original = searchParams;
-    searchParams = sampler.overSampledSearchParams(original);
+    SampleFixer samplerFixer = sampler.samplingParams.getSampleFixer();
+    final boolean shouldOversample = sampler.samplingParams.shouldOverSample();
+    if (shouldOversample) {
+      searchParams = sampler.overSampledSearchParams(original);
+    }
     
     List<FacetResult> sampleRes = super.accumulate(docids);
     
@@ -87,14 +91,18 @@ public class SamplingAccumulator extends
     for (FacetResult fres : sampleRes) {
       // for sure fres is not null because this is guaranteed by the delegee.
       PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
-      // fix the result of current request
-      sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres);
-      
-      fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
-
-      // Using the sampler to trim the extra (over-sampled) results
-      fres = sampler.trimResult(fres);
+      if (samplerFixer != null) {
+        // fix the result of current request
+        samplerFixer.fixResult(docids, fres, samplingRatio);
+        
+        fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
 
+        if (shouldOversample) {
+          // Using the sampler to trim the extra (over-sampled) results
+          fres = sampler.trimResult(fres);
+        }
+      }
+      
       // final labeling if allowed (because labeling is a costly operation)
       frh.labelResult(fres);
       fixedRes.add(fres); // add to final results

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java Thu May 30 07:53:18 2013
@@ -28,7 +28,7 @@ public class SamplingParams {
    * Default factor by which more results are requested over the sample set.
    * @see SamplingParams#getOversampleFactor()
    */
-  public static final double DEFAULT_OVERSAMPLE_FACTOR = 2d;
+  public static final double DEFAULT_OVERSAMPLE_FACTOR = 1d;
   
   /**
    * Default ratio between size of sample to original size of document set.
@@ -59,6 +59,8 @@ public class SamplingParams {
   private double sampleRatio = DEFAULT_SAMPLE_RATIO;
   private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD;
   private double oversampleFactor = DEFAULT_OVERSAMPLE_FACTOR;
+
+  private SampleFixer sampleFixer = null;
   
   /**
    * Return the maxSampleSize.
@@ -166,4 +168,29 @@ public class SamplingParams {
     this.oversampleFactor = oversampleFactor;
   }
 
-}
\ No newline at end of file
+  /**
+   * @return {@link SampleFixer} to be used while fixing the sampled results, if
+   *         <code>null</code> no fixing will be performed
+   */
+  public SampleFixer getSampleFixer() {
+    return sampleFixer;
+  }
+
+  /**
+   * Set a {@link SampleFixer} to be used while fixing the sampled results.
+   * {@code null} means no fixing will be performed
+   */
+  public void setSampleFixer(SampleFixer sampleFixer) {
+    this.sampleFixer = sampleFixer;
+  }
+
+  /**
+   * Returns whether over-sampling should be done. By default returns
+   * {@code true} when {@link #getSampleFixer()} is not {@code null} and
+   * {@link #getOversampleFactor()} &gt; 1, {@code false} otherwise.
+   */
+  public boolean shouldOverSample() {
+    return sampleFixer != null && oversampleFactor > 1d;
+  }
+  
+}

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java Thu May 30 07:53:18 2013
@@ -52,29 +52,41 @@ public class SamplingWrapper extends Sta
   public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
     // Replacing the original searchParams with the over-sampled (and without statistics-compute)
     FacetSearchParams original = delegee.searchParams;
-    delegee.searchParams = sampler.overSampledSearchParams(original);
+    boolean shouldOversample = sampler.samplingParams.shouldOverSample();
+   
+    if (shouldOversample) {
+      delegee.searchParams = sampler.overSampledSearchParams(original);
+    }
     
     SampleResult sampleSet = sampler.getSampleSet(docids);
 
     List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);
 
     List<FacetResult> fixedRes = new ArrayList<FacetResult>();
+    SampleFixer sampleFixer = sampler.samplingParams.getSampleFixer();
+    
     for (FacetResult fres : sampleRes) {
       // for sure fres is not null because this is guaranteed by the delegee.
       PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
-      // fix the result of current request
-      sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres); 
-      fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any
+      if (sampleFixer != null) {
+        // fix the result of current request
+        sampleFixer.fixResult(docids, fres, sampleSet.actualSampleRatio); 
+        fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any
+      }
       
-      // Using the sampler to trim the extra (over-sampled) results
-      fres = sampler.trimResult(fres);
+      if (shouldOversample) {
+        // Using the sampler to trim the extra (over-sampled) results
+        fres = sampler.trimResult(fres);
+      }
       
       // final labeling if allowed (because labeling is a costly operation)
       frh.labelResult(fres);
       fixedRes.add(fres); // add to final results
     }
 
-    delegee.searchParams = original; // Back to original params
+    if (shouldOversample) {
+      delegee.searchParams = original; // Back to original params
+    }
     
     return fixedRes; 
   }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java Thu May 30 07:53:18 2013
@@ -2,21 +2,19 @@ package org.apache.lucene.facet.sampling
 
 import java.io.IOException;
 
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.Bits;
-
 import org.apache.lucene.facet.params.FacetSearchParams;
 import org.apache.lucene.facet.search.DrillDownQuery;
-import org.apache.lucene.facet.search.FacetResult;
 import org.apache.lucene.facet.search.FacetResultNode;
 import org.apache.lucene.facet.search.ScoredDocIDs;
 import org.apache.lucene.facet.search.ScoredDocIDsIterator;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -36,16 +34,21 @@ import org.apache.lucene.facet.taxonomy.
  */
 
 /**
- * Fix sampling results by counting the intersection between two lists: a
- * TermDocs (list of documents in a certain category) and a DocIdSetIterator
- * (list of documents matching the query).
- * 
+ * Fix sampling results by correct results, by counting the intersection between
+ * two lists: a TermDocs (list of documents in a certain category) and a
+ * DocIdSetIterator (list of documents matching the query).
+ * <p>
+ * This fixer is suitable for scenarios which prioritize accuracy over
+ * performance. 
+ * <p>
+ * <b>Note:</b> for statistically more accurate top-k selection, set
+ * {@link SamplingParams#setOversampleFactor(double) oversampleFactor} to at
+ * least 2, so that the top-k categories would have better chance of showing up
+ * in the sampled top-cK results (see {@link SamplingParams#getOversampleFactor}
  * 
  * @lucene.experimental
  */
-// TODO (Facet): implement also an estimated fixing by ratio (taking into
-// account "translation" of counts!)
-class TakmiSampleFixer implements SampleFixer {
+public class TakmiSampleFixer extends SampleFixer {
   
   private TaxonomyReader taxonomyReader;
   private IndexReader indexReader;
@@ -59,28 +62,10 @@ class TakmiSampleFixer implements Sample
   }
 
   @Override
-  public void fixResult(ScoredDocIDs origDocIds, FacetResult fres)
-      throws IOException {
-    FacetResultNode topRes = fres.getFacetResultNode();
-    fixResultNode(topRes, origDocIds);
-  }
-  
-  /**
-   * Fix result node count, and, recursively, fix all its children
-   * 
-   * @param facetResNode
-   *          result node to be fixed
-   * @param docIds
-   *          docids in effect
-   * @throws IOException If there is a low-level I/O error.
-   */
-  private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds) throws IOException {
+  public void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) throws IOException {
     recount(facetResNode, docIds);
-    for (FacetResultNode frn : facetResNode.subResults) {
-      fixResultNode(frn, docIds);
-    }
   }
-
+  
   /**
    * Internal utility: recount for a facet result node
    * 
@@ -179,4 +164,5 @@ class TakmiSampleFixer implements Sample
     }
     return false; // exhausted
   }
+
 }
\ No newline at end of file

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java Thu May 30 07:53:18 2013
@@ -7,8 +7,8 @@ import java.util.Collections;
 import java.util.Comparator;
 
 import org.apache.lucene.facet.search.FacetRequest.SortOrder;
+import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
 import org.apache.lucene.util.PriorityQueue;
 
 /*
@@ -46,7 +46,7 @@ public abstract class DepthOneFacetResul
     
     @Override
     protected FacetResultNode getSentinelObject() {
-      return new FacetResultNode();
+      return new FacetResultNode(TaxonomyReader.INVALID_ORDINAL, 0);
     }
     
     @Override
@@ -80,7 +80,8 @@ public abstract class DepthOneFacetResul
    * Add the siblings of {@code ordinal} to the given {@link PriorityQueue}. The
    * given {@link PriorityQueue} is already filled with sentinel objects, so
    * implementations are encouraged to use {@link PriorityQueue#top()} and
-   * {@link PriorityQueue#updateTop()} for best performance.
+   * {@link PriorityQueue#updateTop()} for best performance.  Returns the total
+   * number of siblings.
    */
   protected abstract int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq);
   
@@ -92,10 +93,8 @@ public abstract class DepthOneFacetResul
     
     int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath);
         
-    FacetResultNode root = new FacetResultNode();
-    root.ordinal = rootOrd;
+    FacetResultNode root = new FacetResultNode(rootOrd, valueOf(rootOrd));
     root.label = facetRequest.categoryPath;
-    root.value = valueOf(rootOrd);
     if (facetRequest.numResults > taxonomyReader.getSize()) {
       // specialize this case, user is interested in all available results
       ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
@@ -118,11 +117,11 @@ public abstract class DepthOneFacetResul
     
     // since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big
     PriorityQueue<FacetResultNode> pq = new FacetResultNodeQueue(facetRequest.numResults, true);
-    int numResults = addSiblings(children[rootOrd], siblings, pq);
+    int numSiblings = addSiblings(children[rootOrd], siblings, pq);
 
     // pop() the least (sentinel) elements
     int pqsize = pq.size();
-    int size = numResults < pqsize ? numResults : pqsize;
+    int size = numSiblings < pqsize ? numSiblings : pqsize;
     for (int i = pqsize - size; i > 0; i--) { pq.pop(); }
 
     // create the FacetResultNodes.
@@ -133,7 +132,7 @@ public abstract class DepthOneFacetResul
       subResults[i] = node;
     }
     root.subResults = Arrays.asList(subResults);
-    return new FacetResult(facetRequest, root, size);
+    return new FacetResult(facetRequest, root, numSiblings);
   }
   
 }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java Thu May 30 07:53:18 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.facet.search;
 
 import java.io.IOException;
 import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.facet.params.CategoryListParams;
@@ -61,7 +62,7 @@ public final class DrillDownQuery extend
   
   private final BooleanQuery query;
   private final Map<String,Integer> drillDownDims = new LinkedHashMap<String,Integer>();
-  private final FacetIndexingParams fip;
+  final FacetIndexingParams fip;
 
   /** Used by clone() */
   DrillDownQuery(FacetIndexingParams fip, BooleanQuery query, Map<String,Integer> drillDownDims) {
@@ -87,6 +88,19 @@ public final class DrillDownQuery extend
     fip = other.fip;
   }
 
+  /** Used by DrillSideways */
+  DrillDownQuery(FacetIndexingParams fip, Query baseQuery, List<Query> clauses, Map<String,Integer> drillDownDims) {
+    this.fip = fip;
+    this.query = new BooleanQuery(true);
+    if (baseQuery != null) {
+      query.add(baseQuery, Occur.MUST);      
+    }
+    for(Query clause : clauses) {
+      query.add(clause, Occur.MUST);
+    }
+    this.drillDownDims.putAll(drillDownDims);
+  }
+
   /**
    * Creates a new {@link DrillDownQuery} without a base query, 
    * to perform a pure browsing query (equivalent to using
@@ -139,11 +153,25 @@ public final class DrillDownQuery extend
       }
       q = bq;
     }
-    drillDownDims.put(dim, drillDownDims.size());
 
-    final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(q);
+    add(dim, q);
+  }
+
+  /** Expert: add a custom drill-down subQuery.  Use this
+   *  when you have a separate way to drill-down on the
+   *  dimension than the indexed facet ordinals. */
+  public void add(String dim, Query subQuery) {
+
+    // TODO: we should use FilteredQuery?
+
+    // So scores of the drill-down query don't have an
+    // effect:
+    final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subQuery);
     drillDownQuery.setBoost(0.0f);
+
     query.add(drillDownQuery, Occur.MUST);
+
+    drillDownDims.put(dim, drillDownDims.size());
   }
 
   @Override
@@ -153,7 +181,9 @@ public final class DrillDownQuery extend
   
   @Override
   public int hashCode() {
-    return query.hashCode();
+    final int prime = 31;
+    int result = super.hashCode();
+    return prime * result + query.hashCode();
   }
   
   @Override
@@ -163,7 +193,7 @@ public final class DrillDownQuery extend
     }
     
     DrillDownQuery other = (DrillDownQuery) obj;
-    return query.equals(other.query);
+    return query.equals(other.query) && super.equals(other);
   }
   
   @Override

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java Thu May 30 07:53:18 2013
@@ -19,11 +19,15 @@ package org.apache.lucene.facet.search;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.lucene.facet.params.FacetSearchParams;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -33,6 +37,7 @@ import org.apache.lucene.search.FieldDoc
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiCollector;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
@@ -40,6 +45,7 @@ import org.apache.lucene.search.TermQuer
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TopFieldCollector;
 import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.Weight;
 
 /**     
  * Computes drill down and sideways counts for the provided
@@ -71,27 +77,110 @@ public class DrillSideways {
     this.taxoReader = taxoReader;
   }
 
+  /** Moves any drill-downs that don't have a corresponding
+   *  facet request into the baseQuery.  This is unusual,
+   *  yet allowed, because typically the added drill-downs are because
+   *  the user has clicked on previously presented facets,
+   *  and those same facets would be computed this time
+   *  around. */
+  private static DrillDownQuery moveDrillDownOnlyClauses(DrillDownQuery in, FacetSearchParams fsp) {
+    Set<String> facetDims = new HashSet<String>();
+    for(FacetRequest fr : fsp.facetRequests) {
+      if (fr.categoryPath.length == 0) {
+        throw new IllegalArgumentException("all FacetRequests must have CategoryPath with length > 0");
+      }
+      facetDims.add(fr.categoryPath.components[0]);
+    }
+
+    BooleanClause[] clauses = in.getBooleanQuery().getClauses();
+    Map<String,Integer> drillDownDims = in.getDims();
+
+    String[] dimsByIndex = new String[drillDownDims.size()];
+    for(Map.Entry<String,Integer> ent : drillDownDims.entrySet()) {
+      dimsByIndex[ent.getValue()] = ent.getKey();
+    }
+
+    int startClause;
+    if (clauses.length == drillDownDims.size()) {
+      startClause = 0;
+    } else {
+      assert clauses.length == 1+drillDownDims.size();
+      startClause = 1;
+    }
+
+    // Break out drill-down clauses that have no
+    // corresponding facet request and move them inside the
+    // baseQuery:
+    List<Query> nonFacetClauses = new ArrayList<Query>();
+    List<Query> facetClauses = new ArrayList<Query>();
+    Map<String,Integer> dimToIndex = new LinkedHashMap<String,Integer>();
+    for(int i=startClause;i<clauses.length;i++) {
+      Query q = clauses[i].getQuery();
+      String dim = dimsByIndex[i-startClause];
+      if (!facetDims.contains(dim)) {
+        nonFacetClauses.add(q);
+      } else {
+        facetClauses.add(q);
+        dimToIndex.put(dim, dimToIndex.size());
+      }
+    }
+
+    if (!nonFacetClauses.isEmpty()) {
+      BooleanQuery newBaseQuery = new BooleanQuery(true);
+      if (startClause == 1) {
+        // Add original basaeQuery:
+        newBaseQuery.add(clauses[0].getQuery(), BooleanClause.Occur.MUST);
+      }
+      for(Query q : nonFacetClauses) {
+        newBaseQuery.add(q, BooleanClause.Occur.MUST);
+      }
+
+      return new DrillDownQuery(fsp.indexingParams, newBaseQuery, facetClauses, dimToIndex);
+    } else {
+      // No change:
+      return in;
+    }
+  }
+
   /**
    * Search, collecting hits with a {@link Collector}, and
    * computing drill down and sideways counts.
    */
+  @SuppressWarnings({"rawtypes","unchecked"})
   public DrillSidewaysResult search(DrillDownQuery query,
                                     Collector hitCollector, FacetSearchParams fsp) throws IOException {
 
+    if (query.fip != fsp.indexingParams) {
+      throw new IllegalArgumentException("DrillDownQuery's FacetIndexingParams should match FacetSearchParams'");
+    }
+
+    query = moveDrillDownOnlyClauses(query, fsp);
+
     Map<String,Integer> drillDownDims = query.getDims();
 
     if (drillDownDims.isEmpty()) {
-      throw new IllegalArgumentException("there must be at least one drill-down");
+      // Just do ordinary search when there are no drill-downs:
+      FacetsCollector c = FacetsCollector.create(getDrillDownAccumulator(fsp));
+      searcher.search(query, MultiCollector.wrap(hitCollector, c));
+      return new DrillSidewaysResult(c.getFacetResults(), null);      
     }
 
-    BooleanQuery ddq = query.getBooleanQuery();
-    BooleanClause[] clauses = ddq.getClauses();
-
-    for(FacetRequest fr :  fsp.facetRequests) {
-      if (fr.categoryPath.length == 0) {
-        throw new IllegalArgumentException("all FacetRequests must have CategoryPath with length > 0");
+    List<FacetRequest> ddRequests = new ArrayList<FacetRequest>();
+    for(FacetRequest fr : fsp.facetRequests) {
+      assert fr.categoryPath.length > 0;
+      if (!drillDownDims.containsKey(fr.categoryPath.components[0])) {
+        ddRequests.add(fr);
       }
     }
+    FacetSearchParams fsp2;
+    if (!ddRequests.isEmpty()) {
+      fsp2 = new FacetSearchParams(fsp.indexingParams, ddRequests);
+    } else {
+      fsp2 = null;
+    }
+
+    BooleanQuery ddq = query.getBooleanQuery();
+    BooleanClause[] clauses = ddq.getClauses();
 
     Query baseQuery;
     int startClause;
@@ -106,54 +195,82 @@ public class DrillSideways {
       startClause = 1;
     }
 
-    Term[][] drillDownTerms = new Term[clauses.length-startClause][];
-    for(int i=startClause;i<clauses.length;i++) {
-      Query q = clauses[i].getQuery();
-      assert q instanceof ConstantScoreQuery;
-      q = ((ConstantScoreQuery) q).getQuery();
-      assert q instanceof TermQuery || q instanceof BooleanQuery;
-      if (q instanceof TermQuery) {
-        drillDownTerms[i-startClause] = new Term[] {((TermQuery) q).getTerm()};
-      } else {
-        BooleanQuery q2 = (BooleanQuery) q;
-        BooleanClause[] clauses2 = q2.getClauses();
-        drillDownTerms[i-startClause] = new Term[clauses2.length];
-        for(int j=0;j<clauses2.length;j++) {
-          assert clauses2[j].getQuery() instanceof TermQuery;
-          drillDownTerms[i-startClause][j] = ((TermQuery) clauses2[j].getQuery()).getTerm();
-        }
-      }
-    }
-
-    FacetsCollector drillDownCollector = FacetsCollector.create(getDrillDownAccumulator(fsp));
+    FacetsCollector drillDownCollector = fsp2 == null ? null : FacetsCollector.create(getDrillDownAccumulator(fsp2));
 
     FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[drillDownDims.size()];
 
     int idx = 0;
     for(String dim : drillDownDims.keySet()) {
-      FacetRequest drillSidewaysRequest = null;
+      List<FacetRequest> requests = new ArrayList<FacetRequest>();
       for(FacetRequest fr : fsp.facetRequests) {
         assert fr.categoryPath.length > 0;
         if (fr.categoryPath.components[0].equals(dim)) {
-          if (drillSidewaysRequest != null) {
-            throw new IllegalArgumentException("multiple FacetRequests for drill-sideways dimension \"" + dim + "\"");
-          }
-          drillSidewaysRequest = fr;
+          requests.add(fr);
         }
       }
-      if (drillSidewaysRequest == null) {
+      if (requests.isEmpty()) {
         throw new IllegalArgumentException("could not find FacetRequest for drill-sideways dimension \"" + dim + "\"");
       }
-      drillSidewaysCollectors[idx++] = FacetsCollector.create(getDrillSidewaysAccumulator(dim, new FacetSearchParams(fsp.indexingParams, drillSidewaysRequest)));
+      drillSidewaysCollectors[idx++] = FacetsCollector.create(getDrillSidewaysAccumulator(dim, new FacetSearchParams(fsp.indexingParams, requests)));
     }
 
-    DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
+    boolean useCollectorMethod = scoreSubDocsAtOnce();
 
-    searcher.search(dsq, hitCollector);
+    Term[][] drillDownTerms = null;
 
-    List<FacetResult> drillDownResults = drillDownCollector.getFacetResults();
+    if (!useCollectorMethod) {
+      // Optimistic: assume subQueries of the DDQ are either
+      // TermQuery or BQ OR of TermQuery; if this is wrong
+      // then we detect it and fallback to the mome general
+      // but slower DrillSidewaysCollector:
+      drillDownTerms = new Term[clauses.length-startClause][];
+      for(int i=startClause;i<clauses.length;i++) {
+        Query q = clauses[i].getQuery();
+
+        // DrillDownQuery always wraps each subQuery in
+        // ConstantScoreQuery:
+        assert q instanceof ConstantScoreQuery;
+
+        q = ((ConstantScoreQuery) q).getQuery();
+
+        if (q instanceof TermQuery) {
+          drillDownTerms[i-startClause] = new Term[] {((TermQuery) q).getTerm()};
+        } else if (q instanceof BooleanQuery) {
+          BooleanQuery q2 = (BooleanQuery) q;
+          BooleanClause[] clauses2 = q2.getClauses();
+          drillDownTerms[i-startClause] = new Term[clauses2.length];
+          for(int j=0;j<clauses2.length;j++) {
+            if (clauses2[j].getQuery() instanceof TermQuery) {
+              drillDownTerms[i-startClause][j] = ((TermQuery) clauses2[j].getQuery()).getTerm();
+            } else {
+              useCollectorMethod = true;
+              break;
+            }
+          }
+        } else {
+          useCollectorMethod = true;
+        }
+      }
+    }
+
+    if (useCollectorMethod) {
+      // TODO: maybe we could push the "collector method"
+      // down into the optimized scorer to have a tighter
+      // integration ... and so TermQuery clauses could
+      // continue to run "optimized"
+      collectorMethod(query, baseQuery, startClause, hitCollector, drillDownCollector, drillSidewaysCollectors);
+    } else {
+      DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
+      searcher.search(dsq, hitCollector);
+    }
+
+    int numDims = drillDownDims.size();
+    List<FacetResult>[] drillSidewaysResults = new List[numDims];
+    List<FacetResult> drillDownResults = null;
 
     List<FacetResult> mergedResults = new ArrayList<FacetResult>();
+    int[] requestUpto = new int[drillDownDims.size()];
+    int ddUpto = 0;
     for(int i=0;i<fsp.facetRequests.size();i++) {
       FacetRequest fr = fsp.facetRequests.get(i);
       assert fr.categoryPath.length > 0;
@@ -161,19 +278,119 @@ public class DrillSideways {
       if (dimIndex == null) {
         // Pure drill down dim (the current query didn't
         // drill down on this dim):
-        mergedResults.add(drillDownResults.get(i));
+        if (drillDownResults == null) {
+          // Lazy init, in case all requests were against
+          // drill-sideways dims:
+          drillDownResults = drillDownCollector.getFacetResults();
+          //System.out.println("get DD results");
+        }
+        //System.out.println("add dd results " + i);
+        mergedResults.add(drillDownResults.get(ddUpto++));
       } else {
         // Drill sideways dim:
-        List<FacetResult> sidewaysResult = drillSidewaysCollectors[dimIndex.intValue()].getFacetResults();
-
-        assert sidewaysResult.size() == 1: "size=" + sidewaysResult.size();
-        mergedResults.add(sidewaysResult.get(0));
+        int dim = dimIndex.intValue();
+        List<FacetResult> sidewaysResult = drillSidewaysResults[dim];
+        if (sidewaysResult == null) {
+          // Lazy init, in case no facet request is against
+          // a given drill down dim:
+          sidewaysResult = drillSidewaysCollectors[dim].getFacetResults();
+          drillSidewaysResults[dim] = sidewaysResult;
+        }
+        mergedResults.add(sidewaysResult.get(requestUpto[dim]));
+        requestUpto[dim]++;
       }
     }
 
     return new DrillSidewaysResult(mergedResults, null);
   }
 
+  /** Uses the more general but slower method of sideways
+   *  counting. This method allows an arbitrary subQuery to
+   *  implement the drill down for a given dimension. */
+  private void collectorMethod(DrillDownQuery ddq, Query baseQuery, int startClause, Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors) throws IOException {
+
+    BooleanClause[] clauses = ddq.getBooleanQuery().getClauses();
+
+    Map<String,Integer> drillDownDims = ddq.getDims();
+
+    BooleanQuery topQuery = new BooleanQuery(true);
+    final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors,
+                                                                        drillDownDims);
+
+    // TODO: if query is already a BQ we could copy that and
+    // add clauses to it, instead of doing BQ inside BQ
+    // (should be more efficient)?  Problem is this can
+    // affect scoring (coord) ... too bad we can't disable
+    // coord on a clause by clause basis:
+    topQuery.add(baseQuery, BooleanClause.Occur.MUST);
+
+    // NOTE: in theory we could just make a single BQ, with
+    // +query a b c minShouldMatch=2, but in this case,
+    // annoyingly, BS2 wraps a sub-scorer that always
+    // returns 2 as the .freq(), not how many of the
+    // SHOULD clauses matched:
+    BooleanQuery subQuery = new BooleanQuery(true);
+
+    Query wrappedSubQuery = new QueryWrapper(subQuery,
+                                             new SetWeight() {
+                                               @Override
+                                               public void set(Weight w) {
+                                                 collector.setWeight(w, -1);
+                                               }
+                                             });
+    Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery);
+
+    // Don't impact score of original query:
+    constantScoreSubQuery.setBoost(0.0f);
+
+    topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST);
+
+    // Unfortunately this sub-BooleanQuery
+    // will never get BS1 because today BS1 only works
+    // if topScorer=true... and actually we cannot use BS1
+    // anyways because we need subDocsScoredAtOnce:
+    int dimIndex = 0;
+    for(int i=startClause;i<clauses.length;i++) {
+      Query q = clauses[i].getQuery();
+      // DrillDownQuery always wraps each subQuery in
+      // ConstantScoreQuery:
+      assert q instanceof ConstantScoreQuery;
+      q = ((ConstantScoreQuery) q).getQuery();
+
+      final int finalDimIndex = dimIndex;
+      subQuery.add(new QueryWrapper(q,
+                                    new SetWeight() {
+                                      @Override
+                                      public void set(Weight w) {
+                                        collector.setWeight(w, finalDimIndex);
+                                      }
+                                    }),
+                   BooleanClause.Occur.SHOULD);
+      dimIndex++;
+    }
+
+    // TODO: we could better optimize the "just one drill
+    // down" case w/ a separate [specialized]
+    // collector...
+    int minShouldMatch = drillDownDims.size()-1;
+    if (minShouldMatch == 0) {
+      // Must add another "fake" clause so BQ doesn't erase
+      // itself by rewriting to the single clause:
+      Query end = new MatchAllDocsQuery();
+      end.setBoost(0.0f);
+      subQuery.add(end, BooleanClause.Occur.SHOULD);
+      minShouldMatch++;
+    }
+
+    subQuery.setMinimumNumberShouldMatch(minShouldMatch);
+
+    // System.out.println("EXE " + topQuery);
+
+    // Collects against the passed-in
+    // drillDown/SidewaysCollectors as a side effect:
+    searcher.search(topQuery, collector);
+  }
+
   /**
    * Search, sorting by {@link Sort}, and computing
    * drill down and sideways counts.
@@ -192,9 +409,8 @@ public class DrillSideways {
                                                                       doDocScores,
                                                                       doMaxScore,
                                                                       true);
-      DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(query, hitCollector, fsp);
-      r.hits = hitCollector.topDocs();
-      return r;
+      DrillSidewaysResult r = search(query, hitCollector, fsp);
+      return new DrillSidewaysResult(r.facetResults, hitCollector.topDocs());
     } else {
       return search(after, query, topN, fsp);
     }
@@ -207,36 +423,102 @@ public class DrillSideways {
   public DrillSidewaysResult search(ScoreDoc after,
                                     DrillDownQuery query, int topN, FacetSearchParams fsp) throws IOException {
     TopScoreDocCollector hitCollector = TopScoreDocCollector.create(Math.min(topN, searcher.getIndexReader().maxDoc()), after, true);
-    DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(query, hitCollector, fsp);
-    r.hits = hitCollector.topDocs();
-    return r;
+    DrillSidewaysResult r = search(query, hitCollector, fsp);
+    return new DrillSidewaysResult(r.facetResults, hitCollector.topDocs());
   }
 
   /** Override this to use a custom drill-down {@link
    *  FacetsAccumulator}. */
-  protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) {
+  protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) throws IOException {
     return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader);
   }
 
   /** Override this to use a custom drill-sideways {@link
    *  FacetsAccumulator}. */
-  protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) {
+  protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) throws IOException {
     return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader);
   }
 
-  /** Represents the returned result from a drill sideways
-   *  search. */
+  /** Override this and return true if your collector
+   *  (e.g., ToParentBlockJoinCollector) expects all
+   *  sub-scorers to be positioned on the document being
+   *  collected.  This will cause some performance loss;
+   *  default is false.  Note that if you return true from
+   *  this method (in a subclass) be sure your collector
+   *  also returns false from {@link
+   *  Collector#acceptsDocsOutOfOrder}: this will trick
+   *  BooleanQuery into also scoring all subDocs at once. */
+  protected boolean scoreSubDocsAtOnce() {
+    return false;
+  }
+
+  /**
+   * Represents the returned result from a drill sideways search. Note that if
+   * you called
+   * {@link DrillSideways#search(DrillDownQuery, Collector, FacetSearchParams)},
+   * then {@link #hits} will be {@code null}.
+   */
   public static class DrillSidewaysResult {
     /** Combined drill down & sideways results. */
     public final List<FacetResult> facetResults;
 
     /** Hits. */
-    public TopDocs hits;
+    public final TopDocs hits;
 
-    DrillSidewaysResult(List<FacetResult> facetResults, TopDocs hits) {
+    public DrillSidewaysResult(List<FacetResult> facetResults, TopDocs hits) {
       this.facetResults = facetResults;
       this.hits = hits;
     }
   }
+
+  private interface SetWeight {
+    public void set(Weight w);
+  }
+
+  /** Just records which Weight was given out for the
+   *  (possibly rewritten) Query. */
+  private static class QueryWrapper extends Query {
+    private final Query originalQuery;
+    private final SetWeight setter;
+
+    public QueryWrapper(Query originalQuery, SetWeight setter) {
+      this.originalQuery = originalQuery;
+      this.setter = setter;
+    }
+
+    @Override
+    public Weight createWeight(final IndexSearcher searcher) throws IOException {
+      Weight w = originalQuery.createWeight(searcher);
+      setter.set(w);
+      return w;
+    }
+
+    @Override
+    public Query rewrite(IndexReader reader) throws IOException {
+      Query rewritten = originalQuery.rewrite(reader);
+      if (rewritten != originalQuery) {
+        return new QueryWrapper(rewritten, setter);
+      } else {
+        return this;
+      }
+    }
+
+    @Override
+    public String toString(String s) {
+      return originalQuery.toString(s);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (!(o instanceof QueryWrapper)) return false;
+      final QueryWrapper other = (QueryWrapper) o;
+      return super.equals(o) && originalQuery.equals(other.originalQuery);
+    }
+
+    @Override
+    public int hashCode() {
+      return super.hashCode() * 31 + originalQuery.hashCode();
+    }
+  }
 }
 

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java Thu May 30 07:53:18 2013
@@ -129,8 +129,11 @@ class DrillSidewaysQuery extends Query {
           dims[dim].docsEnums = new DocsEnum[drillDownTerms[dim].length];
           for(int i=0;i<drillDownTerms[dim].length;i++) {
             if (termsEnum.seekExact(drillDownTerms[dim][i].bytes(), false)) {
-              dims[dim].freq = Math.max(dims[dim].freq, termsEnum.docFreq());
-              dims[dim].docsEnums[i] = termsEnum.docs(null, null);
+              DocsEnum docsEnum = termsEnum.docs(null, null);
+              if (docsEnum != null) {
+                dims[dim].docsEnums[i] = docsEnum;
+                dims[dim].maxCost = Math.max(dims[dim].maxCost, docsEnum.cost());
+              }
             }
           }
         }
@@ -157,13 +160,34 @@ class DrillSidewaysQuery extends Query {
     };
   }
 
+  // TODO: these should do "deeper" equals/hash on the 2-D drillDownTerms array
+
   @Override
   public int hashCode() {
-    throw new UnsupportedOperationException();
+    final int prime = 31;
+    int result = super.hashCode();
+    result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
+    result = prime * result
+        + ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
+    result = prime * result + Arrays.hashCode(drillDownTerms);
+    result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
+    return result;
   }
 
   @Override
   public boolean equals(Object obj) {
-    throw new UnsupportedOperationException();
+    if (this == obj) return true;
+    if (!super.equals(obj)) return false;
+    if (getClass() != obj.getClass()) return false;
+    DrillSidewaysQuery other = (DrillSidewaysQuery) obj;
+    if (baseQuery == null) {
+      if (other.baseQuery != null) return false;
+    } else if (!baseQuery.equals(other.baseQuery)) return false;
+    if (drillDownCollector == null) {
+      if (other.drillDownCollector != null) return false;
+    } else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
+    if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
+    if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
+    return true;
   }
 }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java Thu May 30 07:53:18 2013
@@ -63,8 +63,10 @@ class DrillSidewaysScorer extends Scorer
     //}
     //System.out.println("score r=" + context.reader());
     collector.setScorer(this);
-    drillDownCollector.setScorer(this);
-    drillDownCollector.setNextReader(context);
+    if (drillDownCollector != null) {
+      drillDownCollector.setScorer(this);
+      drillDownCollector.setNextReader(context);
+    }
     for(DocsEnumsAndFreq dim : dims) {
       dim.sidewaysCollector.setScorer(this);
       dim.sidewaysCollector.setNextReader(context);
@@ -76,8 +78,7 @@ class DrillSidewaysScorer extends Scorer
     assert baseScorer != null;
 
     // Position all scorers to their first matching doc:
-    int baseDocID = baseScorer.nextDoc();
-
+    baseScorer.nextDoc();
     for(DocsEnumsAndFreq dim : dims) {
       for(DocsEnum docsEnum : dim.docsEnums) {
         if (docsEnum != null) {
@@ -90,30 +91,33 @@ class DrillSidewaysScorer extends Scorer
 
     DocsEnum[][] docsEnums = new DocsEnum[numDims][];
     Collector[] sidewaysCollectors = new Collector[numDims];
-    int maxFreq = 0;
+    long drillDownCost = 0;
     for(int dim=0;dim<numDims;dim++) {
       docsEnums[dim] = dims[dim].docsEnums;
       sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
-      maxFreq = Math.max(maxFreq, dims[dim].freq);
+      for(DocsEnum de : dims[dim].docsEnums) {
+        if (de != null) {
+          drillDownCost += de.cost();
+        }
+      }
     }
 
-    // TODO: if we add cost API to Scorer, switch to that!
-    int estBaseHitCount = context.reader().maxDoc() / (1+baseDocID);
+    long baseQueryCost = baseScorer.cost();
 
     /*
-    System.out.println("\nbaseDocID=" + baseDocID + " est=" + estBaseHitCount);
+    System.out.println("\nbaseDocID=" + baseScorer.docID() + " est=" + estBaseHitCount);
     System.out.println("  maxDoc=" + context.reader().maxDoc());
-    System.out.println("  maxFreq=" + maxFreq);
+    System.out.println("  maxCost=" + maxCost);
     System.out.println("  dims[0].freq=" + dims[0].freq);
     if (numDims > 1) {
       System.out.println("  dims[1].freq=" + dims[1].freq);
     }
     */
 
-    if (estBaseHitCount < maxFreq/10) {
+    if (baseQueryCost < drillDownCost/10) {
       //System.out.println("baseAdvance");
       doBaseAdvanceScoring(collector, docsEnums, sidewaysCollectors);
-    } else if (numDims > 1 && (dims[1].freq < estBaseHitCount/10)) {
+    } else if (numDims > 1 && (dims[1].maxCost < baseQueryCost/10)) {
       //System.out.println("drillDownAdvance");
       doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
     } else {
@@ -391,7 +395,9 @@ class DrillSidewaysScorer extends Scorer
     //}
 
     collector.collect(collectDocID);
-    drillDownCollector.collect(collectDocID);
+    if (drillDownCollector != null) {
+      drillDownCollector.collect(collectDocID);
+    }
 
     // TODO: we could "fix" faceting of the sideways counts
     // to do this "union" (of the drill down hits) in the
@@ -565,6 +571,9 @@ class DrillSidewaysScorer extends Scorer
       //  System.out.println("  now collect: " + filledCount + " hits");
       //}
       for(int i=0;i<filledCount;i++) {
+        // NOTE: This is actually in-order collection,
+        // because we only accept docs originally returned by
+        // the baseScorer (ie that Scorer is AND'd)
         int slot = filledSlots[i];
         collectDocID = docIDs[slot];
         collectScore = scores[slot];
@@ -615,20 +624,31 @@ class DrillSidewaysScorer extends Scorer
   }
 
   @Override
+  public long cost() {
+    return baseScorer.cost();
+  }
+
+  @Override
   public Collection<ChildScorer> getChildren() {
     return Collections.singletonList(new ChildScorer(baseScorer, "MUST"));
   }
 
   static class DocsEnumsAndFreq implements Comparable<DocsEnumsAndFreq> {
     DocsEnum[] docsEnums;
-    // Max docFreq for all docsEnums for this dim:
-    int freq;
+    // Max cost for all docsEnums for this dim:
+    long maxCost;
     Collector sidewaysCollector;
     String dim;
 
     @Override
     public int compareTo(DocsEnumsAndFreq other) {
-      return freq - other.freq;
+      if (maxCost < other.maxCost) {
+        return -1;
+      } else if (maxCost > other.maxCost) {
+        return 1;
+      } else {
+        return 0;
+      }
     }
   }
 }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java Thu May 30 07:53:18 2013
@@ -1,5 +1,15 @@
 package org.apache.lucene.facet.search;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.CollectionUtil;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,6 +35,140 @@ package org.apache.lucene.facet.search;
  */
 public class FacetResult {
   
+  private static FacetResultNode addIfNotExist(Map<CategoryPath, FacetResultNode> nodes, FacetResultNode node) {
+    FacetResultNode n = nodes.get(node.label);
+    if (n == null) {
+      nodes.put(node.label, node);
+      n = node;
+    }
+    return n;
+  }
+
+  /**
+   * A utility for merging multiple {@link FacetResult} of the same
+   * (hierarchical) dimension into a single {@link FacetResult}, to reconstruct
+   * the hierarchy. The results are merged according to the following rules:
+   * <ul>
+   * <li>If two results share the same dimension (first component in their
+   * {@link CategoryPath}), they are merged.
+   * <li>If a result is missing ancestors in the other results, e.g. A/B/C but
+   * no corresponding A or A/B, these nodes are 'filled' with their label,
+   * ordinal and value (obtained from the respective {@link FacetArrays}).
+   * <li>If a result does not share a dimension with other results, it is
+   * returned as is.
+   * </ul>
+   * <p>
+   * <b>NOTE:</b> the returned results are not guaranteed to be in the same
+   * order of the input ones.
+   * 
+   * @param results
+   *          the results to merge
+   * @param taxoReader
+   *          the {@link TaxonomyReader} to use when creating missing ancestor
+   *          nodes
+   * @param dimArrays
+   *          a mapping from a dimension to the respective {@link FacetArrays}
+   *          from which to pull the nodes values
+   */
+  public static List<FacetResult> mergeHierarchies(List<FacetResult> results, TaxonomyReader taxoReader,
+      Map<String, FacetArrays> dimArrays) throws IOException {
+    final Map<String, List<FacetResult>> dims = new HashMap<String,List<FacetResult>>();
+    for (FacetResult fr : results) {
+      String dim = fr.getFacetRequest().categoryPath.components[0];
+      List<FacetResult> frs = dims.get(dim);
+      if (frs == null) {
+        frs = new ArrayList<FacetResult>();
+        dims.put(dim, frs);
+      }
+      frs.add(fr);
+    }
+
+    final List<FacetResult> res = new ArrayList<FacetResult>();
+    for (List<FacetResult> frs : dims.values()) {
+      FacetResult mergedResult = frs.get(0);
+      if (frs.size() > 1) {
+        CollectionUtil.introSort(frs, new Comparator<FacetResult>() {
+          @Override
+          public int compare(FacetResult fr1, FacetResult fr2) {
+            return fr1.getFacetRequest().categoryPath.compareTo(fr2.getFacetRequest().categoryPath);
+          }
+        });
+        Map<CategoryPath, FacetResultNode> mergedNodes = new HashMap<CategoryPath,FacetResultNode>();
+        FacetArrays arrays = dimArrays != null ? dimArrays.get(frs.get(0).getFacetRequest().categoryPath.components[0]) : null;
+        for (FacetResult fr : frs) {
+          FacetResultNode frn = fr.getFacetResultNode();
+          FacetResultNode merged = mergedNodes.get(frn.label);
+          if (merged == null) {
+            CategoryPath parent = frn.label.subpath(frn.label.length - 1);
+            FacetResultNode childNode = frn;
+            FacetResultNode parentNode = null;
+            while (parent.length > 0 && (parentNode = mergedNodes.get(parent)) == null) {
+              int parentOrd = taxoReader.getOrdinal(parent);
+              double parentValue = arrays != null ? fr.getFacetRequest().getValueOf(arrays, parentOrd) : -1;
+              parentNode = new FacetResultNode(parentOrd, parentValue);
+              parentNode.label = parent;
+              parentNode.subResults = new ArrayList<FacetResultNode>();
+              parentNode.subResults.add(childNode);
+              mergedNodes.put(parent, parentNode);
+              childNode = parentNode;
+              parent = parent.subpath(parent.length - 1);
+            }
+
+            // at least one parent was added, so link the final (existing)
+            // parent with the child
+            if (parent.length > 0) {
+              if (!(parentNode.subResults instanceof ArrayList)) {
+                parentNode.subResults = new ArrayList<FacetResultNode>(parentNode.subResults);
+              }
+              parentNode.subResults.add(childNode);
+            }
+
+            // for missing FRNs, add new ones with label and value=-1
+            // first time encountered this label, add it and all its children to
+            // the map.
+            mergedNodes.put(frn.label, frn);
+            for (FacetResultNode child : frn.subResults) {
+              addIfNotExist(mergedNodes, child);
+            }
+          } else {
+            if (!(merged.subResults instanceof ArrayList)) {
+              merged.subResults = new ArrayList<FacetResultNode>(merged.subResults);
+            }
+            for (FacetResultNode sub : frn.subResults) {
+              // make sure sub wasn't already added
+              sub = addIfNotExist(mergedNodes, sub);
+              if (!merged.subResults.contains(sub)) {
+                merged.subResults.add(sub);
+              }
+            }
+          }
+        }
+        
+        // find the 'first' node to put on the FacetResult root
+        CategoryPath min = null;
+        for (CategoryPath cp : mergedNodes.keySet()) {
+          if (min == null || cp.compareTo(min) < 0) {
+            min = cp;
+          }
+        }
+        FacetRequest dummy = new FacetRequest(min, frs.get(0).getFacetRequest().numResults) {
+          @Override
+          public double getValueOf(FacetArrays arrays, int idx) {
+            throw new UnsupportedOperationException("not supported by this request");
+          }
+          
+          @Override
+          public FacetArraysSource getFacetArraysSource() {
+            throw new UnsupportedOperationException("not supported by this request");
+          }
+        };
+        mergedResult = new FacetResult(dummy, mergedNodes.get(min), -1);
+      }
+      res.add(mergedResult);
+    }
+    return res;
+  }
+
   private final FacetRequest facetRequest;
   private final FacetResultNode rootNode;
   private final int numValidDescendants;
@@ -41,19 +185,15 @@ public class FacetResult {
    * @see FacetRequest#categoryPath
    */
   public final FacetResultNode getFacetResultNode() {
-    return this.rootNode;
+    return rootNode;
   }
   
   /**
    * Number of descendants of {@link #getFacetResultNode() root facet result
-   * node}, up till the requested depth. Typically -- have value != 0. This
-   * number does not include the root node.
-   * 
-   * @see #getFacetRequest()
-   * @see FacetRequest#getDepth()
+   * node}, up till the requested depth.
    */
   public final int getNumValidDescendants() {
-    return this.numValidDescendants;
+    return numValidDescendants;
   }
   
   /**
@@ -62,7 +202,7 @@ public class FacetResult {
   public final FacetRequest getFacetRequest() {
     return this.facetRequest;
   }
-  
+
   /**
    * String representation of this facet result.
    * Use with caution: might return a very long string.

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java Thu May 30 07:53:18 2013
@@ -67,10 +67,6 @@ public class FacetResultNode {
    */
   public List<FacetResultNode> subResults = EMPTY_SUB_RESULTS;
 
-  public FacetResultNode() {
-    // empty constructor
-  }
-  
   public FacetResultNode(int ordinal, double value) {
     this.ordinal = ordinal;
     this.value = value;

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java Thu May 30 07:53:18 2013
@@ -15,8 +15,8 @@ import org.apache.lucene.facet.search.Fa
 import org.apache.lucene.facet.search.FacetRequest.ResultMode;
 import org.apache.lucene.facet.search.FacetRequest.SortOrder;
 import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
+import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
 import org.apache.lucene.index.IndexReader;
 
 /*
@@ -58,7 +58,7 @@ public class FacetsAccumulator {
    * constructor.
    */
   public FacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
-    this(searchParams, indexReader, taxonomyReader, null);
+    this(searchParams, indexReader, taxonomyReader, new FacetArrays(taxonomyReader.getSize()));
   }
 
   /**
@@ -81,6 +81,13 @@ public class FacetsAccumulator {
     return new FacetsAccumulator(fsp, indexReader, taxoReader);
   }
   
+  /** Returns an empty {@link FacetResult}. */
+  protected static FacetResult emptyResult(int ordinal, FacetRequest fr) {
+    FacetResultNode root = new FacetResultNode(ordinal, 0);
+    root.label = fr.categoryPath;
+    return new FacetResult(fr, root, 0);
+  }
+  
   /**
    * Initializes the accumulator with the given parameters as well as
    * {@link FacetArrays}. Note that the accumulator doesn't call
@@ -90,9 +97,6 @@ public class FacetsAccumulator {
    */
   public FacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader, 
       FacetArrays facetArrays) {
-    if (facetArrays == null) {
-      facetArrays = new FacetArrays(taxonomyReader.getSize());
-    }
     this.facetArrays = facetArrays;
     this.indexReader = indexReader;
     this.taxonomyReader = taxonomyReader;
@@ -173,19 +177,17 @@ public class FacetsAccumulator {
     for (FacetRequest fr : searchParams.facetRequests) {
       int rootOrd = taxonomyReader.getOrdinal(fr.categoryPath);
       if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist
-        // Add empty FacetResult:
-        FacetResultNode root = new FacetResultNode();
-        root.ordinal = TaxonomyReader.INVALID_ORDINAL;
-        root.label = fr.categoryPath;
-        root.value = 0;
-        res.add(new FacetResult(fr, root, 0));
+        // Add empty FacetResult
+        res.add(emptyResult(rootOrd, fr));
         continue;
       }
       CategoryListParams clp = searchParams.indexingParams.getCategoryListParams(fr.categoryPath);
-      OrdinalPolicy ordinalPolicy = clp .getOrdinalPolicy(fr.categoryPath.components[0]);
-      if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
-        // rollup values
-        aggregator.rollupValues(fr, rootOrd, children, siblings, facetArrays);
+      if (fr.categoryPath.length > 0) { // someone might ask to aggregate the ROOT category
+        OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]);
+        if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
+          // rollup values
+          aggregator.rollupValues(fr, rootOrd, children, siblings, facetArrays);
+        }
       }
       
       FacetResultsHandler frh = createFacetResultsHandler(fr);
@@ -194,4 +196,7 @@ public class FacetsAccumulator {
     return res;
   }
 
+  public boolean requiresDocScores() {
+    return getAggregator().requiresDocScores();
+  }
 }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java Thu May 30 07:53:18 2013
@@ -87,7 +87,7 @@ public abstract class FacetsCollector ex
     }
     
     @Override
-    public final void setNextReader(AtomicReaderContext context) throws IOException {
+    protected final void doSetNextReader(AtomicReaderContext context) throws IOException {
       if (bits != null) {
         matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, scores));
       }
@@ -133,7 +133,7 @@ public abstract class FacetsCollector ex
     public final void setScorer(Scorer scorer) throws IOException {}
     
     @Override
-    public final void setNextReader(AtomicReaderContext context) throws IOException {
+    protected final void doSetNextReader(AtomicReaderContext context) throws IOException {
       if (bits != null) {
         matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, null));
       }
@@ -175,7 +175,7 @@ public abstract class FacetsCollector ex
    * given {@link FacetsAccumulator}.
    */
   public static FacetsCollector create(FacetsAccumulator accumulator) {
-    if (accumulator.getAggregator().requiresDocScores()) {
+    if (accumulator.requiresDocScores()) {
       return new DocsAndScoresCollector(accumulator);
     } else {
       return new DocsOnlyCollector(accumulator);
@@ -183,6 +183,7 @@ public abstract class FacetsCollector ex
   }
 
   private final FacetsAccumulator accumulator;
+  private List<FacetResult> cachedResults;
   
   protected final List<MatchingDocs> matchingDocs = new ArrayList<MatchingDocs>();
 
@@ -196,15 +197,24 @@ public abstract class FacetsCollector ex
    */
   protected abstract void finish();
   
+  /** Performs the actual work of {@link #setNextReader(AtomicReaderContext)}. */
+  protected abstract void doSetNextReader(AtomicReaderContext context) throws IOException;
+  
   /**
    * Returns a {@link FacetResult} per {@link FacetRequest} set in
-   * {@link FacetSearchParams}. Note that if one of the {@link FacetRequest
-   * requests} is for a {@link CategoryPath} that does not exist in the taxonomy,
-   * no matching {@link FacetResult} will be returned.
+   * {@link FacetSearchParams}. Note that if a {@link FacetRequest} defines a
+   * {@link CategoryPath} which does not exist in the taxonomy, an empty
+   * {@link FacetResult} will be returned for it.
    */
   public final List<FacetResult> getFacetResults() throws IOException {
-    finish();
-    return accumulator.accumulate(matchingDocs);
+    // LUCENE-4893: if results are not cached, counts are multiplied as many
+    // times as this method is called. 
+    if (cachedResults == null) {
+      finish();
+      cachedResults = accumulator.accumulate(matchingDocs);
+    }
+    
+    return cachedResults;
   }
   
   /**
@@ -218,12 +228,22 @@ public abstract class FacetsCollector ex
   
   /**
    * Allows to reuse the collector between search requests. This method simply
-   * clears all collected documents (and scores) information, and does not
-   * attempt to reuse allocated memory spaces.
+   * clears all collected documents (and scores) information (as well as cached
+   * results), and does not attempt to reuse allocated memory spaces.
    */
   public final void reset() {
     finish();
     matchingDocs.clear();
+    cachedResults = null;
   }
 
+  @Override
+  public final void setNextReader(AtomicReaderContext context) throws IOException {
+    // clear cachedResults - needed in case someone called getFacetResults()
+    // before doing a search and didn't call reset(). Defensive code to prevent
+    // traps.
+    cachedResults = null;
+    doSetNextReader(context);
+  }
+  
 }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -83,6 +83,7 @@ public final class FastCountingFacetsAgg
           byte b = buf.bytes[offset++];
           if (b >= 0) {
             prev = ord = ((ord << 7) | b) + prev;
+            assert ord < counts.length: "ord=" + ord + " vs maxOrd=" + counts.length;
             ++counts[ord];
             ord = 0;
           } else {

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java Thu May 30 07:53:18 2013
@@ -43,18 +43,19 @@ public final class FloatFacetResultsHand
     return values[ordinal];
   }
 
-  
   @Override
   protected final int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq) {
     FacetResultNode top = pq.top();
     int numResults = 0;
     while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
       float value = values[ordinal];
-      if (value > top.value) {
-        top.value = value;
-        top.ordinal = ordinal;
-        top = pq.updateTop();
+      if (value > 0.0f) {
         ++numResults;
+        if (value > top.value) {
+          top.value = value;
+          top.ordinal = ordinal;
+          top = pq.updateTop();
+        }
       }
       ordinal = siblings[ordinal];
     }
@@ -66,9 +67,8 @@ public final class FloatFacetResultsHand
     while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
       float value = values[ordinal];
       if (value > 0) {
-        FacetResultNode node = new FacetResultNode();
+        FacetResultNode node = new FacetResultNode(ordinal, value);
         node.label = taxonomyReader.getPath(ordinal);
-        node.value = value;
         nodes.add(node);
       }
       ordinal = siblings[ordinal];

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java Thu May 30 07:53:18 2013
@@ -49,11 +49,13 @@ public final class IntFacetResultsHandle
     int numResults = 0;
     while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
       int value = values[ordinal];
-      if (value > top.value) {
-        top.value = value;
-        top.ordinal = ordinal;
-        top = pq.updateTop();
+      if (value > 0) {
         ++numResults;
+        if (value > top.value) {
+          top.value = value;
+          top.ordinal = ordinal;
+          top = pq.updateTop();
+        }
       }
       ordinal = siblings[ordinal];
     }
@@ -65,9 +67,8 @@ public final class IntFacetResultsHandle
     while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
       int value = values[ordinal];
       if (value > 0) {
-        FacetResultNode node = new FacetResultNode();
+        FacetResultNode node = new FacetResultNode(ordinal, value);
         node.label = taxonomyReader.getPath(ordinal);
-        node.value = value;
         nodes.add(node);
       }
       ordinal = siblings[ordinal];

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java Thu May 30 07:53:18 2013
@@ -153,6 +153,11 @@ public class MatchingDocsAsScoredDocIDs 
           }
           
           @Override
+          public long cost() {
+            return size;
+          }
+
+          @Override
           public int advance(int target) throws IOException {
             throw new UnsupportedOperationException("not supported");
           }

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java Thu May 30 07:53:18 2013
@@ -94,7 +94,7 @@ public class StandardFacetsAccumulator e
 
   private Object accumulateGuard;
 
-  private double complementThreshold;
+  private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD;
   
   public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, 
       TaxonomyReader taxonomyReader) {
@@ -198,11 +198,7 @@ public class StandardFacetsAccumulator e
         IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
         if (tmpResult == null) {
           // Add empty FacetResult:
-          FacetResultNode root = new FacetResultNode();
-          root.ordinal = TaxonomyReader.INVALID_ORDINAL;
-          root.label = fr.categoryPath;
-          root.value = 0;
-          res.add(new FacetResult(fr, root, 0));
+          res.add(emptyResult(taxonomyReader.getOrdinal(fr.categoryPath), fr));
           continue;
         }
         FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);

Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -42,11 +42,13 @@ public class SumScoreFacetsAggregator im
     int doc = 0;
     int length = matchingDocs.bits.length();
     float[] scores = facetArrays.getFloatArray();
+    int scoresIdx = 0;
     while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
       cli.getOrdinals(doc, ordinals);
       int upto = ordinals.offset + ordinals.length;
+      final float score = matchingDocs.scores[scoresIdx++];
       for (int i = ordinals.offset; i < upto; i++) {
-        scores[ordinals.ints[i]] += matchingDocs.scores[doc];
+        scores[ordinals.ints[i]] += score;
       }
       ++doc;
     }