You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/07/02 09:12:03 UTC
svn commit: r1498804 [6/8] - in /lucene/dev/branches/lucene4258: lucene/ lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/ lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/ lucene/analysis/common/src/java/org/apache/l...

Modified: lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Tue Jul  2 07:12:00 2013
@@ -126,7 +126,7 @@ public class TestFSTs extends LuceneTest
 
       // FST ord pos int
       {
-        final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+        final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
         final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms2.length);
         for(int idx=0;idx<terms2.length;idx++) {
           pairs.add(new FSTTester.InputOutput<Long>(terms2[idx], (long) idx));
@@ -171,7 +171,7 @@ public class TestFSTs extends LuceneTest
 
     // PositiveIntOutput (ord)
     {
-      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
       final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
       for(int idx=0;idx<terms.length;idx++) {
         pairs.add(new FSTTester.InputOutput<Long>(terms[idx], (long) idx));
@@ -181,8 +181,7 @@ public class TestFSTs extends LuceneTest
 
     // PositiveIntOutput (random monotonically increasing positive number)
     {
-      final boolean doShare = random().nextBoolean();
-      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(doShare);
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
       final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
       long lastOutput = 0;
       for(int idx=0;idx<terms.length;idx++) {
@@ -190,12 +189,12 @@ public class TestFSTs extends LuceneTest
         lastOutput = value;
         pairs.add(new FSTTester.InputOutput<Long>(terms[idx], value));
       }
-      new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, doShare).doTest(true);
+      new FSTTester<Long>(random(), dir, inputMode, pairs, outputs, true).doTest(true);
     }
 
     // PositiveIntOutput (random positive number)
     {
-      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random().nextBoolean());
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
       final List<FSTTester.InputOutput<Long>> pairs = new ArrayList<FSTTester.InputOutput<Long>>(terms.length);
       for(int idx=0;idx<terms.length;idx++) {
         pairs.add(new FSTTester.InputOutput<Long>(terms[idx], _TestUtil.nextLong(random(), 0, Long.MAX_VALUE)));
@@ -205,8 +204,8 @@ public class TestFSTs extends LuceneTest
 
     // Pair<ord, (random monotonically increasing positive number>
     {
-      final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(random().nextBoolean());
-      final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(random().nextBoolean());
+      final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton();
+      final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton();
       final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
       final List<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>> pairs = new ArrayList<FSTTester.InputOutput<PairOutputs.Pair<Long,Long>>>(terms.length);
       long lastOutput = 0;
@@ -306,7 +305,7 @@ public class TestFSTs extends LuceneTest
     }
     IndexReader r = DirectoryReader.open(writer, true);
     writer.close();
-    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random().nextBoolean());
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
 
     final boolean doRewrite = random().nextBoolean();
 
@@ -653,8 +652,8 @@ public class TestFSTs extends LuceneTest
 
     if (storeOrds && storeDocFreqs) {
       // Store both ord & docFreq:
-      final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton(true);
-      final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton(false);
+      final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton();
+      final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton();
       final PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(o1, o2);
       new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
         Random rand;
@@ -669,7 +668,7 @@ public class TestFSTs extends LuceneTest
       }.run(limit, verify, false);
     } else if (storeOrds) {
       // Store only ords
-      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
       new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
         @Override
         public Long getOutput(IntsRef input, int ord) {
@@ -678,7 +677,7 @@ public class TestFSTs extends LuceneTest
       }.run(limit, verify, true);
     } else if (storeDocFreqs) {
       // Store only docFreq
-      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(false);
+      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
       new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
         Random rand;
         @Override
@@ -781,7 +780,7 @@ public class TestFSTs extends LuceneTest
     // smaller FST if the outputs grow monotonically.  But
     // if numbers are "random", false should give smaller
     // final size:
-    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
 
     // Build an FST mapping BytesRef -> Long
     final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
@@ -1100,7 +1099,7 @@ public class TestFSTs extends LuceneTest
   }
 
   public void testFinalOutputOnEndState() throws Exception {
-    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
 
     final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), PackedInts.DEFAULT, true, 15);
     builder.add(Util.toUTF32("stat", new IntsRef()), 17L);
@@ -1115,7 +1114,7 @@ public class TestFSTs extends LuceneTest
   }
 
   public void testInternalFinalState() throws Exception {
-    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
     final boolean willRewrite = random().nextBoolean();
     final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, PackedInts.DEFAULT, true, 15);
     builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
@@ -1136,7 +1135,7 @@ public class TestFSTs extends LuceneTest
   // Make sure raw FST can differentiate between final vs
   // non-final end nodes
   public void testNonFinalStopNode() throws Exception {
-    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
     final Long nothing = outputs.getNoOutput();
     final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
 
@@ -1216,7 +1215,7 @@ public class TestFSTs extends LuceneTest
   };
 
   public void testShortestPaths() throws Exception {
-    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
     final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
 
     final IntsRef scratch = new IntsRef();
@@ -1258,8 +1257,8 @@ public class TestFSTs extends LuceneTest
   public void testShortestPathsWFST() throws Exception {
 
     PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(
-        PositiveIntOutputs.getSingleton(true), // weight
-        PositiveIntOutputs.getSingleton(true)  // output
+        PositiveIntOutputs.getSingleton(), // weight
+        PositiveIntOutputs.getSingleton()  // output
     );
     
     final Builder<Pair<Long,Long>> builder = new Builder<Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, outputs);
@@ -1301,7 +1300,7 @@ public class TestFSTs extends LuceneTest
     final TreeMap<String,Long> slowCompletor = new TreeMap<String,Long>();
     final TreeSet<String> allPrefixes = new TreeSet<String>();
     
-    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+    final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
     final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
     final IntsRef scratch = new IntsRef();
     
@@ -1416,8 +1415,8 @@ public class TestFSTs extends LuceneTest
     final TreeSet<String> allPrefixes = new TreeSet<String>();
     
     PairOutputs<Long,Long> outputs = new PairOutputs<Long,Long>(
-        PositiveIntOutputs.getSingleton(true), // weight
-        PositiveIntOutputs.getSingleton(true)  // output
+        PositiveIntOutputs.getSingleton(), // weight
+        PositiveIntOutputs.getSingleton()  // output
     );
     final Builder<Pair<Long,Long>> builder = new Builder<Pair<Long,Long>>(FST.INPUT_TYPE.BYTE1, outputs);
     final IntsRef scratch = new IntsRef();

Modified: lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestFailOnFieldCacheInsanity.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestFailOnFieldCacheInsanity.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestFailOnFieldCacheInsanity.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestFailOnFieldCacheInsanity.java Tue Jul  2 07:12:00 2013
@@ -69,6 +69,7 @@ public class TestFailOnFieldCacheInsanit
     for(Failure f : r.getFailures()) {
       if (f.getMessage().indexOf("Insane") != -1) {
         insane = true;
+        break;
       }
     }
     Assert.assertTrue(insane);

Modified: lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Tue Jul  2 07:12:00 2013
@@ -659,6 +659,61 @@ public class TestPackedInts extends Luce
     assertEquals(1 << 10, wrt.get(valueCount - 1));
   }
 
+  public void testPagedGrowableWriter() {
+    int pageSize = 1 << (_TestUtil.nextInt(random(), 6, 30));
+    // supports 0 values?
+    PagedGrowableWriter writer = new PagedGrowableWriter(0, pageSize, _TestUtil.nextInt(random(), 1, 64), random().nextFloat());
+    assertEquals(0, writer.size());
+
+    // compare against AppendingLongBuffer
+    AppendingLongBuffer buf = new AppendingLongBuffer();
+    int size = random().nextInt(1000000);
+    long max = 5;
+    for (int i = 0; i < size; ++i) {
+      buf.add(_TestUtil.nextLong(random(), 0, max));
+      if (rarely()) {
+        max = PackedInts.maxValue(rarely() ? _TestUtil.nextInt(random(), 0, 63) : _TestUtil.nextInt(random(), 0, 31));
+      }
+    }
+    writer = new PagedGrowableWriter(size, pageSize, _TestUtil.nextInt(random(), 1, 64), random().nextFloat());
+    assertEquals(size, writer.size());
+    for (int i = size - 1; i >= 0; --i) {
+      writer.set(i, buf.get(i));
+    }
+    for (int i = 0; i < size; ++i) {
+      assertEquals(buf.get(i), writer.get(i));
+    }
+
+    // test copy
+    PagedGrowableWriter copy = writer.resize(_TestUtil.nextLong(random(), writer.size() / 2, writer.size() * 3 / 2));
+    for (long i = 0; i < copy.size(); ++i) {
+      if (i < writer.size()) {
+        assertEquals(writer.get(i), copy.get(i));
+      } else {
+        assertEquals(0, copy.get(i));
+      }
+    }
+  }
+
+  // memory hole
+  @Ignore
+  public void testPagedGrowableWriterOverflow() {
+    final long size = _TestUtil.nextLong(random(), 2 * (long) Integer.MAX_VALUE, 3 * (long) Integer.MAX_VALUE);
+    final int pageSize = 1 << (_TestUtil.nextInt(random(), 16, 30));
+    final PagedGrowableWriter writer = new PagedGrowableWriter(size, pageSize, 1, random().nextFloat());
+    final long index = _TestUtil.nextLong(random(), (long) Integer.MAX_VALUE, size - 1);
+    writer.set(index, 2);
+    assertEquals(2, writer.get(index));
+    for (int i = 0; i < 1000000; ++i) {
+      final long idx = _TestUtil.nextLong(random(), 0, size);
+      if (idx == index) {
+        assertEquals(2, writer.get(idx));
+      } else {
+        assertEquals(0, writer.get(idx));
+      }
+    }
+  }
+
   public void testSave() throws IOException {
     final int valueCount = _TestUtil.nextInt(random(), 1, 2048);
     for (int bpv = 1; bpv <= 64; ++bpv) {
@@ -808,13 +863,15 @@ public class TestPackedInts extends Luce
     final long[] arr = new long[RandomInts.randomIntBetween(random(), 1, 1000000)];
     for (int bpv : new int[] {0, 1, 63, 64, RandomInts.randomIntBetween(random(), 2, 62)}) {
       for (boolean monotonic : new boolean[] {true, false}) {
+        final int pageSize = 1 << _TestUtil.nextInt(random(), 6, 20);
+        final int initialPageCount = _TestUtil.nextInt(random(), 0, 16);
         AbstractAppendingLongBuffer buf;
         final int inc;
         if (monotonic) {
-          buf = new MonotonicAppendingLongBuffer();
+          buf = new MonotonicAppendingLongBuffer(initialPageCount, pageSize);
           inc = _TestUtil.nextInt(random(), -1000, 1000);
         } else {
-          buf = new AppendingLongBuffer();
+          buf = new AppendingLongBuffer(initialPageCount, pageSize);
           inc = 0;
         }
         if (bpv == 0) {

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java Tue Jul  2 07:12:00 2013
@@ -64,7 +64,7 @@ public class RangeAccumulator extends Fa
         throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed");
       }
 
-      RangeFacetRequest<?> rfr = (RangeFacetRequest) fr;
+      RangeFacetRequest<?> rfr = (RangeFacetRequest<?>) fr;
 
       requests.add(new RangeSet(rfr.ranges, rfr.categoryPath.components[0]));
     }
@@ -86,8 +86,11 @@ public class RangeAccumulator extends Fa
       RangeSet ranges = requests.get(i);
 
       int[] counts = new int[ranges.ranges.length];
-      for(MatchingDocs hits : matchingDocs) {
+      for (MatchingDocs hits : matchingDocs) {
         NumericDocValues ndv = hits.context.reader().getNumericDocValues(ranges.field);
+        if (ndv == null) {
+          continue; // no numeric values for this field in this reader
+        }
         final int length = hits.bits.length();
         int doc = 0;
         while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java Tue Jul  2 07:12:00 2013
@@ -3,6 +3,7 @@ package org.apache.lucene.facet.sampling
 import java.io.IOException;
 
 import org.apache.lucene.facet.search.FacetResult;
+import org.apache.lucene.facet.search.FacetResultNode;
 import org.apache.lucene.facet.search.ScoredDocIDs;
 
 /*
@@ -23,22 +24,50 @@ import org.apache.lucene.facet.search.Sc
  */
 
 /**
- * Fixer of sample facet accumulation results
+ * Fixer of sample facet accumulation results.
  * 
  * @lucene.experimental
  */
-public interface SampleFixer {
+public abstract class SampleFixer {
   
   /**
    * Alter the input result, fixing it to account for the sampling. This
-   * implementation can compute accurate or estimated counts for the sampled facets. 
-   * For example, a faster correction could just multiply by a compensating factor.
+   * implementation can compute accurate or estimated counts for the sampled
+   * facets. For example, a faster correction could just multiply by a
+   * compensating factor.
    * 
    * @param origDocIds
    *          full set of matching documents.
    * @param fres
    *          sample result to be fixed.
-   * @throws IOException If there is a low-level I/O error.
+   * @throws IOException
+   *           If there is a low-level I/O error.
    */
-  public void fixResult(ScoredDocIDs origDocIds, FacetResult fres) throws IOException; 
+  public void fixResult(ScoredDocIDs origDocIds, FacetResult fres, double samplingRatio) throws IOException {
+    FacetResultNode topRes = fres.getFacetResultNode();
+    fixResultNode(topRes, origDocIds, samplingRatio);
+  }
+  
+  /**
+   * Fix result node count, and, recursively, fix all its children
+   * 
+   * @param facetResNode
+   *          result node to be fixed
+   * @param docIds
+   *          docids in effect
+   * @throws IOException
+   *           If there is a low-level I/O error.
+   */
+  protected void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) 
+      throws IOException {
+    singleNodeFix(facetResNode, docIds, samplingRatio);
+    for (FacetResultNode frn : facetResNode.subResults) {
+      fixResultNode(frn, docIds, samplingRatio);
+    }
+  }
+  
+  /** Fix the given node's value. */
+  protected abstract void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) 
+      throws IOException;
+  
 }
\ No newline at end of file

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java Tue Jul  2 07:12:00 2013
@@ -12,7 +12,6 @@ import org.apache.lucene.facet.search.Fa
 import org.apache.lucene.facet.search.FacetResultNode;
 import org.apache.lucene.facet.search.ScoredDocIDs;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.index.IndexReader;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -111,16 +110,6 @@ public abstract class Sampler {
       throws IOException;
 
   /**
-   * Get a fixer of sample facet accumulation results. Default implementation
-   * returns a <code>TakmiSampleFixer</code> which is adequate only for
-   * counting. For any other accumulator, provide a different fixer.
-   */
-  public SampleFixer getSampleFixer(IndexReader indexReader, TaxonomyReader taxonomyReader,
-      FacetSearchParams searchParams) {
-    return new TakmiSampleFixer(indexReader, taxonomyReader, searchParams);
-  }
-  
-  /**
    * Result of sample computation
    */
   public final static class SampleResult {
@@ -220,7 +209,7 @@ public abstract class Sampler {
       super(orig.categoryPath, num);
       this.orig = orig;
       setDepth(orig.getDepth());
-      setNumLabel(orig.getNumLabel());
+      setNumLabel(0); // don't label anything as we're over-sampling
       setResultMode(orig.getResultMode());
       setSortOrder(orig.getSortOrder());
     }

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java Tue Jul  2 07:12:00 2013
@@ -79,30 +79,43 @@ public class SamplingAccumulator extends
   public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
     // Replacing the original searchParams with the over-sampled
     FacetSearchParams original = searchParams;
-    searchParams = sampler.overSampledSearchParams(original);
+    SampleFixer samplerFixer = sampler.samplingParams.getSampleFixer();
+    final boolean shouldOversample = sampler.samplingParams.shouldOverSample();
+    if (shouldOversample) {
+      searchParams = sampler.overSampledSearchParams(original);
+    }
     
     List<FacetResult> sampleRes = super.accumulate(docids);
     
-    List<FacetResult> fixedRes = new ArrayList<FacetResult>();
+    List<FacetResult> results = new ArrayList<FacetResult>();
     for (FacetResult fres : sampleRes) {
       // for sure fres is not null because this is guaranteed by the delegee.
       PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
-      // fix the result of current request
-      sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres);
-      
-      fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
-
-      // Using the sampler to trim the extra (over-sampled) results
-      fres = sampler.trimResult(fres);
+      if (samplerFixer != null) {
+        // fix the result of current request
+        samplerFixer.fixResult(docids, fres, samplingRatio);
+        
+        fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
 
+        if (shouldOversample) {
+          // Using the sampler to trim the extra (over-sampled) results
+          fres = sampler.trimResult(fres);
+        }
+      }
+      
       // final labeling if allowed (because labeling is a costly operation)
-      frh.labelResult(fres);
-      fixedRes.add(fres); // add to final results
+      if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
+        // category does not exist, add an empty result
+        results.add(emptyResult(fres.getFacetResultNode().ordinal, fres.getFacetRequest()));
+      } else {
+        frh.labelResult(fres);
+        results.add(fres);
+      }
     }
     
     searchParams = original; // Back to original params
     
-    return fixedRes; 
+    return results; 
   }
 
   @Override

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java Tue Jul  2 07:12:00 2013
@@ -28,7 +28,7 @@ public class SamplingParams {
    * Default factor by which more results are requested over the sample set.
    * @see SamplingParams#getOversampleFactor()
    */
-  public static final double DEFAULT_OVERSAMPLE_FACTOR = 2d;
+  public static final double DEFAULT_OVERSAMPLE_FACTOR = 1d;
   
   /**
    * Default ratio between size of sample to original size of document set.
@@ -59,6 +59,8 @@ public class SamplingParams {
   private double sampleRatio = DEFAULT_SAMPLE_RATIO;
   private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD;
   private double oversampleFactor = DEFAULT_OVERSAMPLE_FACTOR;
+
+  private SampleFixer sampleFixer = null;
   
   /**
    * Return the maxSampleSize.
@@ -166,4 +168,29 @@ public class SamplingParams {
     this.oversampleFactor = oversampleFactor;
   }
 
-}
\ No newline at end of file
+  /**
+   * @return {@link SampleFixer} to be used while fixing the sampled results; if
+   *         <code>null</code>, no fixing will be performed
+   */
+  public SampleFixer getSampleFixer() {
+    return sampleFixer;
+  }
+
+  /**
+   * Set a {@link SampleFixer} to be used while fixing the sampled results.
+   * {@code null} means no fixing will be performed.
+   */
+  public void setSampleFixer(SampleFixer sampleFixer) {
+    this.sampleFixer = sampleFixer;
+  }
+
+  /**
+   * Returns whether over-sampling should be done. By default returns
+   * {@code true} when {@link #getSampleFixer()} is not {@code null} and
+   * {@link #getOversampleFactor()} &gt; 1, {@code false} otherwise.
+   */
+  public boolean shouldOverSample() {
+    return sampleFixer != null && oversampleFactor > 1d;
+  }
+  
+}

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java Tue Jul  2 07:12:00 2013
@@ -10,6 +10,7 @@ import org.apache.lucene.facet.sampling.
 import org.apache.lucene.facet.search.FacetResult;
 import org.apache.lucene.facet.search.ScoredDocIDs;
 import org.apache.lucene.facet.search.StandardFacetsAccumulator;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -52,31 +53,48 @@ public class SamplingWrapper extends Sta
   public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
     // Replacing the original searchParams with the over-sampled (and without statistics-compute)
     FacetSearchParams original = delegee.searchParams;
-    delegee.searchParams = sampler.overSampledSearchParams(original);
+    boolean shouldOversample = sampler.samplingParams.shouldOverSample();
+   
+    if (shouldOversample) {
+      delegee.searchParams = sampler.overSampledSearchParams(original);
+    }
     
     SampleResult sampleSet = sampler.getSampleSet(docids);
 
     List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);
 
-    List<FacetResult> fixedRes = new ArrayList<FacetResult>();
+    List<FacetResult> results = new ArrayList<FacetResult>();
+    SampleFixer sampleFixer = sampler.samplingParams.getSampleFixer();
+    
     for (FacetResult fres : sampleRes) {
       // for sure fres is not null because this is guaranteed by the delegee.
       PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
-      // fix the result of current request
-      sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres); 
-      fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any
+      if (sampleFixer != null) {
+        // fix the result of current request
+        sampleFixer.fixResult(docids, fres, sampleSet.actualSampleRatio); 
+        fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any
+      }
       
-      // Using the sampler to trim the extra (over-sampled) results
-      fres = sampler.trimResult(fres);
+      if (shouldOversample) {
+        // Using the sampler to trim the extra (over-sampled) results
+        fres = sampler.trimResult(fres);
+      }
       
       // final labeling if allowed (because labeling is a costly operation)
-      frh.labelResult(fres);
-      fixedRes.add(fres); // add to final results
+      if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
+        // category does not exist, add an empty result
+        results.add(emptyResult(fres.getFacetResultNode().ordinal, fres.getFacetRequest()));
+      } else {
+        frh.labelResult(fres);
+        results.add(fres);
+      }
     }
 
-    delegee.searchParams = original; // Back to original params
+    if (shouldOversample) {
+      delegee.searchParams = original; // Back to original params
+    }
     
-    return fixedRes; 
+    return results; 
   }
 
   @Override

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java Tue Jul  2 07:12:00 2013
@@ -2,21 +2,19 @@ package org.apache.lucene.facet.sampling
 
 import java.io.IOException;
 
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.Bits;
-
 import org.apache.lucene.facet.params.FacetSearchParams;
 import org.apache.lucene.facet.search.DrillDownQuery;
-import org.apache.lucene.facet.search.FacetResult;
 import org.apache.lucene.facet.search.FacetResultNode;
 import org.apache.lucene.facet.search.ScoredDocIDs;
 import org.apache.lucene.facet.search.ScoredDocIDsIterator;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -36,16 +34,21 @@ import org.apache.lucene.facet.taxonomy.
  */
 
 /**
- * Fix sampling results by counting the intersection between two lists: a
- * TermDocs (list of documents in a certain category) and a DocIdSetIterator
- * (list of documents matching the query).
- * 
+ * Fix sampling results with exact results, by counting the intersection between
+ * two lists: a TermDocs (list of documents in a certain category) and a
+ * DocIdSetIterator (list of documents matching the query).
+ * <p>
+ * This fixer is suitable for scenarios which prioritize accuracy over
+ * performance. 
+ * <p>
+ * <b>Note:</b> for statistically more accurate top-k selection, set
+ * {@link SamplingParams#setOversampleFactor(double) oversampleFactor} to at
+ * least 2, so that the top-k categories would have better chance of showing up
+ * in the sampled top-cK results (see {@link SamplingParams#getOversampleFactor}).
  * 
  * @lucene.experimental
  */
-// TODO (Facet): implement also an estimated fixing by ratio (taking into
-// account "translation" of counts!)
-class TakmiSampleFixer implements SampleFixer {
+public class TakmiSampleFixer extends SampleFixer {
   
   private TaxonomyReader taxonomyReader;
   private IndexReader indexReader;
@@ -59,28 +62,10 @@ class TakmiSampleFixer implements Sample
   }
 
   @Override
-  public void fixResult(ScoredDocIDs origDocIds, FacetResult fres)
-      throws IOException {
-    FacetResultNode topRes = fres.getFacetResultNode();
-    fixResultNode(topRes, origDocIds);
-  }
-  
-  /**
-   * Fix result node count, and, recursively, fix all its children
-   * 
-   * @param facetResNode
-   *          result node to be fixed
-   * @param docIds
-   *          docids in effect
-   * @throws IOException If there is a low-level I/O error.
-   */
-  private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds) throws IOException {
+  public void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) throws IOException {
     recount(facetResNode, docIds);
-    for (FacetResultNode frn : facetResNode.subResults) {
-      fixResultNode(frn, docIds);
-    }
   }
-
+  
   /**
    * Internal utility: recount for a facet result node
    * 
@@ -179,4 +164,5 @@ class TakmiSampleFixer implements Sample
     }
     return false; // exhausted
   }
+
 }
\ No newline at end of file

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java Tue Jul  2 07:12:00 2013
@@ -402,16 +402,20 @@ public class DrillSideways {
       query = new DrillDownQuery(filter, query);
     }
     if (sort != null) {
+      int limit = searcher.getIndexReader().maxDoc();
+      if (limit == 0) {
+        limit = 1; // the collector does not allow numHits = 0
+      }
+      topN = Math.min(topN, limit);
       final TopFieldCollector hitCollector = TopFieldCollector.create(sort,
-                                                                      Math.min(topN, searcher.getIndexReader().maxDoc()),
+                                                                      topN,
                                                                       after,
                                                                       true,
                                                                       doDocScores,
                                                                       doMaxScore,
                                                                       true);
       DrillSidewaysResult r = search(query, hitCollector, fsp);
-      r.hits = hitCollector.topDocs();
-      return r;
+      return new DrillSidewaysResult(r.facetResults, hitCollector.topDocs());
     } else {
       return search(after, query, topN, fsp);
     }
@@ -423,10 +427,14 @@ public class DrillSideways {
    */
   public DrillSidewaysResult search(ScoreDoc after,
                                     DrillDownQuery query, int topN, FacetSearchParams fsp) throws IOException {
-    TopScoreDocCollector hitCollector = TopScoreDocCollector.create(Math.min(topN, searcher.getIndexReader().maxDoc()), after, true);
+    int limit = searcher.getIndexReader().maxDoc();
+    if (limit == 0) {
+      limit = 1; // the collector does not allow numHits = 0
+    }
+    topN = Math.min(topN, limit);
+    TopScoreDocCollector hitCollector = TopScoreDocCollector.create(topN, after, true);
     DrillSidewaysResult r = search(query, hitCollector, fsp);
-    r.hits = hitCollector.topDocs();
-    return r;
+    return new DrillSidewaysResult(r.facetResults, hitCollector.topDocs());
   }
 
   /** Override this to use a custom drill-down {@link
@@ -454,16 +462,20 @@ public class DrillSideways {
     return false;
   }
 
-  /** Represents the returned result from a drill sideways
-   *  search. */
+  /**
+   * Represents the returned result from a drill sideways search. Note that if
+   * you called
+   * {@link DrillSideways#search(DrillDownQuery, Collector, FacetSearchParams)},
+   * then {@link #hits} will be {@code null}.
+   */
   public static class DrillSidewaysResult {
     /** Combined drill down & sideways results. */
     public final List<FacetResult> facetResults;
 
     /** Hits. */
-    public TopDocs hits;
+    public final TopDocs hits;
 
-    DrillSidewaysResult(List<FacetResult> facetResults, TopDocs hits) {
+    public DrillSidewaysResult(List<FacetResult> facetResults, TopDocs hits) {
       this.facetResults = facetResults;
       this.hits = hits;
     }

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java Tue Jul  2 07:12:00 2013
@@ -1,5 +1,16 @@
 package org.apache.lucene.facet.search;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.CollectionUtil;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -24,6 +35,140 @@ package org.apache.lucene.facet.search;
  */
 public class FacetResult {
   
+  private static FacetResultNode addIfNotExist(Map<CategoryPath, FacetResultNode> nodes, FacetResultNode node) {
+    FacetResultNode n = nodes.get(node.label);
+    if (n == null) {
+      nodes.put(node.label, node);
+      n = node;
+    }
+    return n;
+  }
+
+  /**
+   * A utility for merging multiple {@link FacetResult} of the same
+   * (hierarchical) dimension into a single {@link FacetResult}, to reconstruct
+   * the hierarchy. The results are merged according to the following rules:
+   * <ul>
+   * <li>If two results share the same dimension (first component in their
+   * {@link CategoryPath}), they are merged.
+   * <li>If a result is missing ancestors in the other results, e.g. A/B/C but
+   * no corresponding A or A/B, these nodes are 'filled' with their label,
+   * ordinal and value (obtained from the respective {@link FacetArrays}).
+   * <li>If a result does not share a dimension with other results, it is
+   * returned as is.
+   * </ul>
+   * <p>
+   * <b>NOTE:</b> the returned results are not guaranteed to be in the same
+   * order as the input ones.
+   * 
+   * @param results
+   *          the results to merge
+   * @param taxoReader
+   *          the {@link TaxonomyReader} to use when creating missing ancestor
+   *          nodes
+   * @param dimArrays
+   *          a mapping from a dimension to the respective {@link FacetArrays}
+   *          from which to pull the nodes values
+   */
+  public static List<FacetResult> mergeHierarchies(List<FacetResult> results, TaxonomyReader taxoReader,
+      Map<String, FacetArrays> dimArrays) throws IOException {
+    final Map<String, List<FacetResult>> dims = new HashMap<String,List<FacetResult>>();
+    for (FacetResult fr : results) {
+      String dim = fr.getFacetRequest().categoryPath.components[0];
+      List<FacetResult> frs = dims.get(dim);
+      if (frs == null) {
+        frs = new ArrayList<FacetResult>();
+        dims.put(dim, frs);
+      }
+      frs.add(fr);
+    }
+
+    final List<FacetResult> res = new ArrayList<FacetResult>();
+    for (List<FacetResult> frs : dims.values()) {
+      FacetResult mergedResult = frs.get(0);
+      if (frs.size() > 1) {
+        CollectionUtil.introSort(frs, new Comparator<FacetResult>() {
+          @Override
+          public int compare(FacetResult fr1, FacetResult fr2) {
+            return fr1.getFacetRequest().categoryPath.compareTo(fr2.getFacetRequest().categoryPath);
+          }
+        });
+        Map<CategoryPath, FacetResultNode> mergedNodes = new HashMap<CategoryPath,FacetResultNode>();
+        FacetArrays arrays = dimArrays != null ? dimArrays.get(frs.get(0).getFacetRequest().categoryPath.components[0]) : null;
+        for (FacetResult fr : frs) {
+          FacetResultNode frn = fr.getFacetResultNode();
+          FacetResultNode merged = mergedNodes.get(frn.label);
+          if (merged == null) {
+            CategoryPath parent = frn.label.subpath(frn.label.length - 1);
+            FacetResultNode childNode = frn;
+            FacetResultNode parentNode = null;
+            while (parent.length > 0 && (parentNode = mergedNodes.get(parent)) == null) {
+              int parentOrd = taxoReader.getOrdinal(parent);
+              double parentValue = arrays != null ? fr.getFacetRequest().getValueOf(arrays, parentOrd) : -1;
+              parentNode = new FacetResultNode(parentOrd, parentValue);
+              parentNode.label = parent;
+              parentNode.subResults = new ArrayList<FacetResultNode>();
+              parentNode.subResults.add(childNode);
+              mergedNodes.put(parent, parentNode);
+              childNode = parentNode;
+              parent = parent.subpath(parent.length - 1);
+            }
+
+            // at least one parent was added, so link the final (existing)
+            // parent with the child
+            if (parent.length > 0) {
+              if (!(parentNode.subResults instanceof ArrayList)) {
+                parentNode.subResults = new ArrayList<FacetResultNode>(parentNode.subResults);
+              }
+              parentNode.subResults.add(childNode);
+            }
+
+            // for missing FRNs, add new ones with label and value=-1
+            // first time encountered this label, add it and all its children to
+            // the map.
+            mergedNodes.put(frn.label, frn);
+            for (FacetResultNode child : frn.subResults) {
+              addIfNotExist(mergedNodes, child);
+            }
+          } else {
+            if (!(merged.subResults instanceof ArrayList)) {
+              merged.subResults = new ArrayList<FacetResultNode>(merged.subResults);
+            }
+            for (FacetResultNode sub : frn.subResults) {
+              // make sure sub wasn't already added
+              sub = addIfNotExist(mergedNodes, sub);
+              if (!merged.subResults.contains(sub)) {
+                merged.subResults.add(sub);
+              }
+            }
+          }
+        }
+        
+        // find the 'first' node to put on the FacetResult root
+        CategoryPath min = null;
+        for (CategoryPath cp : mergedNodes.keySet()) {
+          if (min == null || cp.compareTo(min) < 0) {
+            min = cp;
+          }
+        }
+        FacetRequest dummy = new FacetRequest(min, frs.get(0).getFacetRequest().numResults) {
+          @Override
+          public double getValueOf(FacetArrays arrays, int idx) {
+            throw new UnsupportedOperationException("not supported by this request");
+          }
+          
+          @Override
+          public FacetArraysSource getFacetArraysSource() {
+            throw new UnsupportedOperationException("not supported by this request");
+          }
+        };
+        mergedResult = new FacetResult(dummy, mergedNodes.get(min), -1);
+      }
+      res.add(mergedResult);
+    }
+    return res;
+  }
+
   private final FacetRequest facetRequest;
   private final FacetResultNode rootNode;
   private final int numValidDescendants;

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java Tue Jul  2 07:12:00 2013
@@ -94,7 +94,7 @@ public class StandardFacetsAccumulator e
 
   private Object accumulateGuard;
 
-  private double complementThreshold;
+  private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD;
   
   public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, 
       TaxonomyReader taxonomyReader) {

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/complements/TestFacetsAccumulatorWithComplement.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/complements/TestFacetsAccumulatorWithComplement.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/complements/TestFacetsAccumulatorWithComplement.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/complements/TestFacetsAccumulatorWithComplement.java Tue Jul  2 07:12:00 2013
@@ -121,8 +121,8 @@ public class TestFacetsAccumulatorWithCo
     
     // Results are ready, printing them...
     int i = 0;
-    for (FacetResult facetResult : res) {
-      if (VERBOSE) {
+    if (VERBOSE) {
+      for (FacetResult facetResult : res) {
         System.out.println("Res "+(i++)+": "+facetResult);
       }
     }

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/sampling/BaseSampleTestTopK.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/sampling/BaseSampleTestTopK.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/sampling/BaseSampleTestTopK.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/sampling/BaseSampleTestTopK.java Tue Jul  2 07:12:00 2013
@@ -94,7 +94,7 @@ public abstract class BaseSampleTestTopK
         for (int nTrial = 0; nTrial < RETRIES; nTrial++) {
           try {
             // complement with sampling!
-            final Sampler sampler = createSampler(nTrial, useRandomSampler);
+            final Sampler sampler = createSampler(nTrial, useRandomSampler, samplingSearchParams);
             
             assertSampling(expectedResults, q, sampler, samplingSearchParams, false);
             assertSampling(expectedResults, q, sampler, samplingSearchParams, true);
@@ -128,14 +128,20 @@ public abstract class BaseSampleTestTopK
     return FacetsCollector.create(sfa);
   }
   
-  private Sampler createSampler(int nTrial, boolean useRandomSampler) {
+  private Sampler createSampler(int nTrial, boolean useRandomSampler, FacetSearchParams sParams) {
     SamplingParams samplingParams = new SamplingParams();
     
+    /*
+     * Set sampling to Exact fixing with TakmiSampleFixer as it is not easy to
+     * validate results with amortized results. 
+     */
+    samplingParams.setSampleFixer(new TakmiSampleFixer(indexReader, taxoReader, sParams));
+        
     final double retryFactor = Math.pow(1.01, nTrial);
+    samplingParams.setOversampleFactor(5.0 * retryFactor); // Oversampling 
     samplingParams.setSampleRatio(0.8 * retryFactor);
     samplingParams.setMinSampleSize((int) (100 * retryFactor));
     samplingParams.setMaxSampleSize((int) (10000 * retryFactor));
-    samplingParams.setOversampleFactor(5.0 * retryFactor);
     samplingParams.setSamplingThreshold(11000); //force sampling
 
     Sampler sampler = useRandomSampler ? 

Added: lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplerTest.java?rev=1498804&view=auto
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplerTest.java (added)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/sampling/SamplerTest.java Tue Jul  2 07:12:00 2013
@@ -0,0 +1,111 @@
+package org.apache.lucene.facet.sampling;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.facet.FacetTestBase;
+import org.apache.lucene.facet.params.FacetIndexingParams;
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.CountFacetRequest;
+import org.apache.lucene.facet.search.FacetResultNode;
+import org.apache.lucene.facet.search.FacetsCollector;
+import org.apache.lucene.facet.search.StandardFacetsAccumulator;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.junit.After;
+import org.junit.Before;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SamplerTest extends FacetTestBase {
+  
+  private FacetIndexingParams fip;
+  
+  @Override
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    fip = getFacetIndexingParams(Integer.MAX_VALUE);
+    initIndex(fip);
+  }
+  
+  @Override
+  protected int numDocsToIndex() {
+    return 100;
+  }
+  
+  @Override
+  protected List<CategoryPath> getCategories(final int doc) {
+    return new ArrayList<CategoryPath>() {
+      {
+        add(new CategoryPath("root", "a", Integer.toString(doc % 10)));
+      }
+    };
+  }
+  
+  @Override
+  protected String getContent(int doc) {
+    return "";
+  }
+  
+  @Override
+  @After
+  public void tearDown() throws Exception {
+    closeAll();
+    super.tearDown();
+  }
+  
+  public void testDefaultFixer() throws Exception {
+    RandomSampler randomSampler = new RandomSampler();
+    SampleFixer fixer = randomSampler.samplingParams.getSampleFixer();
+    assertEquals(null, fixer);
+  }
+  
+  public void testCustomFixer() throws Exception {
+    SamplingParams sp = new SamplingParams();
+    sp.setSampleFixer(new TakmiSampleFixer(null, null, null));
+    assertEquals(TakmiSampleFixer.class, sp.getSampleFixer().getClass());
+  }
+  
+  public void testNoFixing() throws Exception {
+    SamplingParams sp = new SamplingParams();
+    sp.setMaxSampleSize(10);
+    sp.setMinSampleSize(5);
+    sp.setSampleRatio(0.01d);
+    sp.setSamplingThreshold(50);
+    sp.setOversampleFactor(5d);
+    
+    assertNull("Fixer should be null as the test is for no-fixing",
+        sp.getSampleFixer());
+    FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(
+        new CategoryPath("root", "a"), 1));
+    SamplingAccumulator accumulator = new SamplingAccumulator(
+        new RandomSampler(sp, random()), fsp, indexReader, taxoReader);
+    
+    // Make sure no complements are in action
+    accumulator
+        .setComplementThreshold(StandardFacetsAccumulator.DISABLE_COMPLEMENT);
+    
+    FacetsCollector fc = FacetsCollector.create(accumulator);
+    
+    searcher.search(new MatchAllDocsQuery(), fc);
+    FacetResultNode node = fc.getFacetResults().get(0).getFacetResultNode();
+    
+    assertTrue(node.value < numDocsToIndex());
+  }
+}

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java Tue Jul  2 07:12:00 2013
@@ -23,7 +23,7 @@ import org.junit.Test;
  */
 
 public class FacetRequestTest extends FacetTestCase {
-
+  
   @Test(expected=IllegalArgumentException.class)
   public void testIllegalNumResults() throws Exception {
     assertNotNull(new CountFacetRequest(new CategoryPath("a", "b"), 0));
@@ -33,7 +33,7 @@ public class FacetRequestTest extends Fa
   public void testIllegalCategoryPath() throws Exception {
     assertNotNull(new CountFacetRequest(null, 1));
   }
-
+  
   @Test
   public void testHashAndEquals() {
     CountFacetRequest fr1 = new CountFacetRequest(new CategoryPath("a"), 8);

Added: lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java?rev=1498804&view=auto
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java (added)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java Tue Jul  2 07:12:00 2013
@@ -0,0 +1,204 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.facet.FacetTestCase;
+import org.apache.lucene.facet.FacetTestUtils;
+import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.params.FacetIndexingParams;
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.IOUtils;
+import org.junit.Test;
+
+public class FacetResultTest extends FacetTestCase {
+  
+  private Document newDocument(FacetFields facetFields, String... categories) throws IOException {
+    Document doc = new Document();
+    List<CategoryPath> cats = new ArrayList<CategoryPath>();
+    for (String cat : categories) {
+      cats.add(new CategoryPath(cat, '/'));
+    }
+    facetFields.addFields(doc, cats);
+    return doc;
+  }
+  
+  private void initIndex(Directory indexDir, Directory taxoDir) throws IOException {
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+    FacetFields facetFields = new FacetFields(taxoWriter);
+    indexWriter.addDocument(newDocument(facetFields, "Date/2010/March/12", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2010/March/23", "A/2"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2010/April/17", "A/3"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2010/May/18", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2011/January/1", "A/3"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2011/February/12", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2011/February/18", "A/4"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2012/August/15", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2012/July/5", "A/2"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2013/September/13", "A/1"));
+    indexWriter.addDocument(newDocument(facetFields, "Date/2013/September/25", "A/4"));
+    IOUtils.close(indexWriter, taxoWriter);
+  }
+  
+  private void searchIndex(TaxonomyReader taxoReader, IndexSearcher searcher, boolean fillMissingCounts, String[] exp,
+      String[][] drillDowns, int[] numResults) throws IOException {
+    CategoryPath[][] cps = new CategoryPath[drillDowns.length][];
+    for (int i = 0; i < cps.length; i++) {
+      cps[i] = new CategoryPath[drillDowns[i].length];
+      for (int j = 0; j < cps[i].length; j++) {
+        cps[i][j] = new CategoryPath(drillDowns[i][j], '/');
+      }
+    }
+    DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery());
+    for (CategoryPath[] cats : cps) {
+      ddq.add(cats);
+    }
+    
+    List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
+    for (CategoryPath[] cats : cps) {
+      for (int i = 0; i < cats.length; i++) {
+        CategoryPath cp = cats[i];
+        int numres = numResults == null ? 2 : numResults[i];
+        // for each drill-down, add itself as well as its parent as requests, so
+        // we get the drill-sideways
+        facetRequests.add(new CountFacetRequest(cp, numres));
+        CountFacetRequest parent = new CountFacetRequest(cp.subpath(cp.length - 1), numres);
+        if (!facetRequests.contains(parent) && parent.categoryPath.length > 0) {
+          facetRequests.add(parent);
+        }
+      }
+    }
+    
+    FacetSearchParams fsp = new FacetSearchParams(facetRequests);
+    final DrillSideways ds;
+    final Map<String,FacetArrays> dimArrays;
+    if (fillMissingCounts) {
+      dimArrays = new HashMap<String,FacetArrays>();
+      ds = new DrillSideways(searcher, taxoReader) {
+        @Override
+        protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) throws IOException {
+          FacetsAccumulator fa = super.getDrillSidewaysAccumulator(dim, fsp);
+          dimArrays.put(dim, fa.facetArrays);
+          return fa;
+        }
+      };
+    } else {
+      ds = new DrillSideways(searcher, taxoReader);
+      dimArrays = null;
+    }
+    
+    final DrillSidewaysResult sidewaysRes = ds.search(null, ddq, 5, fsp);
+    List<FacetResult> facetResults = FacetResult.mergeHierarchies(sidewaysRes.facetResults, taxoReader, dimArrays);
+    CollectionUtil.introSort(facetResults, new Comparator<FacetResult>() {
+      @Override
+      public int compare(FacetResult o1, FacetResult o2) {
+        return o1.getFacetRequest().categoryPath.compareTo(o2.getFacetRequest().categoryPath);
+      }
+    });
+    assertEquals(exp.length, facetResults.size()); // A + single one for date
+    for (int i = 0; i < facetResults.size(); i++) {
+      assertEquals(exp[i], FacetTestUtils.toSimpleString(facetResults.get(i)));
+    }
+  }
+  
+  @Test
+  public void testMergeHierarchies() throws Exception {
+    Directory indexDir = new RAMDirectory(), taxoDir = new RAMDirectory();
+    initIndex(indexDir, taxoDir);
+    
+    DirectoryReader indexReader = DirectoryReader.open(indexDir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
+    IndexSearcher searcher = new IndexSearcher(indexReader);
+    
+    String[] exp = new String[] { "Date (0)\n  2010 (4)\n  2011 (3)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date" } }, null);
+    
+    // two dimensions
+    exp = new String[] { "A (0)\n  1 (5)\n  4 (2)\n", "Date (0)\n  2010 (4)\n  2011 (3)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date" }, new String[] { "A" } }, null);
+    
+    // both parent and child are OR'd
+    exp = new String[] { "Date (-1)\n  2010 (4)\n    March (2)\n      23 (1)\n      12 (1)\n    May (1)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010/March", "Date/2010/March/23" }}, null);
+    
+    // both parent and child are OR'd (fill counts)
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n      23 (1)\n      12 (1)\n    May (1)\n" };
+    searchIndex(taxoReader, searcher, true, exp, new String[][] { new String[] { "Date/2010/March", "Date/2010/March/23" }}, null);
+    
+    // same DD twice
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n    May (1)\n  2011 (3)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010", "Date/2010" }}, null);
+    
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n    May (1)\n  2011 (3)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010" }}, null);
+    
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n    May (1)\n  2011 (3)\n    February (2)\n    January (1)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010", "Date/2011" }}, null);
+    
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n      23 (1)\n      12 (1)\n    May (1)\n  2011 (3)\n    February (2)\n    January (1)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010/March", "Date/2011" }}, null);
+    
+    // Date/2010/April not in top-2 of Date/2010
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n      23 (1)\n      12 (1)\n    May (1)\n    April (1)\n      17 (1)\n  2011 (3)\n    February (2)\n    January (1)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2010/March", "Date/2010/April", "Date/2011" }}, null);
+    
+    // missing ancestors
+    exp = new String[] { "Date (-1)\n  2010 (4)\n    March (2)\n    May (1)\n    April (1)\n      17 (1)\n  2011 (-1)\n    January (1)\n      1 (1)\n" };
+    searchIndex(taxoReader, searcher, false, exp, new String[][] { new String[] { "Date/2011/January/1", "Date/2010/April" }}, null);
+    
+    // missing ancestors (fill counts)
+    exp = new String[] { "Date (0)\n  2010 (4)\n    March (2)\n    May (1)\n    April (1)\n      17 (1)\n  2011 (3)\n    January (1)\n      1 (1)\n" };
+    searchIndex(taxoReader, searcher, true, exp, new String[][] { new String[] { "Date/2011/January/1", "Date/2010/April" }}, null);
+    
+    // non-hierarchical dimension with both parent and child
+    exp = new String[] { "A (0)\n  1 (5)\n  4 (2)\n  3 (2)\n" };
+    searchIndex(taxoReader, searcher, INFOSTREAM, exp, new String[][] { new String[] { "A", "A/3" }}, null);
+    
+    // non-hierarchical dimension with same request but different numResults
+    exp = new String[] { "A (0)\n  1 (5)\n  4 (2)\n  3 (2)\n  2 (2)\n" };
+    searchIndex(taxoReader, searcher, INFOSTREAM, exp, new String[][] { new String[] { "A", "A" }}, new int[] { 2, 4 });
+    
+    IOUtils.close(indexReader, taxoReader);
+    
+    IOUtils.close(indexDir, taxoDir);
+  }
+  
+}

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java Tue Jul  2 07:12:00 2013
@@ -59,15 +59,18 @@ import org.apache.lucene.search.ScoreDoc
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortField.Type;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.InPlaceMergeSorter;
 import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util._TestUtil;
+import org.junit.Test;
 
 public class TestDrillSideways extends FacetTestCase {
 
@@ -1144,5 +1147,34 @@ public class TestDrillSideways extends F
     }
     return b.toString();
   }
+  
+  @Test
+  public void testEmptyIndex() throws Exception {
+    // LUCENE-5045: make sure DrillSideways works with an empty index
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    writer = new RandomIndexWriter(random(), dir);
+    taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    taxoWriter.close();
+
+    // Count "Author"
+    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10));
+
+    DrillSideways ds = new DrillSideways(searcher, taxoReader);
+    DrillDownQuery ddq = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery());
+    ddq.add(new CategoryPath("Author", "Lisa"));
+    
+    DrillSidewaysResult r = ds.search(null, ddq, 10, fsp); // this used to fail on IllegalArgEx
+    assertEquals(0, r.hits.totalHits);
+
+    r = ds.search(ddq, null, null, 10, new Sort(new SortField("foo", Type.INT)), false, false, fsp); // this used to fail on IllegalArgEx
+    assertEquals(0, r.hits.totalHits);
+    
+    IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
+  }
+  
 }
 

Modified: lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java (original)
+++ lucene/dev/branches/lucene4258/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java Tue Jul  2 07:12:00 2013
@@ -17,8 +17,20 @@ import org.apache.lucene.facet.params.Ca
 import org.apache.lucene.facet.params.FacetIndexingParams;
 import org.apache.lucene.facet.params.FacetSearchParams;
 import org.apache.lucene.facet.params.PerDimensionIndexingParams;
+import org.apache.lucene.facet.range.LongRange;
+import org.apache.lucene.facet.range.RangeAccumulator;
+import org.apache.lucene.facet.range.RangeFacetRequest;
+import org.apache.lucene.facet.sampling.RandomSampler;
+import org.apache.lucene.facet.sampling.Sampler;
+import org.apache.lucene.facet.sampling.SamplingAccumulator;
+import org.apache.lucene.facet.sampling.SamplingParams;
+import org.apache.lucene.facet.sampling.SamplingWrapper;
+import org.apache.lucene.facet.sampling.TakmiSampleFixer;
 import org.apache.lucene.facet.search.FacetRequest.ResultMode;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesAccumulator;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
@@ -384,5 +396,72 @@ public class TestFacetsCollector extends
     
     IOUtils.close(taxo, taxoDir, r, indexDir);
   }
-  
+
+  @Test
+  public void testLabeling() throws Exception {
+    Directory indexDir = newDirectory(), taxoDir = newDirectory();
+
+    // create the index
+    IndexWriter indexWriter = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+    FacetFields facetFields = new FacetFields(taxoWriter);
+    Document doc = new Document();
+    facetFields.addFields(doc, Arrays.asList(new CategoryPath("A/1", '/')));
+    indexWriter.addDocument(doc);
+    IOUtils.close(indexWriter, taxoWriter);
+    
+    DirectoryReader indexReader = DirectoryReader.open(indexDir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
+    IndexSearcher searcher = new IndexSearcher(indexReader);
+    // ask to count a non-existing category to test labeling
+    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("B"), 5));
+    
+    final SamplingParams sampleParams = new SamplingParams();
+    sampleParams.setMaxSampleSize(100);
+    sampleParams.setMinSampleSize(100);
+    sampleParams.setSamplingThreshold(100);
+    sampleParams.setOversampleFactor(1.0d);
+    if (random().nextBoolean()) {
+      sampleParams.setSampleFixer(new TakmiSampleFixer(indexReader, taxoReader, fsp));
+    }
+    final Sampler sampler = new RandomSampler(sampleParams, random());
+    
+    FacetsAccumulator[] accumulators = new FacetsAccumulator[] {
+      new FacetsAccumulator(fsp, indexReader, taxoReader),
+      new StandardFacetsAccumulator(fsp, indexReader, taxoReader),
+      new SamplingAccumulator(sampler, fsp, indexReader, taxoReader),
+      new AdaptiveFacetsAccumulator(fsp, indexReader, taxoReader),
+      new SamplingWrapper(new StandardFacetsAccumulator(fsp, indexReader, taxoReader), sampler)
+    };
+    
+    for (FacetsAccumulator fa : accumulators) {
+      FacetsCollector fc = FacetsCollector.create(fa);
+      searcher.search(new MatchAllDocsQuery(), fc);
+      List<FacetResult> facetResults = fc.getFacetResults();
+      assertNotNull(facetResults);
+      assertEquals("incorrect label returned for " + fa, fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label);
+    }
+    
+    try {
+      // SortedSetDocValuesAccumulator cannot even be created in such state
+      assertNull(new SortedSetDocValuesAccumulator(fsp, new SortedSetDocValuesReaderState(indexReader)));
+      // if this ever changes, make sure FacetResultNode is labeled correctly 
+      fail("should not have succeeded to execute a request over a category which wasn't indexed as SortedSetDVField");
+    } catch (IllegalArgumentException e) {
+      // expected
+    }
+
+    fsp = new FacetSearchParams(new RangeFacetRequest<LongRange>("f", new LongRange("grr", 0, true, 1, true)));
+    RangeAccumulator ra = new RangeAccumulator(fsp, indexReader);
+    FacetsCollector fc = FacetsCollector.create(ra);
+    searcher.search(new MatchAllDocsQuery(), fc);
+    List<FacetResult> facetResults = fc.getFacetResults();
+    assertNotNull(facetResults);
+    assertEquals("incorrect label returned for RangeAccumulator", fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label);
+
+    IOUtils.close(indexReader, taxoReader);
+
+    IOUtils.close(indexDir, taxoDir);
+  }
+
 }

Modified: lucene/dev/branches/lucene4258/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java Tue Jul  2 07:12:00 2013
@@ -72,74 +72,78 @@ public class AllGroupHeadsCollectorTest 
         dir,
         newIndexWriterConfig(TEST_VERSION_CURRENT,
             new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
-    boolean canUseIDV = true;
     DocValuesType valueType = vts[random().nextInt(vts.length)];
 
     // 0
     Document doc = new Document();
-    addGroupField(doc, groupField, "author1", canUseIDV, valueType);
-    doc.add(newTextField("content", "random text", Field.Store.YES));
-    doc.add(newStringField("id", "1", Field.Store.YES));
+    addGroupField(doc, groupField, "author1", valueType);
+    doc.add(newTextField("content", "random text", Field.Store.NO));
+    doc.add(newStringField("id_1", "1", Field.Store.NO));
+    doc.add(newStringField("id_2", "1", Field.Store.NO));
     w.addDocument(doc);
 
     // 1
     doc = new Document();
-    addGroupField(doc, groupField, "author1", canUseIDV, valueType);
-    doc.add(newTextField("content", "some more random text blob", Field.Store.YES));
-    doc.add(newStringField("id", "2", Field.Store.YES));
+    addGroupField(doc, groupField, "author1", valueType);
+    doc.add(newTextField("content", "some more random text blob", Field.Store.NO));
+    doc.add(newStringField("id_1", "2", Field.Store.NO));
+    doc.add(newStringField("id_2", "2", Field.Store.NO));
     w.addDocument(doc);
 
     // 2
     doc = new Document();
-    addGroupField(doc, groupField, "author1", canUseIDV, valueType);
-    doc.add(newTextField("content", "some more random textual data", Field.Store.YES));
-    doc.add(newStringField("id", "3", Field.Store.YES));
+    addGroupField(doc, groupField, "author1", valueType);
+    doc.add(newTextField("content", "some more random textual data", Field.Store.NO));
+    doc.add(newStringField("id_1", "3", Field.Store.NO));
+    doc.add(newStringField("id_2", "3", Field.Store.NO));
     w.addDocument(doc);
     w.commit(); // To ensure a second segment
 
     // 3
     doc = new Document();
-    addGroupField(doc, groupField, "author2", canUseIDV, valueType);
-    doc.add(newTextField("content", "some random text", Field.Store.YES));
-    doc.add(newStringField("id", "4", Field.Store.YES));
+    addGroupField(doc, groupField, "author2", valueType);
+    doc.add(newTextField("content", "some random text", Field.Store.NO));
+    doc.add(newStringField("id_1", "4", Field.Store.NO));
+    doc.add(newStringField("id_2", "4", Field.Store.NO));
     w.addDocument(doc);
 
     // 4
     doc = new Document();
-    addGroupField(doc, groupField, "author3", canUseIDV, valueType);
-    doc.add(newTextField("content", "some more random text", Field.Store.YES));
-    doc.add(newStringField("id", "5", Field.Store.YES));
+    addGroupField(doc, groupField, "author3", valueType);
+    doc.add(newTextField("content", "some more random text", Field.Store.NO));
+    doc.add(newStringField("id_1", "5", Field.Store.NO));
+    doc.add(newStringField("id_2", "5", Field.Store.NO));
     w.addDocument(doc);
 
     // 5
     doc = new Document();
-    addGroupField(doc, groupField, "author3", canUseIDV, valueType);
-    doc.add(newTextField("content", "random blob", Field.Store.YES));
-    doc.add(newStringField("id", "6", Field.Store.YES));
+    addGroupField(doc, groupField, "author3", valueType);
+    doc.add(newTextField("content", "random blob", Field.Store.NO));
+    doc.add(newStringField("id_1", "6", Field.Store.NO));
+    doc.add(newStringField("id_2", "6", Field.Store.NO));
     w.addDocument(doc);
 
     // 6 -- no author field
     doc = new Document();
-    doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.YES));
-    doc.add(newStringField("id", "6", Field.Store.YES));
+    doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
+    doc.add(newStringField("id_1", "6", Field.Store.NO));
+    doc.add(newStringField("id_2", "6", Field.Store.NO));
     w.addDocument(doc);
 
     // 7 -- no author field
     doc = new Document();
-    doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.YES));
-    doc.add(newStringField("id", "7", Field.Store.YES));
+    doc.add(newTextField("content", "random word stuck in alot of other text", Field.Store.NO));
+    doc.add(newStringField("id_1", "7", Field.Store.NO));
+    doc.add(newStringField("id_2", "7", Field.Store.NO));
     w.addDocument(doc);
 
     IndexReader reader = w.getReader();
     IndexSearcher indexSearcher = newSearcher(reader);
-    if (SlowCompositeReaderWrapper.class.isAssignableFrom(reader.getClass())) {
-      canUseIDV = false;
-    }
 
     w.close();
     int maxDoc = reader.maxDoc();
 
-    Sort sortWithinGroup = new Sort(new SortField("id", SortField.Type.INT, true));
+    Sort sortWithinGroup = new Sort(new SortField("id_1", SortField.Type.INT, true));
     AbstractAllGroupHeadsCollector<?> allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup);
     indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
     assertTrue(arrayContains(new int[]{2, 3, 5, 7}, allGroupHeadsCollector.retrieveGroupHeads()));
@@ -156,13 +160,13 @@ public class AllGroupHeadsCollectorTest 
     assertTrue(openBitSetContains(new int[]{1, 5}, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
 
     // STRING sort type triggers different implementation
-    Sort sortWithinGroup2 = new Sort(new SortField("id", SortField.Type.STRING, true));
+    Sort sortWithinGroup2 = new Sort(new SortField("id_2", SortField.Type.STRING, true));
     allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup2);
     indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
     assertTrue(arrayContains(new int[]{2, 3, 5, 7}, allGroupHeadsCollector.retrieveGroupHeads()));
     assertTrue(openBitSetContains(new int[]{2, 3, 5, 7}, allGroupHeadsCollector.retrieveGroupHeads(maxDoc), maxDoc));
 
-    Sort sortWithinGroup3 = new Sort(new SortField("id", SortField.Type.STRING, false));
+    Sort sortWithinGroup3 = new Sort(new SortField("id_2", SortField.Type.STRING, false));
     allGroupHeadsCollector = createRandomCollector(groupField, sortWithinGroup3);
     indexSearcher.search(new TermQuery(new Term("content", "random")), allGroupHeadsCollector);
     // 7 b/c higher doc id wins, even if order of field is in not in reverse.
@@ -402,6 +406,7 @@ public class AllGroupHeadsCollectorTest 
       for (int a : actual) {
         if (e == a) {
           found = true;
+          break;
         }
       }
 
@@ -539,11 +544,10 @@ public class AllGroupHeadsCollectorTest 
     return collector;
   }
 
-  private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV, DocValuesType valueType) {
-    doc.add(new TextField(groupField, value, Field.Store.YES));
-    if (canUseIDV) {
-      Field valuesField = null;
-      switch(valueType) {
+  private void addGroupField(Document doc, String groupField, String value, DocValuesType valueType) {
+    doc.add(new TextField(groupField, value, Field.Store.NO));
+    Field valuesField = null;
+    switch(valueType) {
       case BINARY:
         valuesField = new BinaryDocValuesField(groupField + "_dv", new BytesRef(value));
         break;
@@ -552,9 +556,8 @@ public class AllGroupHeadsCollectorTest 
         break;
       default:
         fail("unhandled type");
-      }
-      doc.add(valuesField);
     }
+    doc.add(valuesField);
   }
 
   private static class GroupDoc {

Modified: lucene/dev/branches/lucene4258/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (original)
+++ lucene/dev/branches/lucene4258/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Tue Jul  2 07:12:00 2013
@@ -827,12 +827,14 @@ public class TestGrouping extends Lucene
           for(SortField sf : docSort.getSort()) {
             if (sf.getType() == SortField.Type.SCORE) {
               getScores = true;
+              break;
             }
           }
 
           for(SortField sf : groupSort.getSort()) {
             if (sf.getType() == SortField.Type.SCORE) {
               getScores = true;
+              break;
             }
           }
 

Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/PostingsHighlighter.java Tue Jul  2 07:12:00 2013
@@ -669,7 +669,7 @@ public class PostingsHighlighter {
     public void stringField(FieldInfo fieldInfo, String value) throws IOException {
       assert currentField >= 0;
       StringBuilder builder = builders[currentField];
-      if (builder.length() > 0) {
+      if (builder.length() > 0 && builder.length() < maxLength) {
         builder.append(' '); // for the offset gap, TODO: make this configurable
       }
       if (builder.length() + value.length() > maxLength) {

Modified: lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java?rev=1498804&r1=1498803&r2=1498804&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/highlighter/src/test/org/apache/lucene/search/postingshighlight/TestPostingsHighlighter.java Tue Jul  2 07:12:00 2013
@@ -123,6 +123,43 @@ public class TestPostingsHighlighter ext
     dir.close();
   }
   
+  // simple test with multiple values that make a result longer than maxLength.
+  public void testMaxLengthWithMultivalue() throws Exception {
+    Directory dir = newDirectory();
+    // use simpleanalyzer for more natural tokenization (else "test." is a token)
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
+    
+    FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
+    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+    Document doc = new Document();
+    
+    for(int i = 0; i < 3 ; i++) {
+      Field body = new Field("body", "", offsetsType);
+      body.setStringValue("This is a multivalued field");
+      doc.add(body);
+    }
+    
+    iw.addDocument(doc);
+    
+    IndexReader ir = iw.getReader();
+    iw.close();
+    
+    IndexSearcher searcher = newSearcher(ir);
+    PostingsHighlighter highlighter = new PostingsHighlighter(40);
+    Query query = new TermQuery(new Term("body", "field"));
+    TopDocs topDocs = searcher.search(query, null, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits);
+    String snippets[] = highlighter.highlight("body", query, searcher, topDocs);
+    assertEquals(1, snippets.length);
+    assertTrue("Snippet should have maximum 40 characters plus the pre and post tags",
+        snippets[0].length() == (40 + "<b></b>".length()));
+    
+    ir.close();
+    dir.close();
+  }
+  
   public void testMultipleFields() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));