You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/05/30 09:53:46 UTC
svn commit: r1487777 [24/50] - in /lucene/dev/branches/security: ./
dev-tools/ dev-tools/eclipse/dot.settings/ dev-tools/idea/.idea/
dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/replicator/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/ma...
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -48,7 +48,7 @@ public class MultiAssociationsFacetsAggr
* Creates a new {@link MultiAssociationsFacetsAggregator} over the given
* aggregators. The mapping is used by
* {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} to
- * rollup the values of the speicfic category by the corresponding
+ * rollup the values of the specific category by the corresponding
* {@link FacetsAggregator}. However, since each {@link FacetsAggregator}
* handles the associations of a specific type, which could cover multiple
* categories, the aggregation is done on the unique set of aggregators, which
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -7,7 +7,6 @@ import org.apache.lucene.facet.search.Fa
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@@ -32,6 +31,10 @@ import org.apache.lucene.util.BytesRef;
* A {@link FacetsAggregator} which computes the weight of a category as the sum
* of the float values associated with it in the result documents. Assumes that
* the association encoded for each ordinal is {@link CategoryFloatAssociation}.
+ * <p>
+ * <b>NOTE:</b> this aggregator does not support
+ * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)}. It only
+ * aggregates the categories for which you added a {@link CategoryAssociation}.
*
* @lucene.experimental
*/
@@ -77,22 +80,9 @@ public class SumFloatAssociationFacetsAg
return false;
}
- private float rollupValues(int ordinal, int[] children, int[] siblings, float[] scores) {
- float Value = 0f;
- while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
- float childValue = scores[ordinal];
- childValue += rollupValues(children[ordinal], children, siblings, scores);
- scores[ordinal] = childValue;
- Value += childValue;
- ordinal = siblings[ordinal];
- }
- return Value;
- }
-
@Override
public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
- float[] values = facetArrays.getFloatArray();
- values[ordinal] += rollupValues(children[ordinal], children, siblings, values);
+ // NO-OP: this aggregator does not roll up values to the parents.
}
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -7,7 +7,6 @@ import org.apache.lucene.facet.search.Fa
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@@ -30,8 +29,13 @@ import org.apache.lucene.util.BytesRef;
/**
* A {@link FacetsAggregator} which computes the weight of a category as the sum
- * of the integer values associated with it in the result documents. Assumes that
- * the association encoded for each ordinal is {@link CategoryIntAssociation}.
+ * of the integer values associated with it in the result documents. Assumes
+ * that the association encoded for each ordinal is
+ * {@link CategoryIntAssociation}.
+ * <p>
+ * <b>NOTE:</b> this aggregator does not support
+ * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)}. It only
+ * aggregates the categories for which you added a {@link CategoryAssociation}.
*/
public class SumIntAssociationFacetsAggregator implements FacetsAggregator {
@@ -75,22 +79,9 @@ public class SumIntAssociationFacetsAggr
return false;
}
- private float rollupValues(int ordinal, int[] children, int[] siblings, float[] scores) {
- float Value = 0f;
- while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
- float childValue = scores[ordinal];
- childValue += rollupValues(children[ordinal], children, siblings, scores);
- scores[ordinal] = childValue;
- Value += childValue;
- ordinal = siblings[ordinal];
- }
- return Value;
- }
-
@Override
public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
- float[] values = facetArrays.getFloatArray();
- values[ordinal] += rollupValues(children[ordinal], children, siblings, values);
+ // NO-OP: this aggregator does not roll up values to the parents.
}
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/doc-files/userguide.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/doc-files/userguide.html?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/doc-files/userguide.html (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/doc-files/userguide.html Thu May 30 07:53:18 2013
@@ -261,7 +261,7 @@ Following is a code snippet for indexing
found in package <code>org.apache.lucene.facet.example.simple.SimpleIndexer</code>.
<pre class="prettyprint lang-java linenums">
IndexWriter writer = ...
-TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);
+TaxonomyWriter taxo = new DirectoryTaxonomyWriter(taxoDir);
...
Document doc = new Document();
doc.add(new Field("title", titleText, Store.YES, Index.ANALYZED));
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java Thu May 30 07:53:18 2013
@@ -152,12 +152,15 @@ public class CountingListBuilder impleme
if (op != OrdinalPolicy.NO_PARENTS) {
// need to add parents too
int parent = taxoWriter.getParent(ordinal);
- while (parent > 0) {
- ordinals.ints[ordinals.length++] = parent;
- parent = taxoWriter.getParent(parent);
- }
- if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension
- ordinals.length--;
+ if (parent > 0) {
+ // only do this if the category is not a dimension itself; otherwise the 'if' below would discard the category itself
+ while (parent > 0) {
+ ordinals.ints[ordinals.length++] = parent;
+ parent = taxoWriter.getParent(parent);
+ }
+ if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension
+ ordinals.length--;
+ }
}
}
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java Thu May 30 07:53:18 2013
@@ -48,18 +48,6 @@ import org.apache.lucene.util.IntsRef;
*/
public class FacetFields {
- // The counting list is written in a payload, but we don't store it
- // nor need norms.
- private static final FieldType COUNTING_LIST_PAYLOAD_TYPE = new FieldType();
- static {
- COUNTING_LIST_PAYLOAD_TYPE.setIndexed(true);
- COUNTING_LIST_PAYLOAD_TYPE.setTokenized(true);
- COUNTING_LIST_PAYLOAD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
- COUNTING_LIST_PAYLOAD_TYPE.setStored(false);
- COUNTING_LIST_PAYLOAD_TYPE.setOmitNorms(true);
- COUNTING_LIST_PAYLOAD_TYPE.freeze();
- }
-
// The drill-down field is added with a TokenStream, hence why it's based on
// TextField type. However in practice, it is added just like StringField.
// Therefore we set its IndexOptions to DOCS_ONLY.
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java Thu May 30 07:53:18 2013
@@ -3,6 +3,7 @@ package org.apache.lucene.facet.sampling
import java.io.IOException;
import org.apache.lucene.facet.search.FacetResult;
+import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.ScoredDocIDs;
/*
@@ -23,22 +24,50 @@ import org.apache.lucene.facet.search.Sc
*/
/**
- * Fixer of sample facet accumulation results
+ * Fixer of sample facet accumulation results.
*
* @lucene.experimental
*/
-public interface SampleFixer {
+public abstract class SampleFixer {
/**
* Alter the input result, fixing it to account for the sampling. This
- * implementation can compute accurate or estimated counts for the sampled facets.
- * For example, a faster correction could just multiply by a compensating factor.
+ * implementation can compute accurate or estimated counts for the sampled
+ * facets. For example, a faster correction could just multiply by a
+ * compensating factor.
*
* @param origDocIds
* full set of matching documents.
* @param fres
* sample result to be fixed.
- * @throws IOException If there is a low-level I/O error.
+ * @throws IOException
+ * If there is a low-level I/O error.
*/
- public void fixResult(ScoredDocIDs origDocIds, FacetResult fres) throws IOException;
+ public void fixResult(ScoredDocIDs origDocIds, FacetResult fres, double samplingRatio) throws IOException {
+ FacetResultNode topRes = fres.getFacetResultNode();
+ fixResultNode(topRes, origDocIds, samplingRatio);
+ }
+
+ /**
+ * Fix result node count, and, recursively, fix all its children
+ *
+ * @param facetResNode
+ * result node to be fixed
+ * @param docIds
+ * docids in effect
+ * @throws IOException
+ * If there is a low-level I/O error.
+ */
+ protected void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio)
+ throws IOException {
+ singleNodeFix(facetResNode, docIds, samplingRatio);
+ for (FacetResultNode frn : facetResNode.subResults) {
+ fixResultNode(frn, docIds, samplingRatio);
+ }
+ }
+
+ /** Fix the given node's value. */
+ protected abstract void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio)
+ throws IOException;
+
}
\ No newline at end of file
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java Thu May 30 07:53:18 2013
@@ -12,7 +12,6 @@ import org.apache.lucene.facet.search.Fa
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.index.IndexReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -111,16 +110,6 @@ public abstract class Sampler {
throws IOException;
/**
- * Get a fixer of sample facet accumulation results. Default implementation
- * returns a <code>TakmiSampleFixer</code> which is adequate only for
- * counting. For any other accumulator, provide a different fixer.
- */
- public SampleFixer getSampleFixer(IndexReader indexReader, TaxonomyReader taxonomyReader,
- FacetSearchParams searchParams) {
- return new TakmiSampleFixer(indexReader, taxonomyReader, searchParams);
- }
-
- /**
* Result of sample computation
*/
public final static class SampleResult {
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java Thu May 30 07:53:18 2013
@@ -79,7 +79,11 @@ public class SamplingAccumulator extends
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// Replacing the original searchParams with the over-sampled
FacetSearchParams original = searchParams;
- searchParams = sampler.overSampledSearchParams(original);
+ SampleFixer samplerFixer = sampler.samplingParams.getSampleFixer();
+ final boolean shouldOversample = sampler.samplingParams.shouldOverSample();
+ if (shouldOversample) {
+ searchParams = sampler.overSampledSearchParams(original);
+ }
List<FacetResult> sampleRes = super.accumulate(docids);
@@ -87,14 +91,18 @@ public class SamplingAccumulator extends
for (FacetResult fres : sampleRes) {
// for sure fres is not null because this is guaranteed by the delegee.
PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
- // fix the result of current request
- sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres);
-
- fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
-
- // Using the sampler to trim the extra (over-sampled) results
- fres = sampler.trimResult(fres);
+ if (samplerFixer != null) {
+ // fix the result of current request
+ samplerFixer.fixResult(docids, fres, samplingRatio);
+
+ fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
+ if (shouldOversample) {
+ // Using the sampler to trim the extra (over-sampled) results
+ fres = sampler.trimResult(fres);
+ }
+ }
+
// final labeling if allowed (because labeling is a costly operation)
frh.labelResult(fres);
fixedRes.add(fres); // add to final results
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java Thu May 30 07:53:18 2013
@@ -28,7 +28,7 @@ public class SamplingParams {
* Default factor by which more results are requested over the sample set.
* @see SamplingParams#getOversampleFactor()
*/
- public static final double DEFAULT_OVERSAMPLE_FACTOR = 2d;
+ public static final double DEFAULT_OVERSAMPLE_FACTOR = 1d;
/**
* Default ratio between size of sample to original size of document set.
@@ -59,6 +59,8 @@ public class SamplingParams {
private double sampleRatio = DEFAULT_SAMPLE_RATIO;
private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD;
private double oversampleFactor = DEFAULT_OVERSAMPLE_FACTOR;
+
+ private SampleFixer sampleFixer = null;
/**
* Return the maxSampleSize.
@@ -166,4 +168,29 @@ public class SamplingParams {
this.oversampleFactor = oversampleFactor;
}
-}
\ No newline at end of file
+ /**
+ * @return {@link SampleFixer} to be used while fixing the sampled results; if
+ * <code>null</code>, no fixing will be performed.
+ */
+ public SampleFixer getSampleFixer() {
+ return sampleFixer;
+ }
+
+ /**
+ * Set a {@link SampleFixer} to be used while fixing the sampled results.
+ * {@code null} means no fixing will be performed.
+ */
+ public void setSampleFixer(SampleFixer sampleFixer) {
+ this.sampleFixer = sampleFixer;
+ }
+
+ /**
+ * Returns whether over-sampling should be done. By default returns
+ * {@code true} when {@link #getSampleFixer()} is not {@code null} and
+ * {@link #getOversampleFactor()} &gt; 1, {@code false} otherwise.
+ */
+ public boolean shouldOverSample() {
+ return sampleFixer != null && oversampleFactor > 1d;
+ }
+
+}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java Thu May 30 07:53:18 2013
@@ -52,29 +52,41 @@ public class SamplingWrapper extends Sta
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// Replacing the original searchParams with the over-sampled (and without statistics-compute)
FacetSearchParams original = delegee.searchParams;
- delegee.searchParams = sampler.overSampledSearchParams(original);
+ boolean shouldOversample = sampler.samplingParams.shouldOverSample();
+
+ if (shouldOversample) {
+ delegee.searchParams = sampler.overSampledSearchParams(original);
+ }
SampleResult sampleSet = sampler.getSampleSet(docids);
List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);
List<FacetResult> fixedRes = new ArrayList<FacetResult>();
+ SampleFixer sampleFixer = sampler.samplingParams.getSampleFixer();
+
for (FacetResult fres : sampleRes) {
// for sure fres is not null because this is guaranteed by the delegee.
PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
- // fix the result of current request
- sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres);
- fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any
+ if (sampleFixer != null) {
+ // fix the result of current request
+ sampleFixer.fixResult(docids, fres, sampleSet.actualSampleRatio);
+ fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any
+ }
- // Using the sampler to trim the extra (over-sampled) results
- fres = sampler.trimResult(fres);
+ if (shouldOversample) {
+ // Using the sampler to trim the extra (over-sampled) results
+ fres = sampler.trimResult(fres);
+ }
// final labeling if allowed (because labeling is a costly operation)
frh.labelResult(fres);
fixedRes.add(fres); // add to final results
}
- delegee.searchParams = original; // Back to original params
+ if (shouldOversample) {
+ delegee.searchParams = original; // Back to original params
+ }
return fixedRes;
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java Thu May 30 07:53:18 2013
@@ -2,21 +2,19 @@ package org.apache.lucene.facet.sampling
import java.io.IOException;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.Bits;
-
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.DrillDownQuery;
-import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -36,16 +34,21 @@ import org.apache.lucene.facet.taxonomy.
*/
/**
- * Fix sampling results by counting the intersection between two lists: a
- * TermDocs (list of documents in a certain category) and a DocIdSetIterator
- * (list of documents matching the query).
- *
+ * Fix sampling results by computing the correct results, counting the intersection between
+ * two lists: a TermDocs (list of documents in a certain category) and a
+ * DocIdSetIterator (list of documents matching the query).
+ * <p>
+ * This fixer is suitable for scenarios which prioritize accuracy over
+ * performance.
+ * <p>
+ * <b>Note:</b> for statistically more accurate top-k selection, set
+ * {@link SamplingParams#setOversampleFactor(double) oversampleFactor} to at
+ * least 2, so that the top-k categories would have better chance of showing up
+ * in the sampled top-cK results (see {@link SamplingParams#getOversampleFactor}).
*
* @lucene.experimental
*/
-// TODO (Facet): implement also an estimated fixing by ratio (taking into
-// account "translation" of counts!)
-class TakmiSampleFixer implements SampleFixer {
+public class TakmiSampleFixer extends SampleFixer {
private TaxonomyReader taxonomyReader;
private IndexReader indexReader;
@@ -59,28 +62,10 @@ class TakmiSampleFixer implements Sample
}
@Override
- public void fixResult(ScoredDocIDs origDocIds, FacetResult fres)
- throws IOException {
- FacetResultNode topRes = fres.getFacetResultNode();
- fixResultNode(topRes, origDocIds);
- }
-
- /**
- * Fix result node count, and, recursively, fix all its children
- *
- * @param facetResNode
- * result node to be fixed
- * @param docIds
- * docids in effect
- * @throws IOException If there is a low-level I/O error.
- */
- private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds) throws IOException {
+ public void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) throws IOException {
recount(facetResNode, docIds);
- for (FacetResultNode frn : facetResNode.subResults) {
- fixResultNode(frn, docIds);
- }
}
-
+
/**
* Internal utility: recount for a facet result node
*
@@ -179,4 +164,5 @@ class TakmiSampleFixer implements Sample
}
return false; // exhausted
}
+
}
\ No newline at end of file
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java Thu May 30 07:53:18 2013
@@ -7,8 +7,8 @@ import java.util.Collections;
import java.util.Comparator;
import org.apache.lucene.facet.search.FacetRequest.SortOrder;
+import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.util.PriorityQueue;
/*
@@ -46,7 +46,7 @@ public abstract class DepthOneFacetResul
@Override
protected FacetResultNode getSentinelObject() {
- return new FacetResultNode();
+ return new FacetResultNode(TaxonomyReader.INVALID_ORDINAL, 0);
}
@Override
@@ -80,7 +80,8 @@ public abstract class DepthOneFacetResul
* Add the siblings of {@code ordinal} to the given {@link PriorityQueue}. The
* given {@link PriorityQueue} is already filled with sentinel objects, so
* implementations are encouraged to use {@link PriorityQueue#top()} and
- * {@link PriorityQueue#updateTop()} for best performance.
+ * {@link PriorityQueue#updateTop()} for best performance. Returns the total
+ * number of siblings.
*/
protected abstract int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq);
@@ -92,10 +93,8 @@ public abstract class DepthOneFacetResul
int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath);
- FacetResultNode root = new FacetResultNode();
- root.ordinal = rootOrd;
+ FacetResultNode root = new FacetResultNode(rootOrd, valueOf(rootOrd));
root.label = facetRequest.categoryPath;
- root.value = valueOf(rootOrd);
if (facetRequest.numResults > taxonomyReader.getSize()) {
// specialize this case, user is interested in all available results
ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
@@ -118,11 +117,11 @@ public abstract class DepthOneFacetResul
// since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big
PriorityQueue<FacetResultNode> pq = new FacetResultNodeQueue(facetRequest.numResults, true);
- int numResults = addSiblings(children[rootOrd], siblings, pq);
+ int numSiblings = addSiblings(children[rootOrd], siblings, pq);
// pop() the least (sentinel) elements
int pqsize = pq.size();
- int size = numResults < pqsize ? numResults : pqsize;
+ int size = numSiblings < pqsize ? numSiblings : pqsize;
for (int i = pqsize - size; i > 0; i--) { pq.pop(); }
// create the FacetResultNodes.
@@ -133,7 +132,7 @@ public abstract class DepthOneFacetResul
subResults[i] = node;
}
root.subResults = Arrays.asList(subResults);
- return new FacetResult(facetRequest, root, size);
+ return new FacetResult(facetRequest, root, numSiblings);
}
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillDownQuery.java Thu May 30 07:53:18 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.LinkedHashMap;
+import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.params.CategoryListParams;
@@ -61,7 +62,7 @@ public final class DrillDownQuery extend
private final BooleanQuery query;
private final Map<String,Integer> drillDownDims = new LinkedHashMap<String,Integer>();
- private final FacetIndexingParams fip;
+ final FacetIndexingParams fip;
/** Used by clone() */
DrillDownQuery(FacetIndexingParams fip, BooleanQuery query, Map<String,Integer> drillDownDims) {
@@ -87,6 +88,19 @@ public final class DrillDownQuery extend
fip = other.fip;
}
+ /** Used by DrillSideways */
+ DrillDownQuery(FacetIndexingParams fip, Query baseQuery, List<Query> clauses, Map<String,Integer> drillDownDims) {
+ this.fip = fip;
+ this.query = new BooleanQuery(true);
+ if (baseQuery != null) {
+ query.add(baseQuery, Occur.MUST);
+ }
+ for(Query clause : clauses) {
+ query.add(clause, Occur.MUST);
+ }
+ this.drillDownDims.putAll(drillDownDims);
+ }
+
/**
* Creates a new {@link DrillDownQuery} without a base query,
* to perform a pure browsing query (equivalent to using
@@ -139,11 +153,25 @@ public final class DrillDownQuery extend
}
q = bq;
}
- drillDownDims.put(dim, drillDownDims.size());
- final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(q);
+ add(dim, q);
+ }
+
+ /** Expert: add a custom drill-down subQuery. Use this
+ * when you have a separate way to drill-down on the
+ * dimension than the indexed facet ordinals. */
+ public void add(String dim, Query subQuery) {
+
+ // TODO: we should use FilteredQuery?
+
+ // So scores of the drill-down query don't have an
+ // effect:
+ final ConstantScoreQuery drillDownQuery = new ConstantScoreQuery(subQuery);
drillDownQuery.setBoost(0.0f);
+
query.add(drillDownQuery, Occur.MUST);
+
+ drillDownDims.put(dim, drillDownDims.size());
}
@Override
@@ -153,7 +181,9 @@ public final class DrillDownQuery extend
@Override
public int hashCode() {
- return query.hashCode();
+ final int prime = 31;
+ int result = super.hashCode();
+ return prime * result + query.hashCode();
}
@Override
@@ -163,7 +193,7 @@ public final class DrillDownQuery extend
}
DrillDownQuery other = (DrillDownQuery) obj;
- return query.equals(other.query);
+ return query.equals(other.query) && super.equals(other);
}
@Override
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java Thu May 30 07:53:18 2013
@@ -19,11 +19,15 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -33,6 +37,7 @@ import org.apache.lucene.search.FieldDoc
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
@@ -40,6 +45,7 @@ import org.apache.lucene.search.TermQuer
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.search.Weight;
/**
* Computes drill down and sideways counts for the provided
@@ -71,27 +77,110 @@ public class DrillSideways {
this.taxoReader = taxoReader;
}
+ /** Moves any drill-downs that don't have a corresponding
+ * facet request into the baseQuery. This is unusual,
+ * yet allowed: typically drill-downs are added because
+ * the user has clicked on previously presented facets,
+ * and those same facets would be computed this time
+ * around. */
+ private static DrillDownQuery moveDrillDownOnlyClauses(DrillDownQuery in, FacetSearchParams fsp) {
+ Set<String> facetDims = new HashSet<String>();
+ for(FacetRequest fr : fsp.facetRequests) {
+ if (fr.categoryPath.length == 0) {
+ throw new IllegalArgumentException("all FacetRequests must have CategoryPath with length > 0");
+ }
+ facetDims.add(fr.categoryPath.components[0]);
+ }
+
+ BooleanClause[] clauses = in.getBooleanQuery().getClauses();
+ Map<String,Integer> drillDownDims = in.getDims();
+
+ String[] dimsByIndex = new String[drillDownDims.size()];
+ for(Map.Entry<String,Integer> ent : drillDownDims.entrySet()) {
+ dimsByIndex[ent.getValue()] = ent.getKey();
+ }
+
+ int startClause;
+ if (clauses.length == drillDownDims.size()) {
+ startClause = 0;
+ } else {
+ assert clauses.length == 1+drillDownDims.size();
+ startClause = 1;
+ }
+
+ // Break out drill-down clauses that have no
+ // corresponding facet request and move them inside the
+ // baseQuery:
+ List<Query> nonFacetClauses = new ArrayList<Query>();
+ List<Query> facetClauses = new ArrayList<Query>();
+ Map<String,Integer> dimToIndex = new LinkedHashMap<String,Integer>();
+ for(int i=startClause;i<clauses.length;i++) {
+ Query q = clauses[i].getQuery();
+ String dim = dimsByIndex[i-startClause];
+ if (!facetDims.contains(dim)) {
+ nonFacetClauses.add(q);
+ } else {
+ facetClauses.add(q);
+ dimToIndex.put(dim, dimToIndex.size());
+ }
+ }
+
+ if (!nonFacetClauses.isEmpty()) {
+ BooleanQuery newBaseQuery = new BooleanQuery(true);
+ if (startClause == 1) {
+ // Add original baseQuery:
+ newBaseQuery.add(clauses[0].getQuery(), BooleanClause.Occur.MUST);
+ }
+ for(Query q : nonFacetClauses) {
+ newBaseQuery.add(q, BooleanClause.Occur.MUST);
+ }
+
+ return new DrillDownQuery(fsp.indexingParams, newBaseQuery, facetClauses, dimToIndex);
+ } else {
+ // No change:
+ return in;
+ }
+ }
+
/**
* Search, collecting hits with a {@link Collector}, and
* computing drill down and sideways counts.
*/
+ @SuppressWarnings({"rawtypes","unchecked"})
public DrillSidewaysResult search(DrillDownQuery query,
Collector hitCollector, FacetSearchParams fsp) throws IOException {
+ if (query.fip != fsp.indexingParams) {
+ throw new IllegalArgumentException("DrillDownQuery's FacetIndexingParams should match FacetSearchParams'");
+ }
+
+ query = moveDrillDownOnlyClauses(query, fsp);
+
Map<String,Integer> drillDownDims = query.getDims();
if (drillDownDims.isEmpty()) {
- throw new IllegalArgumentException("there must be at least one drill-down");
+ // Just do ordinary search when there are no drill-downs:
+ FacetsCollector c = FacetsCollector.create(getDrillDownAccumulator(fsp));
+ searcher.search(query, MultiCollector.wrap(hitCollector, c));
+ return new DrillSidewaysResult(c.getFacetResults(), null);
}
- BooleanQuery ddq = query.getBooleanQuery();
- BooleanClause[] clauses = ddq.getClauses();
-
- for(FacetRequest fr : fsp.facetRequests) {
- if (fr.categoryPath.length == 0) {
- throw new IllegalArgumentException("all FacetRequests must have CategoryPath with length > 0");
+ List<FacetRequest> ddRequests = new ArrayList<FacetRequest>();
+ for(FacetRequest fr : fsp.facetRequests) {
+ assert fr.categoryPath.length > 0;
+ if (!drillDownDims.containsKey(fr.categoryPath.components[0])) {
+ ddRequests.add(fr);
}
}
+ FacetSearchParams fsp2;
+ if (!ddRequests.isEmpty()) {
+ fsp2 = new FacetSearchParams(fsp.indexingParams, ddRequests);
+ } else {
+ fsp2 = null;
+ }
+
+ BooleanQuery ddq = query.getBooleanQuery();
+ BooleanClause[] clauses = ddq.getClauses();
Query baseQuery;
int startClause;
@@ -106,54 +195,82 @@ public class DrillSideways {
startClause = 1;
}
- Term[][] drillDownTerms = new Term[clauses.length-startClause][];
- for(int i=startClause;i<clauses.length;i++) {
- Query q = clauses[i].getQuery();
- assert q instanceof ConstantScoreQuery;
- q = ((ConstantScoreQuery) q).getQuery();
- assert q instanceof TermQuery || q instanceof BooleanQuery;
- if (q instanceof TermQuery) {
- drillDownTerms[i-startClause] = new Term[] {((TermQuery) q).getTerm()};
- } else {
- BooleanQuery q2 = (BooleanQuery) q;
- BooleanClause[] clauses2 = q2.getClauses();
- drillDownTerms[i-startClause] = new Term[clauses2.length];
- for(int j=0;j<clauses2.length;j++) {
- assert clauses2[j].getQuery() instanceof TermQuery;
- drillDownTerms[i-startClause][j] = ((TermQuery) clauses2[j].getQuery()).getTerm();
- }
- }
- }
-
- FacetsCollector drillDownCollector = FacetsCollector.create(getDrillDownAccumulator(fsp));
+ FacetsCollector drillDownCollector = fsp2 == null ? null : FacetsCollector.create(getDrillDownAccumulator(fsp2));
FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[drillDownDims.size()];
int idx = 0;
for(String dim : drillDownDims.keySet()) {
- FacetRequest drillSidewaysRequest = null;
+ List<FacetRequest> requests = new ArrayList<FacetRequest>();
for(FacetRequest fr : fsp.facetRequests) {
assert fr.categoryPath.length > 0;
if (fr.categoryPath.components[0].equals(dim)) {
- if (drillSidewaysRequest != null) {
- throw new IllegalArgumentException("multiple FacetRequests for drill-sideways dimension \"" + dim + "\"");
- }
- drillSidewaysRequest = fr;
+ requests.add(fr);
}
}
- if (drillSidewaysRequest == null) {
+ if (requests.isEmpty()) {
throw new IllegalArgumentException("could not find FacetRequest for drill-sideways dimension \"" + dim + "\"");
}
- drillSidewaysCollectors[idx++] = FacetsCollector.create(getDrillSidewaysAccumulator(dim, new FacetSearchParams(fsp.indexingParams, drillSidewaysRequest)));
+ drillSidewaysCollectors[idx++] = FacetsCollector.create(getDrillSidewaysAccumulator(dim, new FacetSearchParams(fsp.indexingParams, requests)));
}
- DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
+ boolean useCollectorMethod = scoreSubDocsAtOnce();
- searcher.search(dsq, hitCollector);
+ Term[][] drillDownTerms = null;
- List<FacetResult> drillDownResults = drillDownCollector.getFacetResults();
+ if (!useCollectorMethod) {
+ // Optimistic: assume subQueries of the DDQ are either
+ // TermQuery or BQ OR of TermQuery; if this is wrong
+ // then we detect it and fall back to the more general
+ // but slower DrillSidewaysCollector:
+ drillDownTerms = new Term[clauses.length-startClause][];
+ for(int i=startClause;i<clauses.length;i++) {
+ Query q = clauses[i].getQuery();
+
+ // DrillDownQuery always wraps each subQuery in
+ // ConstantScoreQuery:
+ assert q instanceof ConstantScoreQuery;
+
+ q = ((ConstantScoreQuery) q).getQuery();
+
+ if (q instanceof TermQuery) {
+ drillDownTerms[i-startClause] = new Term[] {((TermQuery) q).getTerm()};
+ } else if (q instanceof BooleanQuery) {
+ BooleanQuery q2 = (BooleanQuery) q;
+ BooleanClause[] clauses2 = q2.getClauses();
+ drillDownTerms[i-startClause] = new Term[clauses2.length];
+ for(int j=0;j<clauses2.length;j++) {
+ if (clauses2[j].getQuery() instanceof TermQuery) {
+ drillDownTerms[i-startClause][j] = ((TermQuery) clauses2[j].getQuery()).getTerm();
+ } else {
+ useCollectorMethod = true;
+ break;
+ }
+ }
+ } else {
+ useCollectorMethod = true;
+ }
+ }
+ }
+
+ if (useCollectorMethod) {
+ // TODO: maybe we could push the "collector method"
+ // down into the optimized scorer to have a tighter
+ // integration ... and so TermQuery clauses could
+ // continue to run "optimized"
+ collectorMethod(query, baseQuery, startClause, hitCollector, drillDownCollector, drillSidewaysCollectors);
+ } else {
+ DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
+ searcher.search(dsq, hitCollector);
+ }
+
+ int numDims = drillDownDims.size();
+ List<FacetResult>[] drillSidewaysResults = new List[numDims];
+ List<FacetResult> drillDownResults = null;
List<FacetResult> mergedResults = new ArrayList<FacetResult>();
+ int[] requestUpto = new int[drillDownDims.size()];
+ int ddUpto = 0;
for(int i=0;i<fsp.facetRequests.size();i++) {
FacetRequest fr = fsp.facetRequests.get(i);
assert fr.categoryPath.length > 0;
@@ -161,19 +278,119 @@ public class DrillSideways {
if (dimIndex == null) {
// Pure drill down dim (the current query didn't
// drill down on this dim):
- mergedResults.add(drillDownResults.get(i));
+ if (drillDownResults == null) {
+ // Lazy init, in case all requests were against
+ // drill-sideways dims:
+ drillDownResults = drillDownCollector.getFacetResults();
+ //System.out.println("get DD results");
+ }
+ //System.out.println("add dd results " + i);
+ mergedResults.add(drillDownResults.get(ddUpto++));
} else {
// Drill sideways dim:
- List<FacetResult> sidewaysResult = drillSidewaysCollectors[dimIndex.intValue()].getFacetResults();
-
- assert sidewaysResult.size() == 1: "size=" + sidewaysResult.size();
- mergedResults.add(sidewaysResult.get(0));
+ int dim = dimIndex.intValue();
+ List<FacetResult> sidewaysResult = drillSidewaysResults[dim];
+ if (sidewaysResult == null) {
+ // Lazy init, in case no facet request is against
+ // a given drill down dim:
+ sidewaysResult = drillSidewaysCollectors[dim].getFacetResults();
+ drillSidewaysResults[dim] = sidewaysResult;
+ }
+ mergedResults.add(sidewaysResult.get(requestUpto[dim]));
+ requestUpto[dim]++;
}
}
return new DrillSidewaysResult(mergedResults, null);
}
+ /** Uses the more general but slower method of sideways
+ * counting. This method allows an arbitrary subQuery to
+ * implement the drill down for a given dimension. */
+ private void collectorMethod(DrillDownQuery ddq, Query baseQuery, int startClause, Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors) throws IOException {
+
+ BooleanClause[] clauses = ddq.getBooleanQuery().getClauses();
+
+ Map<String,Integer> drillDownDims = ddq.getDims();
+
+ BooleanQuery topQuery = new BooleanQuery(true);
+ final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors,
+ drillDownDims);
+
+ // TODO: if query is already a BQ we could copy that and
+ // add clauses to it, instead of doing BQ inside BQ
+ // (should be more efficient)? Problem is this can
+ // affect scoring (coord) ... too bad we can't disable
+ // coord on a clause by clause basis:
+ topQuery.add(baseQuery, BooleanClause.Occur.MUST);
+
+ // NOTE: in theory we could just make a single BQ, with
+ // +query a b c minShouldMatch=2, but in this case,
+ // annoyingly, BS2 wraps a sub-scorer that always
+ // returns 2 as the .freq(), not how many of the
+ // SHOULD clauses matched:
+ BooleanQuery subQuery = new BooleanQuery(true);
+
+ Query wrappedSubQuery = new QueryWrapper(subQuery,
+ new SetWeight() {
+ @Override
+ public void set(Weight w) {
+ collector.setWeight(w, -1);
+ }
+ });
+ Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery);
+
+ // Don't impact score of original query:
+ constantScoreSubQuery.setBoost(0.0f);
+
+ topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST);
+
+ // Unfortunately this sub-BooleanQuery
+ // will never get BS1 because today BS1 only works
+ // if topScorer=true... and actually we cannot use BS1
+ // anyways because we need subDocsScoredAtOnce:
+ int dimIndex = 0;
+ for(int i=startClause;i<clauses.length;i++) {
+ Query q = clauses[i].getQuery();
+ // DrillDownQuery always wraps each subQuery in
+ // ConstantScoreQuery:
+ assert q instanceof ConstantScoreQuery;
+ q = ((ConstantScoreQuery) q).getQuery();
+
+ final int finalDimIndex = dimIndex;
+ subQuery.add(new QueryWrapper(q,
+ new SetWeight() {
+ @Override
+ public void set(Weight w) {
+ collector.setWeight(w, finalDimIndex);
+ }
+ }),
+ BooleanClause.Occur.SHOULD);
+ dimIndex++;
+ }
+
+ // TODO: we could better optimize the "just one drill
+ // down" case w/ a separate [specialized]
+ // collector...
+ int minShouldMatch = drillDownDims.size()-1;
+ if (minShouldMatch == 0) {
+ // Must add another "fake" clause so BQ doesn't erase
+ // itself by rewriting to the single clause:
+ Query end = new MatchAllDocsQuery();
+ end.setBoost(0.0f);
+ subQuery.add(end, BooleanClause.Occur.SHOULD);
+ minShouldMatch++;
+ }
+
+ subQuery.setMinimumNumberShouldMatch(minShouldMatch);
+
+ // System.out.println("EXE " + topQuery);
+
+ // Collects against the passed-in
+ // drillDown/SidewaysCollectors as a side effect:
+ searcher.search(topQuery, collector);
+ }
+
/**
* Search, sorting by {@link Sort}, and computing
* drill down and sideways counts.
@@ -192,9 +409,8 @@ public class DrillSideways {
doDocScores,
doMaxScore,
true);
- DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(query, hitCollector, fsp);
- r.hits = hitCollector.topDocs();
- return r;
+ DrillSidewaysResult r = search(query, hitCollector, fsp);
+ return new DrillSidewaysResult(r.facetResults, hitCollector.topDocs());
} else {
return search(after, query, topN, fsp);
}
@@ -207,36 +423,102 @@ public class DrillSideways {
public DrillSidewaysResult search(ScoreDoc after,
DrillDownQuery query, int topN, FacetSearchParams fsp) throws IOException {
TopScoreDocCollector hitCollector = TopScoreDocCollector.create(Math.min(topN, searcher.getIndexReader().maxDoc()), after, true);
- DrillSidewaysResult r = new DrillSideways(searcher, taxoReader).search(query, hitCollector, fsp);
- r.hits = hitCollector.topDocs();
- return r;
+ DrillSidewaysResult r = search(query, hitCollector, fsp);
+ return new DrillSidewaysResult(r.facetResults, hitCollector.topDocs());
}
/** Override this to use a custom drill-down {@link
* FacetsAccumulator}. */
- protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) {
+ protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) throws IOException {
return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader);
}
/** Override this to use a custom drill-sideways {@link
* FacetsAccumulator}. */
- protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) {
+ protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) throws IOException {
return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader);
}
- /** Represents the returned result from a drill sideways
- * search. */
+ /** Override this and return true if your collector
+ * (e.g., ToParentBlockJoinCollector) expects all
+ * sub-scorers to be positioned on the document being
+ * collected. This will cause some performance loss;
+ * default is false. Note that if you return true from
+ * this method (in a subclass) be sure your collector
+ * also returns false from {@link
+ * Collector#acceptsDocsOutOfOrder}: this will trick
+ * BooleanQuery into also scoring all subDocs at once. */
+ protected boolean scoreSubDocsAtOnce() {
+ return false;
+ }
+
+ /**
+ * Represents the returned result from a drill sideways search. Note that if
+ * you called
+ * {@link DrillSideways#search(DrillDownQuery, Collector, FacetSearchParams)},
+ * then {@link #hits} will be {@code null}.
+ */
public static class DrillSidewaysResult {
/** Combined drill down & sideways results. */
public final List<FacetResult> facetResults;
/** Hits. */
- public TopDocs hits;
+ public final TopDocs hits;
- DrillSidewaysResult(List<FacetResult> facetResults, TopDocs hits) {
+ public DrillSidewaysResult(List<FacetResult> facetResults, TopDocs hits) {
this.facetResults = facetResults;
this.hits = hits;
}
}
+
+ private interface SetWeight {
+ public void set(Weight w);
+ }
+
+ /** Just records which Weight was given out for the
+ * (possibly rewritten) Query. */
+ private static class QueryWrapper extends Query {
+ private final Query originalQuery;
+ private final SetWeight setter;
+
+ public QueryWrapper(Query originalQuery, SetWeight setter) {
+ this.originalQuery = originalQuery;
+ this.setter = setter;
+ }
+
+ @Override
+ public Weight createWeight(final IndexSearcher searcher) throws IOException {
+ Weight w = originalQuery.createWeight(searcher);
+ setter.set(w);
+ return w;
+ }
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ Query rewritten = originalQuery.rewrite(reader);
+ if (rewritten != originalQuery) {
+ return new QueryWrapper(rewritten, setter);
+ } else {
+ return this;
+ }
+ }
+
+ @Override
+ public String toString(String s) {
+ return originalQuery.toString(s);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof QueryWrapper)) return false;
+ final QueryWrapper other = (QueryWrapper) o;
+ return super.equals(o) && originalQuery.equals(other.originalQuery);
+ }
+
+ @Override
+ public int hashCode() {
+ return super.hashCode() * 31 + originalQuery.hashCode();
+ }
+ }
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java Thu May 30 07:53:18 2013
@@ -129,8 +129,11 @@ class DrillSidewaysQuery extends Query {
dims[dim].docsEnums = new DocsEnum[drillDownTerms[dim].length];
for(int i=0;i<drillDownTerms[dim].length;i++) {
if (termsEnum.seekExact(drillDownTerms[dim][i].bytes(), false)) {
- dims[dim].freq = Math.max(dims[dim].freq, termsEnum.docFreq());
- dims[dim].docsEnums[i] = termsEnum.docs(null, null);
+ DocsEnum docsEnum = termsEnum.docs(null, null);
+ if (docsEnum != null) {
+ dims[dim].docsEnums[i] = docsEnum;
+ dims[dim].maxCost = Math.max(dims[dim].maxCost, docsEnum.cost());
+ }
}
}
}
@@ -157,13 +160,34 @@ class DrillSidewaysQuery extends Query {
};
}
+ // TODO: these should do "deeper" equals/hash on the 2-D drillDownTerms array
+
@Override
public int hashCode() {
- throw new UnsupportedOperationException();
+ final int prime = 31;
+ int result = super.hashCode();
+ result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
+ result = prime * result
+ + ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
+ result = prime * result + Arrays.hashCode(drillDownTerms);
+ result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
+ return result;
}
@Override
public boolean equals(Object obj) {
- throw new UnsupportedOperationException();
+ if (this == obj) return true;
+ if (!super.equals(obj)) return false;
+ if (getClass() != obj.getClass()) return false;
+ DrillSidewaysQuery other = (DrillSidewaysQuery) obj;
+ if (baseQuery == null) {
+ if (other.baseQuery != null) return false;
+ } else if (!baseQuery.equals(other.baseQuery)) return false;
+ if (drillDownCollector == null) {
+ if (other.drillDownCollector != null) return false;
+ } else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
+ if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
+ if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
+ return true;
}
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java Thu May 30 07:53:18 2013
@@ -63,8 +63,10 @@ class DrillSidewaysScorer extends Scorer
//}
//System.out.println("score r=" + context.reader());
collector.setScorer(this);
- drillDownCollector.setScorer(this);
- drillDownCollector.setNextReader(context);
+ if (drillDownCollector != null) {
+ drillDownCollector.setScorer(this);
+ drillDownCollector.setNextReader(context);
+ }
for(DocsEnumsAndFreq dim : dims) {
dim.sidewaysCollector.setScorer(this);
dim.sidewaysCollector.setNextReader(context);
@@ -76,8 +78,7 @@ class DrillSidewaysScorer extends Scorer
assert baseScorer != null;
// Position all scorers to their first matching doc:
- int baseDocID = baseScorer.nextDoc();
-
+ baseScorer.nextDoc();
for(DocsEnumsAndFreq dim : dims) {
for(DocsEnum docsEnum : dim.docsEnums) {
if (docsEnum != null) {
@@ -90,30 +91,33 @@ class DrillSidewaysScorer extends Scorer
DocsEnum[][] docsEnums = new DocsEnum[numDims][];
Collector[] sidewaysCollectors = new Collector[numDims];
- int maxFreq = 0;
+ long drillDownCost = 0;
for(int dim=0;dim<numDims;dim++) {
docsEnums[dim] = dims[dim].docsEnums;
sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
- maxFreq = Math.max(maxFreq, dims[dim].freq);
+ for(DocsEnum de : dims[dim].docsEnums) {
+ if (de != null) {
+ drillDownCost += de.cost();
+ }
+ }
}
- // TODO: if we add cost API to Scorer, switch to that!
- int estBaseHitCount = context.reader().maxDoc() / (1+baseDocID);
+ long baseQueryCost = baseScorer.cost();
/*
- System.out.println("\nbaseDocID=" + baseDocID + " est=" + estBaseHitCount);
+ System.out.println("\nbaseDocID=" + baseScorer.docID() + " est=" + estBaseHitCount);
System.out.println(" maxDoc=" + context.reader().maxDoc());
- System.out.println(" maxFreq=" + maxFreq);
+ System.out.println(" maxCost=" + maxCost);
System.out.println(" dims[0].freq=" + dims[0].freq);
if (numDims > 1) {
System.out.println(" dims[1].freq=" + dims[1].freq);
}
*/
- if (estBaseHitCount < maxFreq/10) {
+ if (baseQueryCost < drillDownCost/10) {
//System.out.println("baseAdvance");
doBaseAdvanceScoring(collector, docsEnums, sidewaysCollectors);
- } else if (numDims > 1 && (dims[1].freq < estBaseHitCount/10)) {
+ } else if (numDims > 1 && (dims[1].maxCost < baseQueryCost/10)) {
//System.out.println("drillDownAdvance");
doDrillDownAdvanceScoring(collector, docsEnums, sidewaysCollectors);
} else {
@@ -391,7 +395,9 @@ class DrillSidewaysScorer extends Scorer
//}
collector.collect(collectDocID);
- drillDownCollector.collect(collectDocID);
+ if (drillDownCollector != null) {
+ drillDownCollector.collect(collectDocID);
+ }
// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the
@@ -565,6 +571,9 @@ class DrillSidewaysScorer extends Scorer
// System.out.println(" now collect: " + filledCount + " hits");
//}
for(int i=0;i<filledCount;i++) {
+ // NOTE: This is actually in-order collection,
+ // because we only accept docs originally returned by
+ // the baseScorer (ie that Scorer is AND'd)
int slot = filledSlots[i];
collectDocID = docIDs[slot];
collectScore = scores[slot];
@@ -615,20 +624,31 @@ class DrillSidewaysScorer extends Scorer
}
@Override
+ public long cost() {
+ return baseScorer.cost();
+ }
+
+ @Override
public Collection<ChildScorer> getChildren() {
return Collections.singletonList(new ChildScorer(baseScorer, "MUST"));
}
static class DocsEnumsAndFreq implements Comparable<DocsEnumsAndFreq> {
DocsEnum[] docsEnums;
- // Max docFreq for all docsEnums for this dim:
- int freq;
+ // Max cost for all docsEnums for this dim:
+ long maxCost;
Collector sidewaysCollector;
String dim;
@Override
public int compareTo(DocsEnumsAndFreq other) {
- return freq - other.freq;
+ if (maxCost < other.maxCost) {
+ return -1;
+ } else if (maxCost > other.maxCost) {
+ return 1;
+ } else {
+ return 0;
+ }
}
}
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java Thu May 30 07:53:18 2013
@@ -1,5 +1,15 @@
package org.apache.lucene.facet.search;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.CollectionUtil;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -25,6 +35,140 @@ package org.apache.lucene.facet.search;
*/
public class FacetResult {
+ private static FacetResultNode addIfNotExist(Map<CategoryPath, FacetResultNode> nodes, FacetResultNode node) {
+ FacetResultNode n = nodes.get(node.label);
+ if (n == null) {
+ nodes.put(node.label, node);
+ n = node;
+ }
+ return n;
+ }
+
+ /**
+ * A utility for merging multiple {@link FacetResult} of the same
+ * (hierarchical) dimension into a single {@link FacetResult}, to reconstruct
+ * the hierarchy. The results are merged according to the following rules:
+ * <ul>
+ * <li>If two results share the same dimension (first component in their
+ * {@link CategoryPath}), they are merged.
+ * <li>If a result is missing ancestors in the other results, e.g. A/B/C but
+ * no corresponding A or A/B, these nodes are 'filled' with their label,
+ * ordinal and value (obtained from the respective {@link FacetArrays}).
+ * <li>If a result does not share a dimension with other results, it is
+ * returned as is.
+ * </ul>
+ * <p>
+ * <b>NOTE:</b> the returned results are not guaranteed to be in the same
+ * order as the input ones.
+ *
+ * @param results
+ * the results to merge
+ * @param taxoReader
+ * the {@link TaxonomyReader} to use when creating missing ancestor
+ * nodes
+ * @param dimArrays
+ * a mapping from a dimension to the respective {@link FacetArrays}
+ * from which to pull the nodes values
+ */
+ public static List<FacetResult> mergeHierarchies(List<FacetResult> results, TaxonomyReader taxoReader,
+ Map<String, FacetArrays> dimArrays) throws IOException {
+ final Map<String, List<FacetResult>> dims = new HashMap<String,List<FacetResult>>();
+ for (FacetResult fr : results) {
+ String dim = fr.getFacetRequest().categoryPath.components[0];
+ List<FacetResult> frs = dims.get(dim);
+ if (frs == null) {
+ frs = new ArrayList<FacetResult>();
+ dims.put(dim, frs);
+ }
+ frs.add(fr);
+ }
+
+ final List<FacetResult> res = new ArrayList<FacetResult>();
+ for (List<FacetResult> frs : dims.values()) {
+ FacetResult mergedResult = frs.get(0);
+ if (frs.size() > 1) {
+ CollectionUtil.introSort(frs, new Comparator<FacetResult>() {
+ @Override
+ public int compare(FacetResult fr1, FacetResult fr2) {
+ return fr1.getFacetRequest().categoryPath.compareTo(fr2.getFacetRequest().categoryPath);
+ }
+ });
+ Map<CategoryPath, FacetResultNode> mergedNodes = new HashMap<CategoryPath,FacetResultNode>();
+ FacetArrays arrays = dimArrays != null ? dimArrays.get(frs.get(0).getFacetRequest().categoryPath.components[0]) : null;
+ for (FacetResult fr : frs) {
+ FacetResultNode frn = fr.getFacetResultNode();
+ FacetResultNode merged = mergedNodes.get(frn.label);
+ if (merged == null) {
+ CategoryPath parent = frn.label.subpath(frn.label.length - 1);
+ FacetResultNode childNode = frn;
+ FacetResultNode parentNode = null;
+ while (parent.length > 0 && (parentNode = mergedNodes.get(parent)) == null) {
+ int parentOrd = taxoReader.getOrdinal(parent);
+ double parentValue = arrays != null ? fr.getFacetRequest().getValueOf(arrays, parentOrd) : -1;
+ parentNode = new FacetResultNode(parentOrd, parentValue);
+ parentNode.label = parent;
+ parentNode.subResults = new ArrayList<FacetResultNode>();
+ parentNode.subResults.add(childNode);
+ mergedNodes.put(parent, parentNode);
+ childNode = parentNode;
+ parent = parent.subpath(parent.length - 1);
+ }
+
+ // at least one parent was added, so link the final (existing)
+ // parent with the child
+ if (parent.length > 0) {
+ if (!(parentNode.subResults instanceof ArrayList)) {
+ parentNode.subResults = new ArrayList<FacetResultNode>(parentNode.subResults);
+ }
+ parentNode.subResults.add(childNode);
+ }
+
+ // for missing FRNs, add new ones with label and value=-1
+ // first time this label is encountered: add it and all its children to
+ // the map.
+ mergedNodes.put(frn.label, frn);
+ for (FacetResultNode child : frn.subResults) {
+ addIfNotExist(mergedNodes, child);
+ }
+ } else {
+ if (!(merged.subResults instanceof ArrayList)) {
+ merged.subResults = new ArrayList<FacetResultNode>(merged.subResults);
+ }
+ for (FacetResultNode sub : frn.subResults) {
+ // make sure sub wasn't already added
+ sub = addIfNotExist(mergedNodes, sub);
+ if (!merged.subResults.contains(sub)) {
+ merged.subResults.add(sub);
+ }
+ }
+ }
+ }
+
+ // find the 'first' node to put on the FacetResult root
+ CategoryPath min = null;
+ for (CategoryPath cp : mergedNodes.keySet()) {
+ if (min == null || cp.compareTo(min) < 0) {
+ min = cp;
+ }
+ }
+ FacetRequest dummy = new FacetRequest(min, frs.get(0).getFacetRequest().numResults) {
+ @Override
+ public double getValueOf(FacetArrays arrays, int idx) {
+ throw new UnsupportedOperationException("not supported by this request");
+ }
+
+ @Override
+ public FacetArraysSource getFacetArraysSource() {
+ throw new UnsupportedOperationException("not supported by this request");
+ }
+ };
+ mergedResult = new FacetResult(dummy, mergedNodes.get(min), -1);
+ }
+ res.add(mergedResult);
+ }
+ return res;
+ }
+
private final FacetRequest facetRequest;
private final FacetResultNode rootNode;
private final int numValidDescendants;
@@ -41,19 +185,15 @@ public class FacetResult {
* @see FacetRequest#categoryPath
*/
public final FacetResultNode getFacetResultNode() {
- return this.rootNode;
+ return rootNode;
}
/**
* Number of descendants of {@link #getFacetResultNode() root facet result
- * node}, up till the requested depth. Typically -- have value != 0. This
- * number does not include the root node.
- *
- * @see #getFacetRequest()
- * @see FacetRequest#getDepth()
+ * node}, up till the requested depth.
*/
public final int getNumValidDescendants() {
- return this.numValidDescendants;
+ return numValidDescendants;
}
/**
@@ -62,7 +202,7 @@ public class FacetResult {
public final FacetRequest getFacetRequest() {
return this.facetRequest;
}
-
+
/**
* String representation of this facet result.
* Use with caution: might return a very long string.
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResultNode.java Thu May 30 07:53:18 2013
@@ -67,10 +67,6 @@ public class FacetResultNode {
*/
public List<FacetResultNode> subResults = EMPTY_SUB_RESULTS;
- public FacetResultNode() {
- // empty constructor
- }
-
public FacetResultNode(int ordinal, double value) {
this.ordinal = ordinal;
this.value = value;
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java Thu May 30 07:53:18 2013
@@ -15,8 +15,8 @@ import org.apache.lucene.facet.search.Fa
import org.apache.lucene.facet.search.FacetRequest.ResultMode;
import org.apache.lucene.facet.search.FacetRequest.SortOrder;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
+import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
import org.apache.lucene.index.IndexReader;
/*
@@ -58,7 +58,7 @@ public class FacetsAccumulator {
* constructor.
*/
public FacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
- this(searchParams, indexReader, taxonomyReader, null);
+ this(searchParams, indexReader, taxonomyReader, new FacetArrays(taxonomyReader.getSize()));
}
/**
@@ -81,6 +81,13 @@ public class FacetsAccumulator {
return new FacetsAccumulator(fsp, indexReader, taxoReader);
}
+ /** Returns an empty {@link FacetResult}. */
+ protected static FacetResult emptyResult(int ordinal, FacetRequest fr) {
+ FacetResultNode root = new FacetResultNode(ordinal, 0);
+ root.label = fr.categoryPath;
+ return new FacetResult(fr, root, 0);
+ }
+
/**
* Initializes the accumulator with the given parameters as well as
* {@link FacetArrays}. Note that the accumulator doesn't call
@@ -90,9 +97,6 @@ public class FacetsAccumulator {
*/
public FacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader,
FacetArrays facetArrays) {
- if (facetArrays == null) {
- facetArrays = new FacetArrays(taxonomyReader.getSize());
- }
this.facetArrays = facetArrays;
this.indexReader = indexReader;
this.taxonomyReader = taxonomyReader;
@@ -173,19 +177,17 @@ public class FacetsAccumulator {
for (FacetRequest fr : searchParams.facetRequests) {
int rootOrd = taxonomyReader.getOrdinal(fr.categoryPath);
if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist
- // Add empty FacetResult:
- FacetResultNode root = new FacetResultNode();
- root.ordinal = TaxonomyReader.INVALID_ORDINAL;
- root.label = fr.categoryPath;
- root.value = 0;
- res.add(new FacetResult(fr, root, 0));
+ // Add empty FacetResult
+ res.add(emptyResult(rootOrd, fr));
continue;
}
CategoryListParams clp = searchParams.indexingParams.getCategoryListParams(fr.categoryPath);
- OrdinalPolicy ordinalPolicy = clp .getOrdinalPolicy(fr.categoryPath.components[0]);
- if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
- // rollup values
- aggregator.rollupValues(fr, rootOrd, children, siblings, facetArrays);
+ if (fr.categoryPath.length > 0) { // someone might ask to aggregate the ROOT category
+ OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]);
+ if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
+ // rollup values
+ aggregator.rollupValues(fr, rootOrd, children, siblings, facetArrays);
+ }
}
FacetResultsHandler frh = createFacetResultsHandler(fr);
@@ -194,4 +196,7 @@ public class FacetsAccumulator {
return res;
}
+ public boolean requiresDocScores() {
+ return getAggregator().requiresDocScores();
+ }
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java Thu May 30 07:53:18 2013
@@ -87,7 +87,7 @@ public abstract class FacetsCollector ex
}
@Override
- public final void setNextReader(AtomicReaderContext context) throws IOException {
+ protected final void doSetNextReader(AtomicReaderContext context) throws IOException {
if (bits != null) {
matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, scores));
}
@@ -133,7 +133,7 @@ public abstract class FacetsCollector ex
public final void setScorer(Scorer scorer) throws IOException {}
@Override
- public final void setNextReader(AtomicReaderContext context) throws IOException {
+ protected final void doSetNextReader(AtomicReaderContext context) throws IOException {
if (bits != null) {
matchingDocs.add(new MatchingDocs(this.context, bits, totalHits, null));
}
@@ -175,7 +175,7 @@ public abstract class FacetsCollector ex
* given {@link FacetsAccumulator}.
*/
public static FacetsCollector create(FacetsAccumulator accumulator) {
- if (accumulator.getAggregator().requiresDocScores()) {
+ if (accumulator.requiresDocScores()) {
return new DocsAndScoresCollector(accumulator);
} else {
return new DocsOnlyCollector(accumulator);
@@ -183,6 +183,7 @@ public abstract class FacetsCollector ex
}
private final FacetsAccumulator accumulator;
+ private List<FacetResult> cachedResults;
protected final List<MatchingDocs> matchingDocs = new ArrayList<MatchingDocs>();
@@ -196,15 +197,24 @@ public abstract class FacetsCollector ex
*/
protected abstract void finish();
+ /** Performs the actual work of {@link #setNextReader(AtomicReaderContext)}. */
+ protected abstract void doSetNextReader(AtomicReaderContext context) throws IOException;
+
/**
* Returns a {@link FacetResult} per {@link FacetRequest} set in
- * {@link FacetSearchParams}. Note that if one of the {@link FacetRequest
- * requests} is for a {@link CategoryPath} that does not exist in the taxonomy,
- * no matching {@link FacetResult} will be returned.
+ * {@link FacetSearchParams}. Note that if a {@link FacetRequest} defines a
+ * {@link CategoryPath} which does not exist in the taxonomy, an empty
+ * {@link FacetResult} will be returned for it.
*/
public final List<FacetResult> getFacetResults() throws IOException {
- finish();
- return accumulator.accumulate(matchingDocs);
+ // LUCENE-4893: if results are not cached, counts are multiplied as many
+ // times as this method is called.
+ if (cachedResults == null) {
+ finish();
+ cachedResults = accumulator.accumulate(matchingDocs);
+ }
+
+ return cachedResults;
}
/**
@@ -218,12 +228,22 @@ public abstract class FacetsCollector ex
/**
* Allows to reuse the collector between search requests. This method simply
- * clears all collected documents (and scores) information, and does not
- * attempt to reuse allocated memory spaces.
+ * clears all collected documents (and scores) information (as well as cached
+ * results), and does not attempt to reuse allocated memory spaces.
*/
public final void reset() {
finish();
matchingDocs.clear();
+ cachedResults = null;
}
+ @Override
+ public final void setNextReader(AtomicReaderContext context) throws IOException {
+ // clear cachedResults - needed in case someone called getFacetResults()
+ // before doing a search and didn't call reset(). Defensive code to prevent
+ // traps.
+ cachedResults = null;
+ doSetNextReader(context);
+ }
+
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -83,6 +83,7 @@ public final class FastCountingFacetsAgg
byte b = buf.bytes[offset++];
if (b >= 0) {
prev = ord = ((ord << 7) | b) + prev;
+ assert ord < counts.length: "ord=" + ord + " vs maxOrd=" + counts.length;
++counts[ord];
ord = 0;
} else {
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/FloatFacetResultsHandler.java Thu May 30 07:53:18 2013
@@ -43,18 +43,19 @@ public final class FloatFacetResultsHand
return values[ordinal];
}
-
@Override
protected final int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq) {
FacetResultNode top = pq.top();
int numResults = 0;
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
float value = values[ordinal];
- if (value > top.value) {
- top.value = value;
- top.ordinal = ordinal;
- top = pq.updateTop();
+ if (value > 0.0f) {
++numResults;
+ if (value > top.value) {
+ top.value = value;
+ top.ordinal = ordinal;
+ top = pq.updateTop();
+ }
}
ordinal = siblings[ordinal];
}
@@ -66,9 +67,8 @@ public final class FloatFacetResultsHand
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
float value = values[ordinal];
if (value > 0) {
- FacetResultNode node = new FacetResultNode();
+ FacetResultNode node = new FacetResultNode(ordinal, value);
node.label = taxonomyReader.getPath(ordinal);
- node.value = value;
nodes.add(node);
}
ordinal = siblings[ordinal];
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/IntFacetResultsHandler.java Thu May 30 07:53:18 2013
@@ -49,11 +49,13 @@ public final class IntFacetResultsHandle
int numResults = 0;
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
int value = values[ordinal];
- if (value > top.value) {
- top.value = value;
- top.ordinal = ordinal;
- top = pq.updateTop();
+ if (value > 0) {
++numResults;
+ if (value > top.value) {
+ top.value = value;
+ top.ordinal = ordinal;
+ top = pq.updateTop();
+ }
}
ordinal = siblings[ordinal];
}
@@ -65,9 +67,8 @@ public final class IntFacetResultsHandle
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
int value = values[ordinal];
if (value > 0) {
- FacetResultNode node = new FacetResultNode();
+ FacetResultNode node = new FacetResultNode(ordinal, value);
node.label = taxonomyReader.getPath(ordinal);
- node.value = value;
nodes.add(node);
}
ordinal = siblings[ordinal];
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/MatchingDocsAsScoredDocIDs.java Thu May 30 07:53:18 2013
@@ -153,6 +153,11 @@ public class MatchingDocsAsScoredDocIDs
}
@Override
+ public long cost() {
+ return size;
+ }
+
+ @Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException("not supported");
}
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java Thu May 30 07:53:18 2013
@@ -94,7 +94,7 @@ public class StandardFacetsAccumulator e
private Object accumulateGuard;
- private double complementThreshold;
+ private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD;
public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
@@ -198,11 +198,7 @@ public class StandardFacetsAccumulator e
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
if (tmpResult == null) {
// Add empty FacetResult:
- FacetResultNode root = new FacetResultNode();
- root.ordinal = TaxonomyReader.INVALID_ORDINAL;
- root.label = fr.categoryPath;
- root.value = 0;
- res.add(new FacetResult(fr, root, 0));
+ res.add(emptyResult(taxonomyReader.getOrdinal(fr.categoryPath), fr));
continue;
}
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
Modified: lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java?rev=1487777&r1=1487776&r2=1487777&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java (original)
+++ lucene/dev/branches/security/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetsAggregator.java Thu May 30 07:53:18 2013
@@ -42,11 +42,13 @@ public class SumScoreFacetsAggregator im
int doc = 0;
int length = matchingDocs.bits.length();
float[] scores = facetArrays.getFloatArray();
+ int scoresIdx = 0;
while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
cli.getOrdinals(doc, ordinals);
int upto = ordinals.offset + ordinals.length;
+ final float score = matchingDocs.scores[scoresIdx++];
for (int i = ordinals.offset; i < upto; i++) {
- scores[ordinals.ints[i]] += matchingDocs.scores[doc];
+ scores[ordinals.ints[i]] += score;
}
++doc;
}