You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cm...@apache.org on 2013/08/11 14:19:39 UTC
svn commit: r1512909 [15/38] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/idea/lucene/suggest/ dev-tools/idea/solr/contrib/dataimporthandler/
dev-tools/idea/solr/core/src/test/ dev-too...
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/FacetPackage.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/FacetPackage.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/FacetPackage.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/FacetPackage.java Sun Aug 11 12:19:13 2013
@@ -17,11 +17,7 @@ package org.apache.lucene.facet;
* limitations under the License.
*/
-/**
- * Required so that userguide files are copied as part of javadocs generation.
- * Otherwise, if the root facet package contains no classes, doc-files aren't
- * copied.
- */
+/** Required for javadocs generation. */
public final class FacetPackage {
private FacetPackage() {}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsListBuilder.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsListBuilder.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsListBuilder.java Sun Aug 11 12:19:13 2013
@@ -54,13 +54,6 @@ public class AssociationsListBuilder imp
// build per-association key BytesRef
CategoryAssociation association = associations.getAssociation(cp);
- if (association == null) {
- // it is ok to set a null association for a category - it's treated as a
- // regular category in that case.
- ++idx;
- continue;
- }
-
BytesRef bytes = res.get(association.getCategoryListID());
if (bytes == null) {
bytes = new BytesRef(32);
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/CategoryAssociationsContainer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/CategoryAssociationsContainer.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/CategoryAssociationsContainer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/CategoryAssociationsContainer.java Sun Aug 11 12:19:13 2013
@@ -30,11 +30,12 @@ public class CategoryAssociationsContain
/**
* Adds the {@link CategoryAssociation} for the given {@link CategoryPath
- * category}. Overrides any assocation that was previously set. It is ok to
- * pass {@code null}, in which case the category will be treated as a regular
- * one (i.e. without association value).
+ * category}. Overrides any association that was previously set.
*/
public void setAssociation(CategoryPath category, CategoryAssociation association) {
+ if (association == null) {
+ throw new IllegalArgumentException("cannot set a null association to a category");
+ }
categoryAssociations.put(category, association);
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetsAggregator.java Sun Aug 11 12:19:13 2013
@@ -7,6 +7,8 @@ import org.apache.lucene.facet.search.Fa
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
+import org.apache.lucene.facet.search.OrdinalValueResolver;
+import org.apache.lucene.facet.search.OrdinalValueResolver.FloatValueResolver;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@@ -54,23 +56,20 @@ public class SumFloatAssociationFacetsAg
int doc = 0;
while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
dv.get(doc, bytes);
- if (bytes.length == 0) {
- continue; // no associations for this document
+ if (bytes.length > 0) {
+ // aggregate float association values for ordinals
+ int bytesUpto = bytes.offset + bytes.length;
+ int pos = bytes.offset;
+ while (pos < bytesUpto) {
+ int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
+ | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
+
+ int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
+ | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
+
+ values[ordinal] += Float.intBitsToFloat(value);
+ }
}
-
- // aggreate float association values for ordinals
- int bytesUpto = bytes.offset + bytes.length;
- int pos = bytes.offset;
- while (pos < bytesUpto) {
- int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
- | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
-
- int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
- | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
-
- values[ordinal] += Float.intBitsToFloat(value);
- }
-
++doc;
}
}
@@ -84,5 +83,10 @@ public class SumFloatAssociationFacetsAg
public void rollupValues(FacetRequest fr, int ordinal, int[] children, int[] siblings, FacetArrays facetArrays) {
// NO-OP: this aggregator does no rollup values to the parents.
}
+
+ @Override
+ public OrdinalValueResolver createOrdinalValueResolver(FacetRequest facetRequest, FacetArrays arrays) {
+ return new FloatValueResolver(arrays);
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetsAggregator.java Sun Aug 11 12:19:13 2013
@@ -7,6 +7,8 @@ import org.apache.lucene.facet.search.Fa
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
+import org.apache.lucene.facet.search.OrdinalValueResolver;
+import org.apache.lucene.facet.search.OrdinalValueResolver.IntValueResolver;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@@ -53,23 +55,20 @@ public class SumIntAssociationFacetsAggr
int doc = 0;
while (doc < length && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) {
dv.get(doc, bytes);
- if (bytes.length == 0) {
- continue; // no associations for this document
+ if (bytes.length > 0) {
+ // aggregate association values for ordinals
+ int bytesUpto = bytes.offset + bytes.length;
+ int pos = bytes.offset;
+ while (pos < bytesUpto) {
+ int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
+ | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
+
+ int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
+ | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
+
+ values[ordinal] += value;
+ }
}
-
- // aggreate association values for ordinals
- int bytesUpto = bytes.offset + bytes.length;
- int pos = bytes.offset;
- while (pos < bytesUpto) {
- int ordinal = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
- | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
-
- int value = ((bytes.bytes[pos++] & 0xFF) << 24) | ((bytes.bytes[pos++] & 0xFF) << 16)
- | ((bytes.bytes[pos++] & 0xFF) << 8) | (bytes.bytes[pos++] & 0xFF);
-
- values[ordinal] += value;
- }
-
++doc;
}
}
@@ -84,4 +83,9 @@ public class SumIntAssociationFacetsAggr
// NO-OP: this aggregator does no rollup values to the parents.
}
+ @Override
+ public OrdinalValueResolver createOrdinalValueResolver(FacetRequest facetRequest, FacetArrays arrays) {
+ return new IntValueResolver(arrays);
+ }
+
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/codecs/facet42/package.html Sun Aug 11 12:19:13 2013
@@ -16,9 +16,6 @@
limitations under the License.
-->
<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
<body>
Codec + DocValuesFormat that are optimized for facets.
</body>
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/complements/TotalFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/complements/TotalFacetCounts.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/complements/TotalFacetCounts.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/complements/TotalFacetCounts.java Sun Aug 11 12:19:13 2013
@@ -11,20 +11,20 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.lucene.facet.old.Aggregator;
+import org.apache.lucene.facet.old.CountingAggregator;
+import org.apache.lucene.facet.old.OldFacetsAccumulator;
+import org.apache.lucene.facet.old.ScoredDocIdsUtils;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
-import org.apache.lucene.facet.search.Aggregator;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.CountFacetRequest;
-import org.apache.lucene.facet.search.CountingAggregator;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
-import org.apache.lucene.facet.search.StandardFacetsAccumulator;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
-import org.apache.lucene.facet.util.ScoredDocIdsUtils;
import org.apache.lucene.index.IndexReader;
/*
@@ -159,7 +159,7 @@ public class TotalFacetCounts {
final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() /(float) partitionSize)][partitionSize];
FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams, DUMMY_REQ);
//createAllListsSearchParams(facetIndexingParams, this.totalCounts);
- StandardFacetsAccumulator sfa = new StandardFacetsAccumulator(newSearchParams, indexReader, taxonomy) {
+ OldFacetsAccumulator sfa = new OldFacetsAccumulator(newSearchParams, indexReader, taxonomy) {
@Override
protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(
FacetArrays facetArrays, int partition) throws IOException {
@@ -172,7 +172,7 @@ public class TotalFacetCounts {
return map;
}
};
- sfa.setComplementThreshold(StandardFacetsAccumulator.DISABLE_COMPLEMENT);
+ sfa.setComplementThreshold(OldFacetsAccumulator.DISABLE_COMPLEMENT);
sfa.accumulate(ScoredDocIdsUtils.createAllDocsScoredDocIDs(indexReader));
return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/index/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/index/package.html?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/index/package.html (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/index/package.html Sun Aug 11 12:19:13 2013
@@ -20,6 +20,5 @@
</head>
<body>
Facets indexing code.
-
</body>
</html>
\ No newline at end of file
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/params/FacetSearchParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/params/FacetSearchParams.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/params/FacetSearchParams.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/params/FacetSearchParams.java Sun Aug 11 12:19:13 2013
@@ -23,13 +23,9 @@ import org.apache.lucene.facet.search.Fa
*/
/**
- * Defines parameters that are needed for faceted search. The list of
- * {@link FacetRequest facet requests} denotes the facets for which aggregated
- * should be done.
- * <p>
- * One can pass {@link FacetIndexingParams} in order to tell the search code how
- * to read the facets information. Note that you must use the same
- * {@link FacetIndexingParams} that were used for indexing.
+ * Defines parameters that are needed for faceted search: the list of
+ * {@link FacetRequest facet requests} which should be aggregated as well as the
+ * {@link FacetIndexingParams indexing params} that were used to index them.
*
* @lucene.experimental
*/
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/partitions/PartitionsFacetResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/partitions/PartitionsFacetResultsHandler.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/partitions/PartitionsFacetResultsHandler.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/partitions/PartitionsFacetResultsHandler.java Sun Aug 11 12:19:13 2013
@@ -2,13 +2,14 @@ package org.apache.lucene.facet.partitio
import java.io.IOException;
+import org.apache.lucene.facet.old.OldFacetsAccumulator;
+import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetResultsHandler;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.StandardFacetsAccumulator;
+import org.apache.lucene.facet.search.OrdinalValueResolver;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
@@ -36,11 +37,10 @@ import org.apache.lucene.facet.taxonomy.
public abstract class PartitionsFacetResultsHandler extends FacetResultsHandler {
public PartitionsFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest,
- FacetArrays facetArrays) {
- super(taxonomyReader, facetRequest, facetArrays);
+ OrdinalValueResolver resolver, FacetArrays facetArrays) {
+ super(taxonomyReader, facetRequest, resolver, facetArrays);
}
-
/**
* Fetch results of a single partition, given facet arrays for that partition,
* and based on the matching documents and faceted search parameters.
@@ -103,7 +103,7 @@ public abstract class PartitionsFacetRes
/**
* Label results according to settings in {@link FacetRequest}, such as
* {@link FacetRequest#getNumLabel()}. Usually invoked by
- * {@link StandardFacetsAccumulator#accumulate(ScoredDocIDs)}
+ * {@link OldFacetsAccumulator#accumulate(ScoredDocIDs)}
*
* @param facetResult
* facet result to be labeled.
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java Sun Aug 11 12:19:13 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.facet.range;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import org.apache.lucene.facet.params.FacetSearchParams;
@@ -26,10 +27,8 @@ import org.apache.lucene.facet.search.Fa
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetsAccumulator;
-import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.NumericDocValues;
/** Uses a {@link NumericDocValues} and accumulates
@@ -51,49 +50,52 @@ public class RangeAccumulator extends Fa
final List<RangeSet> requests = new ArrayList<RangeSet>();
- public RangeAccumulator(FacetSearchParams fsp, IndexReader reader) {
- super(fsp, reader, null, null);
-
- for(FacetRequest fr : fsp.facetRequests) {
-
+ public RangeAccumulator(FacetRequest... facetRequests) {
+ this(Arrays.asList(facetRequests));
+ }
+
+ public RangeAccumulator(List<FacetRequest> facetRequests) {
+ super(new FacetSearchParams(facetRequests));
+ for (FacetRequest fr : facetRequests) {
if (!(fr instanceof RangeFacetRequest)) {
- throw new IllegalArgumentException("only RangeFacetRequest is supported; got " + fsp.facetRequests.get(0).getClass());
+ throw new IllegalArgumentException("this accumulator only supports RangeFacetRequest; got " + fr);
}
if (fr.categoryPath.length != 1) {
throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed");
}
-
- RangeFacetRequest<?> rfr = (RangeFacetRequest) fr;
-
- requests.add(new RangeSet(rfr.ranges, rfr.categoryPath.components[0]));
+
+ RangeFacetRequest<?> rfr = (RangeFacetRequest<?>) fr;
+ requests.add(new RangeSet(rfr.ranges, fr.categoryPath.components[0]));
}
}
@Override
- public FacetsAggregator getAggregator() {
- throw new UnsupportedOperationException();
- }
-
- @Override
public List<FacetResult> accumulate(List<MatchingDocs> matchingDocs) throws IOException {
// TODO: test if this is faster (in the past it was
// faster to do MachingDocs on the inside) ... see
// patches on LUCENE-4965):
List<FacetResult> results = new ArrayList<FacetResult>();
- for(int i=0;i<requests.size();i++) {
+ for (int i = 0; i < requests.size(); i++) {
RangeSet ranges = requests.get(i);
int[] counts = new int[ranges.ranges.length];
- for(MatchingDocs hits : matchingDocs) {
+ for (MatchingDocs hits : matchingDocs) {
NumericDocValues ndv = hits.context.reader().getNumericDocValues(ranges.field);
+ if (ndv == null) {
+ continue; // no numeric values for this field in this reader
+ }
final int length = hits.bits.length();
int doc = 0;
while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {
long v = ndv.get(doc);
+ // TODO: if all ranges are non-overlapping, we
+ // should instead do a bin-search up front
+ // (really, a specialized case of the interval
+ // tree)
// TODO: use interval tree instead of linear search:
- for(int j=0;j<ranges.ranges.length;j++) {
+ for (int j = 0; j < ranges.ranges.length; j++) {
if (ranges.ranges[j].accept(v)) {
counts[j]++;
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetRequest.java Sun Aug 11 12:19:13 2013
@@ -19,11 +19,10 @@ package org.apache.lucene.facet.range;
import java.util.List;
-import org.apache.lucene.facet.search.Aggregator;
-import org.apache.lucene.facet.search.FacetArrays;
+import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetRequest;
+import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/**
* Facet request for dynamic ranges based on a
@@ -36,6 +35,7 @@ public class RangeFacetRequest<T extends
public final Range[] ranges;
+ @SuppressWarnings("unchecked")
public RangeFacetRequest(String field, T...ranges) {
super(new CategoryPath(field), 1);
this.ranges = ranges;
@@ -47,18 +47,8 @@ public class RangeFacetRequest<T extends
}
@Override
- public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public double getValueOf(FacetArrays arrays, int ordinal) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public FacetArraysSource getFacetArraysSource() {
- throw new UnsupportedOperationException();
+ public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
+ return null;
}
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/RandomSampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/RandomSampler.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/RandomSampler.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/RandomSampler.java Sun Aug 11 12:19:13 2013
@@ -3,9 +3,9 @@ package org.apache.lucene.facet.sampling
import java.io.IOException;
import java.util.Random;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.ScoredDocIDsIterator;
-import org.apache.lucene.facet.util.ScoredDocIdsUtils;
+import org.apache.lucene.facet.old.ScoredDocIDs;
+import org.apache.lucene.facet.old.ScoredDocIDsIterator;
+import org.apache.lucene.facet.old.ScoredDocIdsUtils;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/RepeatableSampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/RepeatableSampler.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/RepeatableSampler.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/RepeatableSampler.java Sun Aug 11 12:19:13 2013
@@ -5,12 +5,11 @@ import java.util.Arrays;
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.apache.lucene.facet.old.ScoredDocIDs;
+import org.apache.lucene.facet.old.ScoredDocIDsIterator;
+import org.apache.lucene.facet.old.ScoredDocIdsUtils;
import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.ScoredDocIDsIterator;
-import org.apache.lucene.facet.util.ScoredDocIdsUtils;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SampleFixer.java Sun Aug 11 12:19:13 2013
@@ -2,8 +2,9 @@ package org.apache.lucene.facet.sampling
import java.io.IOException;
+import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.search.FacetResult;
-import org.apache.lucene.facet.search.ScoredDocIDs;
+import org.apache.lucene.facet.search.FacetResultNode;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -23,22 +24,50 @@ import org.apache.lucene.facet.search.Sc
*/
/**
- * Fixer of sample facet accumulation results
+ * Fixer of sample facet accumulation results.
*
* @lucene.experimental
*/
-public interface SampleFixer {
+public abstract class SampleFixer {
/**
* Alter the input result, fixing it to account for the sampling. This
- * implementation can compute accurate or estimated counts for the sampled facets.
- * For example, a faster correction could just multiply by a compensating factor.
+ * implementation can compute accurate or estimated counts for the sampled
+ * facets. For example, a faster correction could just multiply by a
+ * compensating factor.
*
* @param origDocIds
* full set of matching documents.
* @param fres
* sample result to be fixed.
- * @throws IOException If there is a low-level I/O error.
+ * @throws IOException
+ * If there is a low-level I/O error.
*/
- public void fixResult(ScoredDocIDs origDocIds, FacetResult fres) throws IOException;
+ public void fixResult(ScoredDocIDs origDocIds, FacetResult fres, double samplingRatio) throws IOException {
+ FacetResultNode topRes = fres.getFacetResultNode();
+ fixResultNode(topRes, origDocIds, samplingRatio);
+ }
+
+ /**
+ * Fix result node count and, recursively, fix all its children.
+ *
+ * @param facetResNode
+ * result node to be fixed
+ * @param docIds
+ * docids in effect
+ * @throws IOException
+ * If there is a low-level I/O error.
+ */
+ protected void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio)
+ throws IOException {
+ singleNodeFix(facetResNode, docIds, samplingRatio);
+ for (FacetResultNode frn : facetResNode.subResults) {
+ fixResultNode(frn, docIds, samplingRatio);
+ }
+ }
+
+ /** Fix the given node's value. */
+ protected abstract void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio)
+ throws IOException;
+
}
\ No newline at end of file
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java Sun Aug 11 12:19:13 2013
@@ -4,15 +4,13 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.facet.old.ScoredDocIDs;
+import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
-import org.apache.lucene.facet.search.Aggregator;
-import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.facet.search.FacetsAggregator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -111,16 +109,6 @@ public abstract class Sampler {
throws IOException;
/**
- * Get a fixer of sample facet accumulation results. Default implementation
- * returns a <code>TakmiSampleFixer</code> which is adequate only for
- * counting. For any other accumulator, provide a different fixer.
- */
- public SampleFixer getSampleFixer(IndexReader indexReader, TaxonomyReader taxonomyReader,
- FacetSearchParams searchParams) {
- return new TakmiSampleFixer(indexReader, taxonomyReader, searchParams);
- }
-
- /**
* Result of sample computation
*/
public final static class SampleResult {
@@ -207,38 +195,21 @@ public abstract class Sampler {
return res;
}
- /**
- * Wrapping a facet request for over sampling.
- * Implementation detail: even if the original request is a count request, no
- * statistics will be computed for it as the wrapping is not a count request.
- * This is ok, as the sampling accumulator is later computing the statistics
- * over the original requests.
- */
- private static class OverSampledFacetRequest extends FacetRequest {
- final FacetRequest orig;
+ /** Wrapping a facet request for over sampling. */
+ public static class OverSampledFacetRequest extends FacetRequest {
+ public final FacetRequest orig;
public OverSampledFacetRequest(FacetRequest orig, int num) {
super(orig.categoryPath, num);
this.orig = orig;
setDepth(orig.getDepth());
- setNumLabel(orig.getNumLabel());
+ setNumLabel(0); // don't label anything as we're over-sampling
setResultMode(orig.getResultMode());
setSortOrder(orig.getSortOrder());
}
@Override
- public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
- throws IOException {
- return orig.createAggregator(useComplements, arrays, taxonomy);
- }
-
- @Override
- public FacetArraysSource getFacetArraysSource() {
- return orig.getFacetArraysSource();
- }
-
- @Override
- public double getValueOf(FacetArrays arrays, int idx) {
- return orig.getValueOf(arrays, idx);
+ public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
+ return orig.createFacetsAggregator(fip);
}
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingAccumulator.java Sun Aug 11 12:19:13 2013
@@ -4,14 +4,15 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.facet.old.OldFacetsAccumulator;
+import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.search.FacetArrays;
+import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsAccumulator;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.StandardFacetsAccumulator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
@@ -38,10 +39,10 @@ import org.apache.lucene.index.IndexRead
* Note two major differences between this class and {@link SamplingWrapper}:
* <ol>
* <li>Latter can wrap any other {@link FacetsAccumulator} while this class
- * directly extends {@link StandardFacetsAccumulator}.</li>
+ * directly extends {@link OldFacetsAccumulator}.</li>
* <li>This class can effectively apply sampling on the complement set of
* matching document, thereby working efficiently with the complement
- * optimization - see {@link StandardFacetsAccumulator#getComplementThreshold()}
+ * optimization - see {@link OldFacetsAccumulator#getComplementThreshold()}
* .</li>
* </ol>
* <p>
@@ -52,7 +53,7 @@ import org.apache.lucene.index.IndexRead
* @see Sampler
* @lucene.experimental
*/
-public class SamplingAccumulator extends StandardFacetsAccumulator {
+public class SamplingAccumulator extends OldFacetsAccumulator {
private double samplingRatio = -1d;
private final Sampler sampler;
@@ -79,30 +80,44 @@ public class SamplingAccumulator extends
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// Replacing the original searchParams with the over-sampled
FacetSearchParams original = searchParams;
- searchParams = sampler.overSampledSearchParams(original);
+ SampleFixer samplerFixer = sampler.samplingParams.getSampleFixer();
+ final boolean shouldOversample = sampler.samplingParams.shouldOverSample();
+ if (shouldOversample) {
+ searchParams = sampler.overSampledSearchParams(original);
+ }
List<FacetResult> sampleRes = super.accumulate(docids);
- List<FacetResult> fixedRes = new ArrayList<FacetResult>();
+ List<FacetResult> results = new ArrayList<FacetResult>();
for (FacetResult fres : sampleRes) {
// for sure fres is not null because this is guaranteed by the delegee.
- PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
- // fix the result of current request
- sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres);
+ FacetRequest fr = fres.getFacetRequest();
+ PartitionsFacetResultsHandler frh = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
+ if (samplerFixer != null) {
+ // fix the result of current request
+ samplerFixer.fixResult(docids, fres, samplingRatio);
+
+ fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
+
+ if (shouldOversample) {
+ // Using the sampler to trim the extra (over-sampled) results
+ fres = sampler.trimResult(fres);
+ }
+ }
- fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
-
- // Using the sampler to trim the extra (over-sampled) results
- fres = sampler.trimResult(fres);
-
// final labeling if allowed (because labeling is a costly operation)
- frh.labelResult(fres);
- fixedRes.add(fres); // add to final results
+ if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
+ // category does not exist, add an empty result
+ results.add(emptyResult(fres.getFacetResultNode().ordinal, fr));
+ } else {
+ frh.labelResult(fres);
+ results.add(fres);
+ }
}
searchParams = original; // Back to original params
- return fixedRes;
+ return results;
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingParams.java Sun Aug 11 12:19:13 2013
@@ -28,23 +28,23 @@ public class SamplingParams {
* Default factor by which more results are requested over the sample set.
* @see SamplingParams#getOversampleFactor()
*/
- public static final double DEFAULT_OVERSAMPLE_FACTOR = 2d;
+ public static final double DEFAULT_OVERSAMPLE_FACTOR = 1d;
/**
* Default ratio between size of sample to original size of document set.
- * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
+ * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public static final double DEFAULT_SAMPLE_RATIO = 0.01;
/**
* Default maximum size of sample.
- * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
+ * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public static final int DEFAULT_MAX_SAMPLE_SIZE = 10000;
/**
* Default minimum size of sample.
- * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
+ * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public static final int DEFAULT_MIN_SAMPLE_SIZE = 100;
@@ -59,11 +59,13 @@ public class SamplingParams {
private double sampleRatio = DEFAULT_SAMPLE_RATIO;
private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD;
private double oversampleFactor = DEFAULT_OVERSAMPLE_FACTOR;
+
+ private SampleFixer sampleFixer = null;
/**
* Return the maxSampleSize.
* In no case should the resulting sample size exceed this value.
- * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
+ * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public final int getMaxSampleSize() {
return maxSampleSize;
@@ -72,7 +74,7 @@ public class SamplingParams {
/**
* Return the minSampleSize.
* In no case should the resulting sample size be smaller than this value.
- * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
+ * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public final int getMinSampleSize() {
return minSampleSize;
@@ -80,7 +82,7 @@ public class SamplingParams {
/**
* @return the sampleRatio
- * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
+ * @see Sampler#getSampleSet(org.apache.lucene.facet.old.ScoredDocIDs)
*/
public final double getSampleRatio() {
return sampleRatio;
@@ -166,4 +168,29 @@ public class SamplingParams {
this.oversampleFactor = oversampleFactor;
}
-}
\ No newline at end of file
+ /**
+ * @return {@link SampleFixer} to be used while fixing the sampled results; if
+ * <code>null</code>, no fixing will be performed
+ */
+ public SampleFixer getSampleFixer() {
+ return sampleFixer;
+ }
+
+ /**
+ * Set a {@link SampleFixer} to be used while fixing the sampled results.
+ * {@code null} means no fixing will be performed
+ */
+ public void setSampleFixer(SampleFixer sampleFixer) {
+ this.sampleFixer = sampleFixer;
+ }
+
+ /**
+ * Returns whether over-sampling should be done. By default returns
+ * {@code true} when {@link #getSampleFixer()} is not {@code null} and
+ * {@link #getOversampleFactor()} > 1, {@code false} otherwise.
+ */
+ public boolean shouldOverSample() {
+ return sampleFixer != null && oversampleFactor > 1d;
+ }
+
+}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/SamplingWrapper.java Sun Aug 11 12:19:13 2013
@@ -4,12 +4,14 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import org.apache.lucene.facet.old.OldFacetsAccumulator;
+import org.apache.lucene.facet.old.ScoredDocIDs;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.partitions.PartitionsFacetResultsHandler;
import org.apache.lucene.facet.sampling.Sampler.SampleResult;
+import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.StandardFacetsAccumulator;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -37,12 +39,12 @@ import org.apache.lucene.facet.search.St
*
* @lucene.experimental
*/
-public class SamplingWrapper extends StandardFacetsAccumulator {
+public class SamplingWrapper extends OldFacetsAccumulator {
- private StandardFacetsAccumulator delegee;
+ private OldFacetsAccumulator delegee;
private Sampler sampler;
- public SamplingWrapper(StandardFacetsAccumulator delegee, Sampler sampler) {
+ public SamplingWrapper(OldFacetsAccumulator delegee, Sampler sampler) {
super(delegee.searchParams, delegee.indexReader, delegee.taxonomyReader);
this.delegee = delegee;
this.sampler = sampler;
@@ -52,31 +54,49 @@ public class SamplingWrapper extends Sta
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// Replacing the original searchParams with the over-sampled (and without statistics-compute)
FacetSearchParams original = delegee.searchParams;
- delegee.searchParams = sampler.overSampledSearchParams(original);
+ boolean shouldOversample = sampler.samplingParams.shouldOverSample();
+
+ if (shouldOversample) {
+ delegee.searchParams = sampler.overSampledSearchParams(original);
+ }
SampleResult sampleSet = sampler.getSampleSet(docids);
List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);
- List<FacetResult> fixedRes = new ArrayList<FacetResult>();
+ List<FacetResult> results = new ArrayList<FacetResult>();
+ SampleFixer sampleFixer = sampler.samplingParams.getSampleFixer();
+
for (FacetResult fres : sampleRes) {
// for sure fres is not null because this is guaranteed by the delegee.
- PartitionsFacetResultsHandler frh = createFacetResultsHandler(fres.getFacetRequest());
- // fix the result of current request
- sampler.getSampleFixer(indexReader, taxonomyReader, searchParams).fixResult(docids, fres);
- fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any
+ FacetRequest fr = fres.getFacetRequest();
+ PartitionsFacetResultsHandler frh = createFacetResultsHandler(fr, createOrdinalValueResolver(fr));
+ if (sampleFixer != null) {
+ // fix the result of current request
+ sampleFixer.fixResult(docids, fres, sampleSet.actualSampleRatio);
+ fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
+ }
- // Using the sampler to trim the extra (over-sampled) results
- fres = sampler.trimResult(fres);
+ if (shouldOversample) {
+ // Using the sampler to trim the extra (over-sampled) results
+ fres = sampler.trimResult(fres);
+ }
// final labeling if allowed (because labeling is a costly operation)
- frh.labelResult(fres);
- fixedRes.add(fres); // add to final results
+ if (fres.getFacetResultNode().ordinal == TaxonomyReader.INVALID_ORDINAL) {
+ // category does not exist, add an empty result
+ results.add(emptyResult(fres.getFacetResultNode().ordinal, fr));
+ } else {
+ frh.labelResult(fres);
+ results.add(fres);
+ }
}
- delegee.searchParams = original; // Back to original params
+ if (shouldOversample) {
+ delegee.searchParams = original; // Back to original params
+ }
- return fixedRes;
+ return results;
}
@Override
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/sampling/TakmiSampleFixer.java Sun Aug 11 12:19:13 2013
@@ -2,21 +2,19 @@ package org.apache.lucene.facet.sampling
import java.io.IOException;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.Bits;
-
+import org.apache.lucene.facet.old.ScoredDocIDs;
+import org.apache.lucene.facet.old.ScoredDocIDsIterator;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.DrillDownQuery;
-import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
-import org.apache.lucene.facet.search.ScoredDocIDs;
-import org.apache.lucene.facet.search.ScoredDocIDsIterator;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -36,16 +34,21 @@ import org.apache.lucene.facet.taxonomy.
*/
/**
- * Fix sampling results by counting the intersection between two lists: a
- * TermDocs (list of documents in a certain category) and a DocIdSetIterator
- * (list of documents matching the query).
- *
+ * Fix sampling results by computing the correct results, counting the intersection between
+ * two lists: a TermDocs (list of documents in a certain category) and a
+ * DocIdSetIterator (list of documents matching the query).
+ * <p>
+ * This fixer is suitable for scenarios which prioritize accuracy over
+ * performance.
+ * <p>
+ * <b>Note:</b> for statistically more accurate top-k selection, set
+ * {@link SamplingParams#setOversampleFactor(double) oversampleFactor} to at
+ * least 2, so that the top-k categories would have better chance of showing up
+ * in the sampled top-cK results (see {@link SamplingParams#getOversampleFactor}).
*
* @lucene.experimental
*/
-// TODO (Facet): implement also an estimated fixing by ratio (taking into
-// account "translation" of counts!)
-class TakmiSampleFixer implements SampleFixer {
+public class TakmiSampleFixer extends SampleFixer {
private TaxonomyReader taxonomyReader;
private IndexReader indexReader;
@@ -59,28 +62,10 @@ class TakmiSampleFixer implements Sample
}
@Override
- public void fixResult(ScoredDocIDs origDocIds, FacetResult fres)
- throws IOException {
- FacetResultNode topRes = fres.getFacetResultNode();
- fixResultNode(topRes, origDocIds);
- }
-
- /**
- * Fix result node count, and, recursively, fix all its children
- *
- * @param facetResNode
- * result node to be fixed
- * @param docIds
- * docids in effect
- * @throws IOException If there is a low-level I/O error.
- */
- private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds) throws IOException {
+ public void singleNodeFix(FacetResultNode facetResNode, ScoredDocIDs docIds, double samplingRatio) throws IOException {
recount(facetResNode, docIds);
- for (FacetResultNode frn : facetResNode.subResults) {
- fixResultNode(frn, docIds);
- }
}
-
+
/**
* Internal utility: recount for a facet result node
*
@@ -179,4 +164,5 @@ class TakmiSampleFixer implements Sample
}
return false; // exhausted
}
+
}
\ No newline at end of file
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java Sun Aug 11 12:19:13 2013
@@ -1,8 +1,7 @@
package org.apache.lucene.facet.search;
-import org.apache.lucene.facet.complements.ComplementCountingAggregator;
+import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -33,23 +32,8 @@ public class CountFacetRequest extends F
}
@Override
- public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
- // we rely on that, if needed, result is cleared by arrays!
- int[] a = arrays.getIntArray();
- if (useComplements) {
- return new ComplementCountingAggregator(a);
- }
- return new CountingAggregator(a);
- }
-
- @Override
- public double getValueOf(FacetArrays arrays, int ordinal) {
- return arrays.getIntArray()[ordinal];
- }
-
- @Override
- public FacetArraysSource getFacetArraysSource() {
- return FacetArraysSource.INT;
+ public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) {
+ return CountingFacetsAggregator.create(fip.getCategoryListParams(categoryPath));
}
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java Sun Aug 11 12:19:13 2013
@@ -2,6 +2,7 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
+import org.apache.lucene.facet.encoding.DGapVInt8IntDecoder;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.util.IntsRef;
@@ -34,6 +35,18 @@ import org.apache.lucene.util.IntsRef;
*/
public class CountingFacetsAggregator extends IntRollupFacetsAggregator {
+ /**
+ * Returns a {@link FacetsAggregator} suitable for counting categories given
+ * the {@link CategoryListParams}.
+ */
+ public static FacetsAggregator create(CategoryListParams clp) {
+ if (clp.createEncoder().createMatchingDecoder().getClass() == DGapVInt8IntDecoder.class) {
+ return new FastCountingFacetsAggregator();
+ } else {
+ return new CountingFacetsAggregator();
+ }
+ }
+
private final IntsRef ordinals = new IntsRef(32);
@Override
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DepthOneFacetResultsHandler.java Sun Aug 11 12:19:13 2013
@@ -9,6 +9,7 @@ import java.util.Comparator;
import org.apache.lucene.facet.search.FacetRequest.SortOrder;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.PriorityQueue;
/*
@@ -31,12 +32,11 @@ import org.apache.lucene.util.PriorityQu
/**
* A {@link FacetResultsHandler} which counts the top-K facets at depth 1 only
* and always labels all result categories. The results are always sorted by
- * value, in descending order. Sub-classes are responsible to pull the values
- * from the corresponding {@link FacetArrays}.
+ * value, in descending order.
*
* @lucene.experimental
*/
-public abstract class DepthOneFacetResultsHandler extends FacetResultsHandler {
+public class DepthOneFacetResultsHandler extends FacetResultsHandler {
private static class FacetResultNodeQueue extends PriorityQueue<FacetResultNode> {
@@ -51,40 +51,19 @@ public abstract class DepthOneFacetResul
@Override
protected boolean lessThan(FacetResultNode a, FacetResultNode b) {
- if (a.value < b.value) return true;
- if (a.value > b.value) return false;
- // both have the same value, break tie by ordinal
- return a.ordinal < b.ordinal;
+ return a.compareTo(b) < 0;
}
}
- public DepthOneFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays) {
- super(taxonomyReader, facetRequest, facetArrays);
+ public DepthOneFacetResultsHandler(TaxonomyReader taxonomyReader, FacetRequest facetRequest, FacetArrays facetArrays,
+ OrdinalValueResolver resolver) {
+ super(taxonomyReader, facetRequest, resolver, facetArrays);
assert facetRequest.getDepth() == 1 : "this handler only computes the top-K facets at depth 1";
assert facetRequest.numResults == facetRequest.getNumLabel() : "this handler always labels all top-K results";
assert facetRequest.getSortOrder() == SortOrder.DESCENDING : "this handler always sorts results in descending order";
}
- /** Returnt the value of the requested ordinal. Called once for the result root. */
- protected abstract double valueOf(int ordinal);
-
- /**
- * Add the siblings of {@code ordinal} to the given list. This is called
- * whenever the number of results is too high (> taxonomy size), instead of
- * adding them to a {@link PriorityQueue}.
- */
- protected abstract void addSiblings(int ordinal, int[] siblings, ArrayList<FacetResultNode> nodes) throws IOException;
-
- /**
- * Add the siblings of {@code ordinal} to the given {@link PriorityQueue}. The
- * given {@link PriorityQueue} is already filled with sentinel objects, so
- * implementations are encouraged to use {@link PriorityQueue#top()} and
- * {@link PriorityQueue#updateTop()} for best performance. Returns the total
- * number of siblings.
- */
- protected abstract int addSiblings(int ordinal, int[] siblings, PriorityQueue<FacetResultNode> pq);
-
@Override
public final FacetResult compute() throws IOException {
ParallelTaxonomyArrays arrays = taxonomyReader.getParallelTaxonomyArrays();
@@ -93,23 +72,28 @@ public abstract class DepthOneFacetResul
int rootOrd = taxonomyReader.getOrdinal(facetRequest.categoryPath);
- FacetResultNode root = new FacetResultNode(rootOrd, valueOf(rootOrd));
+ FacetResultNode root = new FacetResultNode(rootOrd, resolver.valueOf(rootOrd));
root.label = facetRequest.categoryPath;
if (facetRequest.numResults > taxonomyReader.getSize()) {
// specialize this case, user is interested in all available results
ArrayList<FacetResultNode> nodes = new ArrayList<FacetResultNode>();
- int child = children[rootOrd];
- addSiblings(child, siblings, nodes);
- Collections.sort(nodes, new Comparator<FacetResultNode>() {
+ int ordinal = children[rootOrd];
+ while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
+ double value = resolver.valueOf(ordinal);
+ if (value > 0) {
+ FacetResultNode node = new FacetResultNode(ordinal, value);
+ node.label = taxonomyReader.getPath(ordinal);
+ nodes.add(node);
+ }
+ ordinal = siblings[ordinal];
+ }
+
+ CollectionUtil.introSort(nodes, Collections.reverseOrder(new Comparator<FacetResultNode>() {
@Override
public int compare(FacetResultNode o1, FacetResultNode o2) {
- int value = (int) (o2.value - o1.value);
- if (value == 0) {
- value = o2.ordinal - o1.ordinal;
- }
- return value;
+ return o1.compareTo(o2);
}
- });
+ }));
root.subResults = nodes;
return new FacetResult(facetRequest, root, nodes.size());
@@ -117,7 +101,21 @@ public abstract class DepthOneFacetResul
// since we use sentinel objects, we cannot reuse PQ. but that's ok because it's not big
PriorityQueue<FacetResultNode> pq = new FacetResultNodeQueue(facetRequest.numResults, true);
- int numSiblings = addSiblings(children[rootOrd], siblings, pq);
+ int ordinal = children[rootOrd];
+ FacetResultNode top = pq.top();
+ int numSiblings = 0;
+ while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
+ double value = resolver.valueOf(ordinal);
+ if (value > 0) {
+ ++numSiblings;
+ if (value > top.value) {
+ top.value = value;
+ top.ordinal = ordinal;
+ top = pq.updateTop();
+ }
+ }
+ ordinal = siblings[ordinal];
+ }
// pop() the least (sentinel) elements
int pqsize = pq.size();
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java Sun Aug 11 12:19:13 2013
@@ -25,7 +25,10 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields;
+import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -70,11 +73,26 @@ public class DrillSideways {
protected final IndexSearcher searcher;
protected final TaxonomyReader taxoReader;
-
- /** Create a new {@code DrillSideways} instance. */
+ protected final SortedSetDocValuesReaderState state;
+
+ /**
+ * Create a new {@code DrillSideways} instance, assuming the categories were
+ * indexed with {@link FacetFields}.
+ */
public DrillSideways(IndexSearcher searcher, TaxonomyReader taxoReader) {
this.searcher = searcher;
this.taxoReader = taxoReader;
+ this.state = null;
+ }
+
+ /**
+ * Create a new {@code DrillSideways} instance, assuming the categories were
+ * indexed with {@link SortedSetDocValuesFacetFields}.
+ */
+ public DrillSideways(IndexSearcher searcher, SortedSetDocValuesReaderState state) {
+ this.searcher = searcher;
+ this.taxoReader = null;
+ this.state = state;
}
/** Moves any drill-downs that don't have a corresponding
@@ -208,9 +226,9 @@ public class DrillSideways {
requests.add(fr);
}
}
- if (requests.isEmpty()) {
- throw new IllegalArgumentException("could not find FacetRequest for drill-sideways dimension \"" + dim + "\"");
- }
+ // We already moved all drill-downs that didn't have a
+ // FacetRequest, in moveDrillDownOnlyClauses above:
+ assert !requests.isEmpty();
drillSidewaysCollectors[idx++] = FacetsCollector.create(getDrillSidewaysAccumulator(dim, new FacetSearchParams(fsp.indexingParams, requests)));
}
@@ -402,16 +420,20 @@ public class DrillSideways {
query = new DrillDownQuery(filter, query);
}
if (sort != null) {
+ int limit = searcher.getIndexReader().maxDoc();
+ if (limit == 0) {
+ limit = 1; // the collector does not allow numHits = 0
+ }
+ topN = Math.min(topN, limit);
final TopFieldCollector hitCollector = TopFieldCollector.create(sort,
- Math.min(topN, searcher.getIndexReader().maxDoc()),
+ topN,
after,
true,
doDocScores,
doMaxScore,
true);
DrillSidewaysResult r = search(query, hitCollector, fsp);
- r.hits = hitCollector.topDocs();
- return r;
+ return new DrillSidewaysResult(r.facetResults, hitCollector.topDocs());
} else {
return search(after, query, topN, fsp);
}
@@ -423,22 +445,34 @@ public class DrillSideways {
*/
public DrillSidewaysResult search(ScoreDoc after,
DrillDownQuery query, int topN, FacetSearchParams fsp) throws IOException {
- TopScoreDocCollector hitCollector = TopScoreDocCollector.create(Math.min(topN, searcher.getIndexReader().maxDoc()), after, true);
+ int limit = searcher.getIndexReader().maxDoc();
+ if (limit == 0) {
+ limit = 1; // the collector does not allow numHits = 0
+ }
+ topN = Math.min(topN, limit);
+ TopScoreDocCollector hitCollector = TopScoreDocCollector.create(topN, after, true);
DrillSidewaysResult r = search(query, hitCollector, fsp);
- r.hits = hitCollector.topDocs();
- return r;
+ return new DrillSidewaysResult(r.facetResults, hitCollector.topDocs());
}
/** Override this to use a custom drill-down {@link
* FacetsAccumulator}. */
protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) throws IOException {
- return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader);
+ if (taxoReader != null) {
+ return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader, null);
+ } else {
+ return FacetsAccumulator.create(fsp, state, null);
+ }
}
/** Override this to use a custom drill-sideways {@link
* FacetsAccumulator}. */
protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) throws IOException {
- return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader);
+ if (taxoReader != null) {
+ return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader, null);
+ } else {
+ return FacetsAccumulator.create(fsp, state, null);
+ }
}
/** Override this and return true if your collector
@@ -454,16 +488,20 @@ public class DrillSideways {
return false;
}
- /** Represents the returned result from a drill sideways
- * search. */
+ /**
+ * Represents the returned result from a drill sideways search. Note that if
+ * you called
+ * {@link DrillSideways#search(DrillDownQuery, Collector, FacetSearchParams)},
+ * then {@link #hits} will be {@code null}.
+ */
public static class DrillSidewaysResult {
/** Combined drill down & sideways results. */
public final List<FacetResult> facetResults;
/** Hits. */
- public TopDocs hits;
+ public final TopDocs hits;
- DrillSidewaysResult(List<FacetResult> facetResults, TopDocs hits) {
+ public DrillSidewaysResult(List<FacetResult> facetResults, TopDocs hits) {
this.facetResults = facetResults;
this.hits = hits;
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysQuery.java Sun Aug 11 12:19:13 2013
@@ -119,17 +119,19 @@ class DrillSidewaysQuery extends Query {
Terms terms = reader.terms(field);
if (terms != null) {
termsEnum = terms.iterator(null);
+ } else {
+ termsEnum = null;
}
lastField = field;
}
+ dims[dim].docsEnums = new DocsEnum[drillDownTerms[dim].length];
if (termsEnum == null) {
nullCount++;
continue;
}
- dims[dim].docsEnums = new DocsEnum[drillDownTerms[dim].length];
for(int i=0;i<drillDownTerms[dim].length;i++) {
- if (termsEnum.seekExact(drillDownTerms[dim][i].bytes(), false)) {
- DocsEnum docsEnum = termsEnum.docs(null, null);
+ if (termsEnum.seekExact(drillDownTerms[dim][i].bytes())) {
+ DocsEnum docsEnum = termsEnum.docs(null, null, 0);
if (docsEnum != null) {
dims[dim].docsEnums[i] = docsEnum;
dims[dim].maxCost = Math.max(dims[dim].maxCost, docsEnum.cost());
@@ -138,7 +140,7 @@ class DrillSidewaysQuery extends Query {
}
}
- if (nullCount > 1) {
+ if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
return null;
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSidewaysScorer.java Sun Aug 11 12:19:13 2013
@@ -80,7 +80,7 @@ class DrillSidewaysScorer extends Scorer
// Position all scorers to their first matching doc:
baseScorer.nextDoc();
for(DocsEnumsAndFreq dim : dims) {
- for(DocsEnum docsEnum : dim.docsEnums) {
+ for (DocsEnum docsEnum : dim.docsEnums) {
if (docsEnum != null) {
docsEnum.nextDoc();
}
@@ -95,7 +95,7 @@ class DrillSidewaysScorer extends Scorer
for(int dim=0;dim<numDims;dim++) {
docsEnums[dim] = dims[dim].docsEnums;
sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
- for(DocsEnum de : dims[dim].docsEnums) {
+ for (DocsEnum de : dims[dim].docsEnums) {
if (de != null) {
drillDownCost += de.cost();
}
Modified: lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java?rev=1512909&r1=1512908&r2=1512909&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java (original)
+++ lucene/dev/branches/lucene4956/lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java Sun Aug 11 12:19:13 2013
@@ -1,9 +1,9 @@
package org.apache.lucene.facet.search;
-import java.io.IOException;
-
+import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy;
+import org.apache.lucene.facet.params.FacetIndexingParams;
+import org.apache.lucene.facet.range.RangeFacetRequest;
import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -23,90 +23,70 @@ import org.apache.lucene.facet.taxonomy.
*/
/**
- * Request to accumulate facet information for a specified facet and possibly
- * also some of its descendants, upto a specified depth.
+ * Defines an aggregation request for a category. Allows specifying the
+ * {@link #numResults number of child categories} to return as well as
+ * {@link #getSortOrder() which} categories to consider the "top" (highest or
+ * lowest ranking ones).
* <p>
- * The facet request additionally defines what information should
- * be computed within the facet results, if and how should results
- * be ordered, etc.
- * <P>
- * An example facet request is to look at all sub-categories of "Author", and
- * return the 10 with the highest counts (sorted by decreasing count).
+ * If the category being aggregated is hierarchical, you can also specify the
+ * {@link #setDepth(int) depth} up to which to aggregate child categories as well
+ * as how the result should be {@link #setResultMode(ResultMode) constructed}.
*
* @lucene.experimental
*/
public abstract class FacetRequest {
/**
- * Result structure manner of applying request's limits such as
- * {@link FacetRequest#getNumLabel()} and {@link FacetRequest#numResults}.
- * Only relevant when {@link FacetRequest#getDepth()} is > 1.
+ * When {@link FacetRequest#getDepth()} is greater than 1, defines the
+ * structure of the result as well as how constraints such as
+ * {@link FacetRequest#numResults} and {@link FacetRequest#getNumLabel()} are
+ * applied.
*/
public enum ResultMode {
- /** Limits are applied per node, and the result has a full tree structure. */
+ /**
+ * Constraints are applied per node, and the result has a full tree
+ * structure. Default result mode.
+ */
PER_NODE_IN_TREE,
- /** Limits are applied globally, on total number of results, and the result has a flat structure. */
+ /**
+ * Constraints are applied globally, on total number of results, and the
+ * result has a flat structure.
+ */
GLOBAL_FLAT
}
/**
- * Specifies which array of {@link FacetArrays} should be used to resolve
- * values. When set to {@link #INT} or {@link #FLOAT}, allows creating an
- * optimized {@link FacetResultsHandler}, which does not call
- * {@link FacetRequest#getValueOf(FacetArrays, int)} for every ordinals.
- * <p>
- * If set to {@link #BOTH}, the {@link FacetResultsHandler} will use
- * {@link FacetRequest#getValueOf(FacetArrays, int)} to resolve ordinal
- * values, although it is recommended that you consider writing a specialized
- * {@link FacetResultsHandler}.
+ * Defines which categories to return. If {@link #DESCENDING} (the default),
+ * the highest {@link FacetRequest#numResults} weighted categories will be
+ * returned, otherwise the lowest ones.
*/
- public enum FacetArraysSource { INT, FLOAT, BOTH }
-
- /** Requested sort order for the results. */
public enum SortOrder { ASCENDING, DESCENDING }
-
- /**
- * Default depth for facets accumulation.
- * @see #getDepth()
- */
- public static final int DEFAULT_DEPTH = 1;
-
- /**
- * Default result mode
- * @see #getResultMode()
- */
- public static final ResultMode DEFAULT_RESULT_MODE = ResultMode.PER_NODE_IN_TREE;
-
+
+ /** The category being aggregated in this facet request. */
public final CategoryPath categoryPath;
+
+ /** The number of child categories to return for {@link #categoryPath}. */
public final int numResults;
private int numLabel;
- private int depth;
- private SortOrder sortOrder;
+ private int depth = 1;
+ private SortOrder sortOrder = SortOrder.DESCENDING;
+ private ResultMode resultMode = ResultMode.PER_NODE_IN_TREE;
- /**
- * Computed at construction, this hashCode is based on two final members
- * {@link CategoryPath} and <code>numResults</code>
- */
+ // Computed at construction; based on categoryPath and numResults.
private final int hashCode;
- private ResultMode resultMode = DEFAULT_RESULT_MODE;
-
/**
- * Initialize the request with a given path, and a requested number of facets
- * results. By default, all returned results would be labeled - to alter this
- * default see {@link #setNumLabel(int)}.
- * <p>
- * <b>NOTE:</b> if <code>numResults</code> is given as
- * <code>Integer.MAX_VALUE</code> than all the facet results would be
- * returned, without any limit.
- * <p>
- * <b>NOTE:</b> it is assumed that the given {@link CategoryPath} is not
- * modified after construction of this object. Otherwise, some things may not
- * function properly, e.g. {@link #hashCode()}.
+ * Constructor with the given category to aggregate and the number of child
+ * categories to return.
*
- * @throws IllegalArgumentException if numResults is ≤ 0
+ * @param path
+ * the category to aggregate. Cannot be {@code null}.
+ * @param numResults
+ * the number of child categories to return. If set to
+ * {@code Integer.MAX_VALUE}, all immediate child categories will be
+ * returned. Must be greater than 0.
*/
public FacetRequest(CategoryPath path, int numResults) {
if (numResults <= 0) {
@@ -118,152 +98,116 @@ public abstract class FacetRequest {
categoryPath = path;
this.numResults = numResults;
numLabel = numResults;
- depth = DEFAULT_DEPTH;
- sortOrder = SortOrder.DESCENDING;
-
hashCode = categoryPath.hashCode() ^ this.numResults;
}
/**
- * Create an aggregator for this facet request. Aggregator action depends on
- * request definition. For a count request, it will usually increment the
- * count for that facet.
- *
- * @param useComplements
- * whether the complements optimization is being used for current
- * computation.
- * @param arrays
- * provider for facet arrays in use for current computation.
- * @param taxonomy
- * reader of taxonomy in effect.
- * @throws IOException If there is a low-level I/O error.
+ * Returns the {@link FacetsAggregator} which can aggregate the categories of
+ * this facet request. The aggregator is expected to aggregate category values
+ * into {@link FacetArrays}. If the facet request does not support that, e.g.
+ * {@link RangeFacetRequest}, it can return {@code null}. Note though that
+ * such requests require a dedicated {@link FacetsAccumulator}.
*/
- public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
- throws IOException {
- throw new UnsupportedOperationException("this FacetRequest does not support this type of Aggregator anymore; " +
- "you should override FacetsAccumulator to return the proper FacetsAggregator");
- }
+ public abstract FacetsAggregator createFacetsAggregator(FacetIndexingParams fip);
@Override
public boolean equals(Object o) {
if (o instanceof FacetRequest) {
- FacetRequest that = (FacetRequest)o;
- return that.hashCode == this.hashCode &&
+ FacetRequest that = (FacetRequest) o;
+ return that.hashCode == this.hashCode &&
that.categoryPath.equals(this.categoryPath) &&
that.numResults == this.numResults &&
that.depth == this.depth &&
that.resultMode == this.resultMode &&
- that.numLabel == this.numLabel;
+ that.numLabel == this.numLabel &&
+ that.sortOrder == this.sortOrder;
}
return false;
}
/**
- * How deeply to look under the given category. If the depth is 0,
- * only the category itself is counted. If the depth is 1, its immediate
- * children are also counted, and so on. If the depth is Integer.MAX_VALUE,
- * all the category's descendants are counted.<br>
+ * How deeply to look under {@link #categoryPath}. By default, only its
+ * immediate children are aggregated (depth=1). If set to
+ * {@code Integer.MAX_VALUE}, the entire sub-tree of the category will be
+ * aggregated.
+ * <p>
+ * <b>NOTE:</b> setting depth to 0 means that only the category itself should
+ * be aggregated. In that case, make sure to index the category with
+ * {@link OrdinalPolicy#ALL_PARENTS}, unless it is not the root category (the
+ * dimension), in which case {@link OrdinalPolicy#ALL_BUT_DIMENSION} is fine
+ * too.
*/
public final int getDepth() {
- // TODO add AUTO_EXPAND option
+ // TODO an AUTO_EXPAND option could be useful
return depth;
}
/**
- * Returns the {@link FacetArraysSource} this {@link FacetRequest} uses in
- * {@link #getValueOf(FacetArrays, int)}.
- */
- public abstract FacetArraysSource getFacetArraysSource();
-
- /**
- * If getNumLabel() < getNumResults(), only the first getNumLabel() results
- * will have their category paths calculated, and the rest will only be
- * available as ordinals (category numbers) and will have null paths.
- * <P>
- * If Integer.MAX_VALUE is specified, all results are labled.
- * <P>
- * The purpose of this parameter is to avoid having to run the whole faceted
- * search again when the user asks for more values for the facet; The
- * application can ask (getNumResults()) for more values than it needs to
- * show, but keep getNumLabel() only the number it wants to immediately show.
- * The slow-down caused by finding more values is negligible, because the
- * slowest part - finding the categories' paths, is avoided.
+ * Allows specifying the number of categories to label. By default all
+ * returned categories are labeled.
* <p>
- * Depending on the {@link #getResultMode() LimitsMode}, this limit is applied
- * globally or per results node. In the global mode, if this limit is 3, only
- * 3 top results would be labeled. In the per-node mode, if this limit is 3, 3
- * top children of {@link #categoryPath the target category} would be labeled,
- * as well as 3 top children of each of them, and so forth, until the depth
- * defined by {@link #getDepth()}.
- *
- * @see #getResultMode()
+ * This allows an app to request a large number of results to return, while
+ * labeling them on-demand (e.g. when the UI requests to show more
+ * categories).
*/
public final int getNumLabel() {
return numLabel;
}
- /** Return the requested result mode. */
+ /** Return the requested result mode (defaults to {@link ResultMode#PER_NODE_IN_TREE}). */
public final ResultMode getResultMode() {
return resultMode;
}
- /** Return the requested order of results. */
+ /** Return the requested order of results (defaults to {@link SortOrder#DESCENDING}). */
public final SortOrder getSortOrder() {
return sortOrder;
}
- /**
- * Return the value of a category used for facets computations for this
- * request. For a count request this would be the count for that facet, i.e.
- * an integer number. but for other requests this can be the result of a more
- * complex operation, and the result can be any double precision number.
- * Having this method with a general name <b>value</b> which is double
- * precision allows to have more compact API and code for handling counts and
- * perhaps other requests (such as for associations) very similarly, and by
- * the same code and API, avoiding code duplication.
- *
- * @param arrays
- * provider for facet arrays in use for current computation.
- * @param idx
- * an index into the count arrays now in effect in
- * <code>arrays</code>. E.g., for ordinal number <i>n</i>, with
- * partition, of size <i>partitionSize</i>, now covering <i>n</i>,
- * <code>getValueOf</code> would be invoked with <code>idx</code>
- * being <i>n</i> % <i>partitionSize</i>.
- */
- // TODO perhaps instead of getValueOf we can have a postProcess(FacetArrays)
- // That, together with getFacetArraysSource should allow ResultHandlers to
- // efficiently obtain the values from the arrays directly
- public abstract double getValueOf(FacetArrays arrays, int idx);
-
@Override
public int hashCode() {
return hashCode;
}
+ /**
+ * Sets the depth up to which to aggregate facets.
+ *
+ * @see #getDepth()
+ */
public void setDepth(int depth) {
this.depth = depth;
}
+ /**
+ * Sets the number of categories to label.
+ *
+ * @see #getNumLabel()
+ */
public void setNumLabel(int numLabel) {
this.numLabel = numLabel;
}
/**
- * @param resultMode the resultMode to set
+ * Sets the {@link ResultMode} for this request.
+ *
* @see #getResultMode()
*/
public void setResultMode(ResultMode resultMode) {
this.resultMode = resultMode;
}
-
+
+ /**
+ * Sets the {@link SortOrder} for this request.
+ *
+ * @see #getSortOrder()
+ */
public void setSortOrder(SortOrder sortOrder) {
this.sortOrder = sortOrder;
}
@Override
public String toString() {
- return categoryPath.toString()+" nRes="+numResults+" nLbl="+numLabel;
+ return categoryPath.toString() + " nRes=" + numResults + " nLbl=" + numLabel;
}
}