You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2015/01/15 19:33:01 UTC
svn commit: r1652212 - in /lucene/dev/trunk/lucene/spatial/src:
java/org/apache/lucene/spatial/ java/org/apache/lucene/spatial/prefix/
java/org/apache/lucene/spatial/prefix/tree/
test/org/apache/lucene/spatial/prefix/
Author: dsmiley
Date: Thu Jan 15 18:33:00 2015
New Revision: 1652212
URL: http://svn.apache.org/r1652212
Log:
LUCENE-5735: spatial PrefixTreeFacetCounter (for faceting on SpatialPrefixTrees)
Added:
lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/PrefixTreeFacetCounter.java (with props)
Removed:
lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeFacets.java
Modified:
lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/NumberRangePrefixTreeStrategy.java
lucene/dev/trunk/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/NumberRangeFacetsTest.java
Modified: lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/NumberRangePrefixTreeStrategy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/NumberRangePrefixTreeStrategy.java?rev=1652212&r1=1652211&r2=1652212&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/NumberRangePrefixTreeStrategy.java (original)
+++ lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/NumberRangePrefixTreeStrategy.java Thu Jan 15 18:33:00 2015
@@ -29,10 +29,11 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.spatial.prefix.NumberRangePrefixTreeFacets;
+import org.apache.lucene.search.Filter;
import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
+import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
-import org.apache.lucene.util.Bits;
+import org.apache.lucene.spatial.prefix.tree.PrefixTreeFacetCounter;
import static org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
@@ -76,13 +77,13 @@ public class NumberRangePrefixTreeStrate
/** Calculates facets between {@code start} and {@code end} to a detail level one greater than that provided by the
* arguments. For example providing March to October of 2014 would return facets to the day level of those months.
* This is just a convenience method.
- * @see #calcFacets(IndexReaderContext, Bits, Shape, int)
+ * @see #calcFacets(IndexReaderContext, Filter, Shape, int)
*/
- public Facets calcFacets(IndexReaderContext context, final Bits acceptDocs, UnitNRShape start, UnitNRShape end)
+ public Facets calcFacets(IndexReaderContext context, Filter filter, UnitNRShape start, UnitNRShape end)
throws IOException {
- Shape filter = getGrid().toRangeShape(start, end);
+ Shape facetRange = getGrid().toRangeShape(start, end);
int detailLevel = Math.max(start.getLevel(), end.getLevel()) + 1;
- return calcFacets(context, acceptDocs, filter, detailLevel);
+ return calcFacets(context, filter, facetRange, detailLevel);
}
/**
@@ -92,9 +93,50 @@ public class NumberRangePrefixTreeStrate
* {@link org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape#getLevel()}.
* Facet computation is implemented by navigating the underlying indexed terms efficiently.
*/
- public Facets calcFacets(IndexReaderContext context, final Bits acceptDocs, Shape facetRange, int level)
+ public Facets calcFacets(IndexReaderContext context, Filter filter, Shape facetRange, final int level)
throws IOException {
- return NumberRangePrefixTreeFacets.compute(this, context, acceptDocs, facetRange, level);
+ final Facets facets = new Facets(level);
+ PrefixTreeFacetCounter.compute(this, context, filter, facetRange, level,
+ new PrefixTreeFacetCounter.FacetVisitor() {
+ Facets.FacetParentVal parentFacet;
+ UnitNRShape parentShape;
+
+ @Override
+ public void visit(Cell cell, int count) {
+ if (cell.getLevel() < level - 1) {//some ancestor of parent facet level, direct or distant
+ parentFacet = null;//reset
+ parentShape = null;//reset
+ facets.topLeaves += count;
+ } else if (cell.getLevel() == level - 1) {//parent
+ //set up FacetParentVal
+ setupParent((UnitNRShape) cell.getShape());
+ parentFacet.parentLeaves += count;
+ } else {//at facet level
+ UnitNRShape unitShape = (UnitNRShape) cell.getShape();
+ UnitNRShape unitShapeParent = unitShape.getShapeAtLevel(unitShape.getLevel() - 1);
+ if (parentFacet == null || !parentShape.equals(unitShapeParent)) {
+ setupParent(unitShapeParent);
+ }
+ //lazy init childCounts
+ if (parentFacet.childCounts == null) {
+ parentFacet.childCounts = new int[parentFacet.childCountsLen];
+ }
+ parentFacet.childCounts[unitShape.getValAtLevel(cell.getLevel())] += count;
+ }
+ }
+
+ private void setupParent(UnitNRShape unitShape) {
+ parentShape = unitShape.clone();
+ //Look for existing parentFacet (from previous segment), or create anew if needed
+ parentFacet = facets.parents.get(parentShape);
+ if (parentFacet == null) {//didn't find one; make a new one
+ parentFacet = new Facets.FacetParentVal();
+ parentFacet.childCountsLen = getGrid().getNumSubCells(parentShape);
+ facets.parents.put(parentShape, parentFacet);
+ }
+ }
+ });
+ return facets;
}
/** Facet response information */
Added: lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/PrefixTreeFacetCounter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/PrefixTreeFacetCounter.java?rev=1652212&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/PrefixTreeFacetCounter.java (added)
+++ lucene/dev/trunk/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/PrefixTreeFacetCounter.java Thu Jan 15 18:33:00 2015
@@ -0,0 +1,191 @@
+package org.apache.lucene.spatial.prefix.tree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import com.spatial4j.core.shape.Shape;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.spatial.prefix.AbstractVisitingPrefixTreeFilter;
+import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.SparseFixedBitSet;
+
+/**
+ * Computes facets on cells for {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy}.
+ *
+ * @lucene.experimental
+ */
+public class PrefixTreeFacetCounter {
+
+ /** A callback/visitor of facet counts. */
+ public static abstract class FacetVisitor {
+ /** Called at the start of the segment, if there is indexed data. */
+ public void startOfSegment() {}
+
+ /** Called for cells with a leaf, or cells at the target facet level. {@code count} is greater than zero.
+ * When an ancestor cell is given with non-zero count, the count can be considered to be added to all cells
+ * below. You won't necessarily get a cell at level {@code facetLevel} if the indexed data is courser (bigger).
+ */
+ public abstract void visit(Cell cell, int count);
+ }
+
+ private PrefixTreeFacetCounter() {
+ }
+
+ /**
+ * Computes facets using a callback/visitor style design, allowing flexibility for the caller to determine what to do
+ * with each underlying count.
+ *
+ * @param strategy the prefix tree strategy (contains the field reference, grid, max levels)
+ * @param context the IndexReader's context
+ * @param filter a Filter to limit counted docs. For optimal performance, it's
+ * {@link org.apache.lucene.search.DocIdSet#bits()} should be non-null. If no filter is provided, live
+ * docs are counted.
+ * @param queryShape the shape to limit the range of facet counts to
+ * @param facetLevel the maximum depth (detail) of faceted cells
+ * @param facetVisitor the visitor/callback to receive the counts
+ */
+ public static void compute(PrefixTreeStrategy strategy, IndexReaderContext context, Filter filter,
+ Shape queryShape, int facetLevel, FacetVisitor facetVisitor)
+ throws IOException {
+ //We collect per-leaf
+ for (final LeafReaderContext leafCtx : context.leaves()) {
+ //determine leaf acceptDocs Bits
+ Bits leafAcceptDocs;
+ if (filter == null) {
+ leafAcceptDocs = leafCtx.reader().getLiveDocs();//filter deleted
+ } else {
+ final DocIdSet docIdSet = filter.getDocIdSet(leafCtx, leafCtx.reader().getLiveDocs());
+ if (docIdSet == null) {
+ continue;//no docs in filter
+ }
+ leafAcceptDocs = docIdSet.bits();
+ if (leafAcceptDocs == null) {
+ final DocIdSetIterator iterator = docIdSet.iterator();
+ if (iterator == null) {
+ continue;//no docs in filter
+ }
+ //build bits from iterator (abnormal, hopefully, not expecting many docs)
+ SparseFixedBitSet bitSet = new SparseFixedBitSet(leafCtx.reader().maxDoc());
+ bitSet.or(iterator);
+ leafAcceptDocs = bitSet;
+ }
+ }
+
+ compute(strategy, leafCtx, leafAcceptDocs, queryShape, facetLevel, facetVisitor);
+ }
+ }
+
+ /** Lower-level per-leaf segment method. */
+ public static void compute(final PrefixTreeStrategy strategy, final LeafReaderContext context, final Bits acceptDocs,
+ final Shape queryShape, final int facetLevel, final FacetVisitor facetVisitor)
+ throws IOException {
+ if (acceptDocs != null && acceptDocs.length() != context.reader().maxDoc()) {
+ throw new IllegalArgumentException(
+ "acceptDocs bits length " + acceptDocs.length() +" != leaf maxdoc " + context.reader().maxDoc());
+ }
+ final SpatialPrefixTree tree = strategy.getGrid();
+
+ //scanLevel is an optimization knob of AbstractVisitingPrefixTreeFilter. It's unlikely
+ // another scanLevel would be much faster and it tends to be a risky knob (can help a little, can hurt a ton).
+ // TODO use RPT's configured scan level? Do we know better here? Hard to say.
+ final int scanLevel = tree.getMaxLevels();
+
+ //AbstractVisitingPrefixTreeFilter is a Lucene Filter. We don't need a filter; we use it for its great prefix-tree
+ // traversal code. TODO consider refactoring if/when it makes sense (more use cases than this)
+ new AbstractVisitingPrefixTreeFilter(queryShape, strategy.getFieldName(), tree, facetLevel, scanLevel) {
+
+ @Override
+ public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ assert facetLevel == super.detailLevel;//same thing, FYI. (constant)
+
+ final boolean hasIndexedLeaves = !strategy.isPointsOnly();
+
+ return new VisitorTemplate(context, acceptDocs, hasIndexedLeaves) {
+
+ @Override
+ protected void start() throws IOException {
+ facetVisitor.startOfSegment();
+ }
+
+ @Override
+ protected DocIdSet finish() throws IOException {
+ return null;//unused;
+ }
+
+ @Override
+ protected boolean visit(Cell cell) throws IOException {
+ // At facetLevel...
+ if (cell.getLevel() == facetLevel) {
+ // Count docs
+ visitLeaf(cell);//we're not a leaf but we treat it as such at facet level
+ return false;//don't descend further; this is enough detail
+ }
+
+ // We optimize for discriminating filters (reflected in acceptDocs) and short-circuit if no
+ // matching docs. We could do this at all levels or never but the closer we get to the facet level, the
+ // higher the probability this is worthwhile. We do when docFreq == 1 because it's a cheap check, especially
+ // due to "pulsing" in the codec.
+ //TODO this opt should move to VisitorTemplate (which contains an optimization TODO to this effect)
+ if (cell.getLevel() == facetLevel - 1 || termsEnum.docFreq() == 1) {
+ if (!hasDocsAtThisTerm()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ protected void visitLeaf(Cell cell) throws IOException {
+ final int count = countDocsAtThisTerm();
+ if (count > 0) {
+ facetVisitor.visit(cell, count);
+ }
+ }
+
+ private int countDocsAtThisTerm() throws IOException {
+ if (acceptDocs == null) {
+ return termsEnum.docFreq();
+ }
+ int count = 0;
+ docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
+ while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ count++;
+ }
+ return count;
+ }
+
+ private boolean hasDocsAtThisTerm() throws IOException {
+ if (acceptDocs == null) {
+ return true;
+ }
+ docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
+ return (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
+ }
+
+ }.getDocIdSet();
+ }
+ }.getDocIdSet(context, acceptDocs);
+ }
+}
Modified: lucene/dev/trunk/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/NumberRangeFacetsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/NumberRangeFacetsTest.java?rev=1652212&r1=1652211&r2=1652212&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/NumberRangeFacetsTest.java (original)
+++ lucene/dev/trunk/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/NumberRangeFacetsTest.java Thu Jan 15 18:33:00 2015
@@ -26,11 +26,8 @@ import java.util.List;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import com.spatial4j.core.shape.Shape;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.queries.TermsFilter;
+import org.apache.lucene.search.Filter;
import org.apache.lucene.spatial.NumberRangePrefixTreeStrategy;
import org.apache.lucene.spatial.NumberRangePrefixTreeStrategy.Facets;
import org.apache.lucene.spatial.StrategyTestCase;
@@ -39,7 +36,6 @@ import org.apache.lucene.spatial.prefix.
import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
-import org.apache.lucene.util.FixedBitSet;
import org.junit.Before;
import org.junit.Test;
@@ -64,7 +60,7 @@ public class NumberRangeFacetsTest exten
randomCalWindowMs = Math.max(2000L, tmpCal.getTimeInMillis());
}
- @Repeat(iterations = 100)
+ @Repeat(iterations = 10000)
@Test
public void test() throws IOException {
//generate test data
@@ -114,47 +110,40 @@ public class NumberRangeFacetsTest exten
detailLevel = -1 * detailLevel;
}
- //Randomly pick a filter as Bits/acceptDocs
- FixedBitSet acceptDocs = null;//the answer
+ //Randomly pick a filter
+ Filter filter = null;
List<Integer> acceptFieldIds = new ArrayList<>();
if (usually()) {
//get all possible IDs into a list, random shuffle it, then randomly choose how many of the first we use to
// replace the list.
for (int i = 0; i < indexedShapes.size(); i++) {
- if (indexedShapes.get(i) == null) {
+ if (indexedShapes.get(i) == null) { // we deleted this one
continue;
}
acceptFieldIds.add(i);
}
Collections.shuffle(acceptFieldIds, random());
acceptFieldIds = acceptFieldIds.subList(0, randomInt(acceptFieldIds.size()));
- acceptDocs = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
- //query for their Lucene docIds to put into acceptDocs
if (!acceptFieldIds.isEmpty()) {
- BooleanQuery acceptQuery = new BooleanQuery();
+ List<Term> terms = new ArrayList<>();
for (Integer acceptDocId : acceptFieldIds) {
- acceptQuery.add(new TermQuery(new Term("id", acceptDocId.toString())), BooleanClause.Occur.SHOULD);
+ terms.add(new Term("id", acceptDocId.toString()));
}
- final TopDocs topDocs = indexSearcher.search(acceptQuery, numIndexedShapes);
-
- for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
- acceptDocs.set(scoreDoc.doc);
- }
-
+ filter = new TermsFilter(terms);
}
}
//Lets do it!
NumberRangePrefixTree.NRShape facetRange = tree.toRangeShape(tree.toShape(leftCal), tree.toShape(rightCal));
Facets facets = ((NumberRangePrefixTreeStrategy) strategy)
- .calcFacets(indexSearcher.getTopReaderContext(), acceptDocs, facetRange, detailLevel);
+ .calcFacets(indexSearcher.getTopReaderContext(), filter, facetRange, detailLevel);
//System.out.println("Q: " + queryIdx + " " + facets);
//Verify results. We do it by looping over indexed shapes and reducing the facet counts.
Shape facetShapeRounded = facetRange.roundToLevel(detailLevel);
for (int indexedShapeId = 0; indexedShapeId < indexedShapes.size(); indexedShapeId++) {
- if (acceptDocs != null && !acceptFieldIds.contains(indexedShapeId)) {
+ if (filter != null && !acceptFieldIds.contains(indexedShapeId)) {
continue;// this doc was filtered out via acceptDocs
}
Shape indexedShape = indexedShapes.get(indexedShapeId);