You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/07 13:21:14 UTC
svn commit: r1479862 [24/38] - in /lucene/dev/branches/lucene4258: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/solr/ dev-tools/maven/solr/core/src/java/
dev-tools/maven/solr/solrj/src/java/ dev-t...
Modified: lucene/dev/branches/lucene4258/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java (original)
+++ lucene/dev/branches/lucene4258/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTree.java Tue May 7 11:20:55 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.spatial.prefix
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Point;
+import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import java.nio.charset.Charset;
@@ -77,42 +78,64 @@ public abstract class SpatialPrefixTree
*/
public abstract int getLevelForDistance(double dist);
- //TODO double getDistanceForLevel(int level)
+ /**
+ * Given a cell having the specified level, returns the distance from opposite
+ * corners. Since this might vary depending on where the cell is, this method
+ * may over-estimate.
+ *
+ * @param level [1 to maxLevels]
+ * @return > 0
+ */
+ public double getDistanceForLevel(int level) {
+ if (level < 1 || level > getMaxLevels())
+ throw new IllegalArgumentException("Level must be in 1 to maxLevels range");
+ //TODO cache for each level
+ Cell cell = getCell(ctx.getWorldBounds().getCenter(), level);
+ Rectangle bbox = cell.getShape().getBoundingBox();
+ double width = bbox.getWidth();
+ double height = bbox.getHeight();
+ //Use standard cartesian hypotenuse. For geospatial, this answer is larger
+ // than the correct one but it's okay to over-estimate.
+ return Math.sqrt(width * width + height * height);
+ }
- private transient Node worldNode;//cached
+ private transient Cell worldCell;//cached
/**
- * Returns the level 0 cell which encompasses all spatial data. Equivalent to {@link #getNode(String)} with "".
+ * Returns the level 0 cell which encompasses all spatial data. Equivalent to {@link #getCell(String)} with "".
* This cell is threadsafe, just like a spatial prefix grid is, although cells aren't
* generally threadsafe.
* TODO rename to getTopCell or is this fine?
*/
- public Node getWorldNode() {
- if (worldNode == null) {
- worldNode = getNode("");
+ public Cell getWorldCell() {
+ if (worldCell == null) {
+ worldCell = getCell("");
}
- return worldNode;
+ return worldCell;
}
/**
- * The cell for the specified token. The empty string should be equal to {@link #getWorldNode()}.
+ * The cell for the specified token. The empty string should be equal to {@link #getWorldCell()}.
* Precondition: Never called when token length > maxLevel.
*/
- public abstract Node getNode(String token);
+ public abstract Cell getCell(String token);
- public abstract Node getNode(byte[] bytes, int offset, int len);
+ public abstract Cell getCell(byte[] bytes, int offset, int len);
- public final Node getNode(byte[] bytes, int offset, int len, Node target) {
+ public final Cell getCell(byte[] bytes, int offset, int len, Cell target) {
if (target == null) {
- return getNode(bytes, offset, len);
+ return getCell(bytes, offset, len);
}
target.reset(bytes, offset, len);
return target;
}
- protected Node getNode(Point p, int level) {
- return getNodes(p, level, false).get(0);
+ /**
+ * Returns the cell containing point {@code p} at the specified {@code level}.
+ */
+ protected Cell getCell(Point p, int level) {
+ return getCells(p, level, false).get(0);
}
/**
@@ -121,7 +144,7 @@ public abstract class SpatialPrefixTree
* leaf and none of its children are added.
* <p/>
* This implementation checks if shape is a Point and if so returns {@link
- * #getNodes(com.spatial4j.core.shape.Point, int, boolean)}.
+ * #getCells(com.spatial4j.core.shape.Point, int, boolean)}.
*
* @param shape the shape; non-null
* @param detailLevel the maximum detail level to get cells for
@@ -132,45 +155,45 @@ public abstract class SpatialPrefixTree
* ~20-25% fewer cells.
* @return a set of cells (no dups), sorted, immutable, non-null
*/
- public List<Node> getNodes(Shape shape, int detailLevel, boolean inclParents,
+ public List<Cell> getCells(Shape shape, int detailLevel, boolean inclParents,
boolean simplify) {
//TODO consider an on-demand iterator -- it won't build up all cells in memory.
if (detailLevel > maxLevels) {
throw new IllegalArgumentException("detailLevel > maxLevels");
}
if (shape instanceof Point) {
- return getNodes((Point) shape, detailLevel, inclParents);
+ return getCells((Point) shape, detailLevel, inclParents);
}
- List<Node> cells = new ArrayList<Node>(inclParents ? 4096 : 2048);
- recursiveGetNodes(getWorldNode(), shape, detailLevel, inclParents, simplify, cells);
+ List<Cell> cells = new ArrayList<Cell>(inclParents ? 4096 : 2048);
+ recursiveGetCells(getWorldCell(), shape, detailLevel, inclParents, simplify, cells);
return cells;
}
/**
- * Returns true if node was added as a leaf. If it wasn't it recursively
+ * Returns true if cell was added as a leaf. If it wasn't it recursively
* descends.
*/
- private boolean recursiveGetNodes(Node node, Shape shape, int detailLevel,
+ private boolean recursiveGetCells(Cell cell, Shape shape, int detailLevel,
boolean inclParents, boolean simplify,
- List<Node> result) {
- if (node.getLevel() == detailLevel) {
- node.setLeaf();//FYI might already be a leaf
+ List<Cell> result) {
+ if (cell.getLevel() == detailLevel) {
+ cell.setLeaf();//FYI might already be a leaf
}
- if (node.isLeaf()) {
- result.add(node);
+ if (cell.isLeaf()) {
+ result.add(cell);
return true;
}
- if (inclParents && node.getLevel() != 0)
- result.add(node);
+ if (inclParents && cell.getLevel() != 0)
+ result.add(cell);
- Collection<Node> subCells = node.getSubCells(shape);
+ Collection<Cell> subCells = cell.getSubCells(shape);
int leaves = 0;
- for (Node subCell : subCells) {
- if (recursiveGetNodes(subCell, shape, detailLevel, inclParents, simplify, result))
+ for (Cell subCell : subCells) {
+ if (recursiveGetCells(subCell, shape, detailLevel, inclParents, simplify, result))
leaves++;
}
//can we simplify?
- if (simplify && leaves == node.getSubCellsSize() && node.getLevel() != 0) {
+ if (simplify && leaves == cell.getSubCellsSize() && cell.getLevel() != 0) {
//Optimization: substitute the parent as a leaf instead of adding all
// children as leaves
@@ -178,10 +201,10 @@ public abstract class SpatialPrefixTree
do {
result.remove(result.size() - 1);//remove last
} while (--leaves > 0);
- //add node as the leaf
- node.setLeaf();
+ //add cell as the leaf
+ cell.setLeaf();
if (!inclParents) // otherwise it was already added up above
- result.add(node);
+ result.add(cell);
return true;
}
return false;
@@ -189,23 +212,23 @@ public abstract class SpatialPrefixTree
/**
* A Point-optimized implementation of
- * {@link #getNodes(com.spatial4j.core.shape.Shape, int, boolean, boolean)}. That
+ * {@link #getCells(com.spatial4j.core.shape.Shape, int, boolean, boolean)}. That
* method in fact calls this for points.
* <p/>
- * This implementation depends on {@link #getNode(String)} being fast, as its
+ * This implementation depends on {@link #getCell(String)} being fast, as it's
* called repeatedly when inclParents is true.
*/
- public List<Node> getNodes(Point p, int detailLevel, boolean inclParents) {
- Node cell = getNode(p, detailLevel);
+ public List<Cell> getCells(Point p, int detailLevel, boolean inclParents) {
+ Cell cell = getCell(p, detailLevel);
if (!inclParents) {
return Collections.singletonList(cell);
}
String endToken = cell.getTokenString();
assert endToken.length() == detailLevel;
- List<Node> cells = new ArrayList<Node>(detailLevel);
+ List<Cell> cells = new ArrayList<Cell>(detailLevel);
for (int i = 1; i < detailLevel; i++) {
- cells.add(getNode(endToken.substring(0, i)));
+ cells.add(getCell(endToken.substring(0, i)));
}
cells.add(cell);
return cells;
@@ -214,12 +237,12 @@ public abstract class SpatialPrefixTree
/**
* Will add the trailing leaf byte for leaves. This isn't particularly efficient.
*/
- public static List<String> nodesToTokenStrings(Collection<Node> nodes) {
- List<String> tokens = new ArrayList<String>((nodes.size()));
- for (Node node : nodes) {
- final String token = node.getTokenString();
- if (node.isLeaf()) {
- tokens.add(token + (char) Node.LEAF_BYTE);
+ public static List<String> cellsToTokenStrings(Collection<Cell> cells) {
+ List<String> tokens = new ArrayList<String>((cells.size()));
+ for (Cell cell : cells) {
+ final String token = cell.getTokenString();
+ if (cell.isLeaf()) {
+ tokens.add(token + (char) Cell.LEAF_BYTE);
} else {
tokens.add(token);
}
Modified: lucene/dev/branches/lucene4258/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java (original)
+++ lucene/dev/branches/lucene4258/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java Tue May 7 11:20:55 2013
@@ -17,6 +17,7 @@ package org.apache.lucene.spatial.query;
* limitations under the License.
*/
+import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
import com.spatial4j.core.shape.SpatialRelation;
@@ -55,13 +56,14 @@ public abstract class SpatialOperation i
public static final SpatialOperation BBoxWithin = new SpatialOperation("BBoxWithin", true, false, false) {
@Override
public boolean evaluate(Shape indexedShape, Shape queryShape) {
- return indexedShape.getBoundingBox().relate(queryShape) == SpatialRelation.WITHIN;
+ Rectangle bbox = indexedShape.getBoundingBox();
+ return bbox.relate(queryShape) == SpatialRelation.WITHIN || bbox.equals(queryShape);
}
};
public static final SpatialOperation Contains = new SpatialOperation("Contains", true, true, false) {
@Override
public boolean evaluate(Shape indexedShape, Shape queryShape) {
- return indexedShape.hasArea() && indexedShape.relate(queryShape) == SpatialRelation.CONTAINS;
+ return indexedShape.hasArea() && indexedShape.relate(queryShape) == SpatialRelation.CONTAINS || indexedShape.equals(queryShape);
}
};
public static final SpatialOperation Intersects = new SpatialOperation("Intersects", true, false, false) {
@@ -85,7 +87,7 @@ public abstract class SpatialOperation i
public static final SpatialOperation IsWithin = new SpatialOperation("IsWithin", true, false, true) {
@Override
public boolean evaluate(Shape indexedShape, Shape queryShape) {
- return queryShape.hasArea() && indexedShape.relate(queryShape) == SpatialRelation.WITHIN;
+ return queryShape.hasArea() && (indexedShape.relate(queryShape) == SpatialRelation.WITHIN || indexedShape.equals(queryShape));
}
};
public static final SpatialOperation Overlaps = new SpatialOperation("Overlaps", true, false, true) {
Modified: lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestCase.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestCase.java (original)
+++ lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialTestCase.java Tue May 7 11:20:55 2013
@@ -45,7 +45,7 @@ import static com.carrotsearch.randomize
public abstract class SpatialTestCase extends LuceneTestCase {
private DirectoryReader indexReader;
- private RandomIndexWriter indexWriter;
+ protected RandomIndexWriter indexWriter;
private Directory directory;
protected IndexSearcher indexSearcher;
Modified: lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java (original)
+++ lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/StrategyTestCase.java Tue May 7 11:20:55 2013
@@ -26,10 +26,12 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialArgsParser;
@@ -203,6 +205,10 @@ public abstract class StrategyTestCase e
return doc;
}
+ protected void deleteDoc(String id) throws IOException {
+ indexWriter.deleteDocuments(new TermQuery(new Term("id", id)));
+ }
+
/** scores[] are in docId order */
protected void checkValueSource(ValueSource vs, float scores[], float delta) throws IOException {
FunctionQuery q = new FunctionQuery(vs);
Modified: lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/SpatialOpRecursivePrefixTreeTest.java Tue May 7 11:20:55 2013
@@ -19,106 +19,292 @@ package org.apache.lucene.spatial.prefix
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import com.spatial4j.core.context.SpatialContext;
+import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
import com.spatial4j.core.shape.Shape;
+import com.spatial4j.core.shape.SpatialRelation;
import com.spatial4j.core.shape.impl.RectangleImpl;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.StrategyTestCase;
-import org.apache.lucene.spatial.prefix.tree.Node;
+import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
+import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.TreeSet;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
+import static com.spatial4j.core.shape.SpatialRelation.CONTAINS;
+import static com.spatial4j.core.shape.SpatialRelation.DISJOINT;
+import static com.spatial4j.core.shape.SpatialRelation.INTERSECTS;
+import static com.spatial4j.core.shape.SpatialRelation.WITHIN;
public class SpatialOpRecursivePrefixTreeTest extends StrategyTestCase {
private SpatialPrefixTree grid;
- @Test
- @Repeat(iterations = 20)
- public void testIntersects() throws IOException {
- //non-geospatial makes this test a little easier
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ deleteAll();
+ }
+
+ public void mySetup(int maxLevels) throws IOException {
+ //non-geospatial makes this test a little easier (in gridSnap), and using boundary values 2^X raises
+ // the prospect of edge conditions we want to test, plus makes for simpler numbers (no decimals).
this.ctx = new SpatialContext(false, null, new RectangleImpl(0, 256, -128, 128, null));
//A fairly shallow grid, and default 2.5% distErrPct
- this.grid = new QuadPrefixTree(ctx, randomIntBetween(1, 8));
+ if (maxLevels == -1)
+ maxLevels = randomIntBetween(1, 8);
+ this.grid = new QuadPrefixTree(ctx, maxLevels);
this.strategy = new RecursivePrefixTreeStrategy(grid, getClass().getSimpleName());
//((PrefixTreeStrategy) strategy).setDistErrPct(0);//fully precise to grid
- deleteAll();
+ System.out.println("Strategy: " + strategy.toString());
+ }
+
+ @Test
+ @Repeat(iterations = 10)
+ public void testIntersects() throws IOException {
+ mySetup(-1);
+ doTest(SpatialOperation.Intersects);
+ }
+
+ @Test
+ @Repeat(iterations = 10)
+ public void testWithin() throws IOException {
+ mySetup(-1);
+ doTest(SpatialOperation.IsWithin);
+ }
+
+ @Test
+ @Repeat(iterations = 10)
+ public void testContains() throws IOException {
+ mySetup(-1);
+ doTest(SpatialOperation.Contains);
+ }
+
+ @Test
+ @Repeat(iterations = 10)
+ public void testDisjoint() throws IOException {
+ mySetup(-1);
+ doTest(SpatialOperation.IsDisjointTo);
+ }
+
+ @Test
+ public void testWithinDisjointParts() throws IOException {
+ mySetup(7);
+ //one shape comprised of two parts, quite separated apart
+ adoc("0", new ShapePair(ctx.makeRectangle(0, 10, -120, -100), ctx.makeRectangle(220, 240, 110, 125), false));
+ commit();
+ //query surrounds only the second part of the indexed shape
+ Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.IsWithin,
+ ctx.makeRectangle(210, 245, 105, 128)));
+ SearchResults searchResults = executeQuery(query, 1);
+ //we shouldn't find it because it's not completely within
+ assertTrue(searchResults.numFound == 0);
+ }
+
+ @Test /** LUCENE-4916 */
+ public void testWithinLeafApproxRule() throws IOException {
+ mySetup(2);//4x4 grid
+ //indexed shape will simplify to entire right half (2 top cells)
+ adoc("0", ctx.makeRectangle(192, 204, -128, 128));
+ commit();
+
+ ((RecursivePrefixTreeStrategy) strategy).setPrefixGridScanLevel(randomInt(2));
+
+ //query does NOT contain it; both indexed cells are leaves to the query, and
+ // when expanded to the full grid cells, the top one's top row is disjoint
+ // from the query and thus not a match.
+ assertTrue(executeQuery(strategy.makeQuery(
+ new SpatialArgs(SpatialOperation.IsWithin, ctx.makeRectangle(38, 192, -72, 56))
+ ), 1).numFound==0);//no-match
+
+ //this time the rect is a little bigger and is considered a match. It's a
+ // an acceptable false-positive because of the grid approximation.
+ assertTrue(executeQuery(strategy.makeQuery(
+ new SpatialArgs(SpatialOperation.IsWithin, ctx.makeRectangle(38, 192, -72, 80))
+ ), 1).numFound==1);//match
+ }
+
+ //Override so we can index parts of a pair separately, resulting in the detailLevel
+ // being independent for each shape vs the whole thing
+ @Override
+ protected Document newDoc(String id, Shape shape) {
+ Document doc = new Document();
+ doc.add(new StringField("id", id, Field.Store.YES));
+ if (shape != null) {
+ Collection<Shape> shapes;
+ if (shape instanceof ShapePair) {
+ shapes = new ArrayList<>(2);
+ shapes.add(((ShapePair)shape).shape1);
+ shapes.add(((ShapePair)shape).shape2);
+ } else {
+ shapes = Collections.singleton(shape);
+ }
+ for (Shape shapei : shapes) {
+ for (Field f : strategy.createIndexableFields(shapei)) {
+ doc.add(f);
+ }
+ }
+ if (storeShape)
+ doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
+ }
+ return doc;
+ }
+
+ private void doTest(final SpatialOperation operation) throws IOException {
+ final boolean biasContains = (operation == SpatialOperation.Contains);
Map<String, Shape> indexedShapes = new LinkedHashMap<String, Shape>();
- Map<String, Rectangle> indexedGriddedShapes = new LinkedHashMap<String, Rectangle>();
+ Map<String, Shape> indexedShapesGS = new LinkedHashMap<String, Shape>();
final int numIndexedShapes = randomIntBetween(1, 6);
- for (int i = 1; i <= numIndexedShapes; i++) {
+ for (int i = 0; i < numIndexedShapes; i++) {
String id = "" + i;
- Shape indexShape = randomRectangle();
- Rectangle gridShape = gridSnapp(indexShape);
- indexedShapes.put(id, indexShape);
- indexedGriddedShapes.put(id, gridShape);
- adoc(id, indexShape);
+ Shape indexedShape;
+ Shape indexedShapeGS; //(grid-snapped)
+ int R = random().nextInt(12);
+ if (R == 0) {//1 in 10
+ indexedShape = null; //no shape for this doc
+ indexedShapeGS = null;
+ } else if (R % 4 == 0) {//3 in 12
+ //comprised of more than one shape
+ Rectangle shape1 = randomRectangle();
+ Rectangle shape2 = randomRectangle();
+ indexedShape = new ShapePair(shape1, shape2, biasContains);
+ indexedShapeGS = new ShapePair(gridSnap(shape1), gridSnap(shape2), biasContains);
+ } else {
+ //just one shape
+ indexedShape = randomRectangle();
+ indexedShapeGS = gridSnap(indexedShape);
+ }
+ indexedShapes.put(id, indexedShape);
+ indexedShapesGS.put(id, indexedShapeGS);
+
+ adoc(id, indexedShape);
+
+ if (random().nextInt(10) == 0)
+ commit();//intermediate commit, produces extra segments
+
+ }
+ Iterator<String> idIter = indexedShapes.keySet().iterator();
+ while (idIter.hasNext()) {
+ String id = idIter.next();
+ if (random().nextInt(10) == 0) {
+ deleteDoc(id);
+ idIter.remove();
+ indexedShapesGS.remove(id);
+ }
}
commit();
- final int numQueryShapes = atLeast(10);
+ final int numQueryShapes = atLeast(20);
for (int i = 0; i < numQueryShapes; i++) {
int scanLevel = randomInt(grid.getMaxLevels());
((RecursivePrefixTreeStrategy) strategy).setPrefixGridScanLevel(scanLevel);
- Rectangle queryShape = randomRectangle();
- Rectangle queryGridShape = gridSnapp(queryShape);
+ final Shape queryShape = randomRectangle();
+
+ final boolean DISJOINT = operation.equals(SpatialOperation.IsDisjointTo);
- //Generate truth via brute force
- final SpatialOperation operation = SpatialOperation.Intersects;
- Set<String> expectedIds = new TreeSet<String>();
- Set<String> optionalIds = new TreeSet<String>();
- for (String id : indexedShapes.keySet()) {
- Shape indexShape = indexedShapes.get(id);
- Rectangle indexGridShape = indexedGriddedShapes.get(id);
- if (operation.evaluate(indexShape, queryShape))
+ //Generate truth via brute force:
+ // We really try to ensure true-positive matches (if the predicate on the raw shapes match
+ // then the search should find those same matches).
+ // approximations, false-positive matches
+ Set <String> expectedIds = new LinkedHashSet<String>();//true-positives
+ Set<String> secondaryIds = new LinkedHashSet<String>();//false-positives (unless disjoint)
+ for (Map.Entry<String, Shape> entry : indexedShapes.entrySet()) {
+ Shape indexedShapeCompare = entry.getValue();
+ if (indexedShapeCompare == null)
+ continue;
+ Shape queryShapeCompare = queryShape;
+ String id = entry.getKey();
+ if (operation.evaluate(indexedShapeCompare, queryShapeCompare)) {
expectedIds.add(id);
- else if (operation.evaluate(indexGridShape, queryGridShape))
- optionalIds.add(id);
+ if (DISJOINT) {
+ //if no longer intersect after buffering them, for disjoint, remember this
+ indexedShapeCompare = indexedShapesGS.get(entry.getKey());
+ queryShapeCompare = gridSnap(queryShape);
+ if (!operation.evaluate(indexedShapeCompare, queryShapeCompare))
+ secondaryIds.add(id);
+ }
+ } else if (!DISJOINT) {
+ //buffer either the indexed or query shape (via gridSnap) and try again
+ if (operation.equals(SpatialOperation.Intersects)) {
+ indexedShapeCompare = indexedShapesGS.get(entry.getKey());
+ queryShapeCompare = gridSnap(queryShape);
+ } else if (operation.equals(SpatialOperation.Contains)) {
+ indexedShapeCompare = indexedShapesGS.get(entry.getKey());
+ } else if (operation.equals(SpatialOperation.IsWithin)) {
+ queryShapeCompare = gridSnap(queryShape);
+ }
+ if (operation.evaluate(indexedShapeCompare, queryShapeCompare))
+ secondaryIds.add(id);
+ }
}
//Search and verify results
- Query query = strategy.makeQuery(new SpatialArgs(operation, queryShape));
+ SpatialArgs args = new SpatialArgs(operation, queryShape);
+ Query query = strategy.makeQuery(args);
SearchResults got = executeQuery(query, 100);
- Set<String> remainingExpectedIds = new TreeSet<String>(expectedIds);
- String msg = queryShape.toString()+" Expect: "+expectedIds+" Opt: "+optionalIds;
+ Set<String> remainingExpectedIds = new LinkedHashSet<String>(expectedIds);
for (SearchResult result : got.results) {
String id = result.getId();
- Object removed = remainingExpectedIds.remove(id);
- if (removed == null) {
- assertTrue("Shouldn't match " + id + " in "+msg, optionalIds.contains(id));
+ boolean removed = remainingExpectedIds.remove(id);
+ if (!removed && (!DISJOINT && !secondaryIds.contains(id))) {
+ fail("Shouldn't match", id, indexedShapes, indexedShapesGS, queryShape);
}
}
- assertTrue("Didn't match " + remainingExpectedIds + " in " + msg, remainingExpectedIds.isEmpty());
+ if (DISJOINT)
+ remainingExpectedIds.removeAll(secondaryIds);
+ if (!remainingExpectedIds.isEmpty()) {
+ String id = remainingExpectedIds.iterator().next();
+ fail("Should have matched", id, indexedShapes, indexedShapesGS, queryShape);
+ }
}
+ }
+ private void fail(String label, String id, Map<String, Shape> indexedShapes, Map<String, Shape> indexedShapesGS, Shape queryShape) {
+ System.err.println("Ig:" + indexedShapesGS.get(id) + " Qg:" + gridSnap(queryShape));
+ fail(label + " I #" + id + ":" + indexedShapes.get(id) + " Q:" + queryShape);
}
- protected Rectangle gridSnapp(Shape snapMe) {
+
+// private Rectangle inset(Rectangle r) {
+// //typically inset by 1 (whole numbers are easy to read)
+// double d = Math.min(1.0, grid.getDistanceForLevel(grid.getMaxLevels()) / 4);
+// return ctx.makeRectangle(r.getMinX() + d, r.getMaxX() - d, r.getMinY() + d, r.getMaxY() - d);
+// }
+
+ protected Rectangle gridSnap(Shape snapMe) {
//The next 4 lines mimic PrefixTreeStrategy.createIndexableFields()
double distErrPct = ((PrefixTreeStrategy) strategy).getDistErrPct();
double distErr = SpatialArgs.calcDistanceFromErrPct(snapMe, distErrPct, ctx);
int detailLevel = grid.getLevelForDistance(distErr);
- List<Node> cells = grid.getNodes(snapMe, detailLevel, false, true);
+ List<Cell> cells = grid.getCells(snapMe, detailLevel, false, true);
//calc bounding box of cells.
double minX = Double.POSITIVE_INFINITY, maxX = Double.NEGATIVE_INFINITY;
double minY = Double.POSITIVE_INFINITY, maxY = Double.NEGATIVE_INFINITY;
- for (Node cell : cells) {
+ for (Cell cell : cells) {
assert cell.getLevel() <= detailLevel;
Rectangle cellR = cell.getShape().getBoundingBox();
@@ -130,4 +316,83 @@ public class SpatialOpRecursivePrefixTre
return ctx.makeRectangle(minX, maxX, minY, maxY);
}
+ /**
+ * An aggregate of 2 shapes. Only implements what's necessary for the test
+ * here. TODO replace with Spatial4j trunk ShapeCollection.
+ */
+ private class ShapePair implements Shape {
+
+ final Rectangle shape1, shape2;
+ final boolean biasContainsThenWithin;//a hack
+
+ public ShapePair(Rectangle shape1, Rectangle shape2, boolean containsThenWithin) {
+ this.shape1 = shape1;
+ this.shape2 = shape2;
+ biasContainsThenWithin = containsThenWithin;
+ }
+
+ @Override
+ public SpatialRelation relate(Shape other) {
+ SpatialRelation r = relateApprox(other);
+ if (r != INTERSECTS)
+ return r;
+ //See if the correct answer is actually Contains
+ Rectangle oRect = (Rectangle)other;
+ boolean pairTouches = shape1.relate(shape2).intersects();
+ if (!pairTouches)
+ return r;
+ //test all 4 corners
+ if (relate(ctx.makePoint(oRect.getMinX(), oRect.getMinY())) == CONTAINS
+ && relate(ctx.makePoint(oRect.getMinX(), oRect.getMaxY())) == CONTAINS
+ && relate(ctx.makePoint(oRect.getMaxX(), oRect.getMinY())) == CONTAINS
+ && relate(ctx.makePoint(oRect.getMaxX(), oRect.getMaxY())) == CONTAINS)
+ return CONTAINS;
+ return r;
+ }
+
+ private SpatialRelation relateApprox(Shape other) {
+ if (biasContainsThenWithin) {
+ if (shape1.relate(other) == CONTAINS || shape1.equals(other)
+ || shape2.relate(other) == CONTAINS || shape2.equals(other)) return CONTAINS;
+
+ if (shape1.relate(other) == WITHIN && shape2.relate(other) == WITHIN) return WITHIN;
+
+ } else {
+ if ((shape1.relate(other) == WITHIN || shape1.equals(other))
+ && (shape2.relate(other) == WITHIN || shape2.equals(other))) return WITHIN;
+
+ if (shape1.relate(other) == CONTAINS || shape2.relate(other) == CONTAINS) return CONTAINS;
+ }
+
+ if (shape1.relate(other).intersects() || shape2.relate(other).intersects())
+ return INTERSECTS;//might actually be 'CONTAINS' if these 2 are adjacent
+ return DISJOINT;
+ }
+
+ @Override
+ public Rectangle getBoundingBox() {
+ return ctx.getWorldBounds();//good enough
+ }
+
+ @Override
+ public boolean hasArea() {
+ return true;
+ }
+
+ @Override
+ public double getArea(SpatialContext ctx) {
+ throw new UnsupportedOperationException("TODO unimplemented");//TODO
+ }
+
+ @Override
+ public Point getCenter() {
+ throw new UnsupportedOperationException("TODO unimplemented");//TODO
+ }
+
+ @Override
+ public String toString() {
+ return "ShapePair(" + shape1 + " , " + shape2 + ")";
+ }
+ }
+
}
Modified: lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/SpatialPrefixTreeTest.java Tue May 7 11:20:55 2013
@@ -1,3 +1,5 @@
+package org.apache.lucene.spatial.prefix.tree;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -15,8 +17,6 @@
* limitations under the License.
*/
-package org.apache.lucene.spatial.prefix.tree;
-
import com.spatial4j.core.context.SpatialContext;
import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Rectangle;
@@ -49,20 +49,20 @@ public class SpatialPrefixTreeTest exten
}
@Test
- public void testNodeTraverse() {
+ public void testCellTraverse() {
trie = new GeohashPrefixTree(ctx,4);
- Node prevN = null;
- Node n = trie.getWorldNode();
- assertEquals(0,n.getLevel());
- assertEquals(ctx.getWorldBounds(),n.getShape());
- while(n.getLevel() < trie.getMaxLevels()) {
- prevN = n;
- n = n.getSubCells().iterator().next();//TODO random which one?
+ Cell prevC = null;
+ Cell c = trie.getWorldCell();
+ assertEquals(0, c.getLevel());
+ assertEquals(ctx.getWorldBounds(), c.getShape());
+ while(c.getLevel() < trie.getMaxLevels()) {
+ prevC = c;
+ c = c.getSubCells().iterator().next();//TODO random which one?
- assertEquals(prevN.getLevel()+1,n.getLevel());
- Rectangle prevNShape = (Rectangle) prevN.getShape();
- Shape s = n.getShape();
+ assertEquals(prevC.getLevel()+1,c.getLevel());
+ Rectangle prevNShape = (Rectangle) prevC.getShape();
+ Shape s = c.getShape();
Rectangle sbox = s.getBoundingBox();
assertTrue(prevNShape.getWidth() > sbox.getWidth());
assertTrue(prevNShape.getHeight() > sbox.getHeight());
Modified: lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java (original)
+++ lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java Tue May 7 11:20:55 2013
@@ -376,7 +376,7 @@ public class DirectSpellChecker {
suggestions[index--] = suggestion;
}
- ArrayUtil.mergeSort(suggestions, Collections.reverseOrder(comparator));
+ ArrayUtil.timSort(suggestions, Collections.reverseOrder(comparator));
if (numSug < suggestions.length) {
SuggestWord trimmed[] = new SuggestWord[numSug];
System.arraycopy(suggestions, 0, trimmed, 0, numSug);
Modified: lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefArray.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefArray.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefArray.java (original)
+++ lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefArray.java Tue May 7 11:20:55 2013
@@ -21,12 +21,12 @@ import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.SorterTemplate;
/**
* A simple append only random-access {@link BytesRef} array that stores full
@@ -70,7 +70,7 @@ public final class BytesRefArray {
/**
* Appends a copy of the given {@link BytesRef} to this {@link BytesRefArray}.
* @param bytes the bytes to append
- * @return the ordinal of the appended bytes
+ * @return the index of the appended bytes
*/
public int append(BytesRef bytes) {
if (lastElement >= offsets.length) {
@@ -82,7 +82,7 @@ public final class BytesRefArray {
pool.append(bytes);
offsets[lastElement++] = currentOffset;
currentOffset += bytes.length;
- return lastElement;
+ return lastElement-1;
}
/**
@@ -96,21 +96,21 @@ public final class BytesRefArray {
/**
* Returns the <i>n'th</i> element of this {@link BytesRefArray}
* @param spare a spare {@link BytesRef} instance
- * @param ord the elements ordinal to retrieve
+ * @param index the elements index to retrieve
* @return the <i>n'th</i> element of this {@link BytesRefArray}
*/
- public BytesRef get(BytesRef spare, int ord) {
- if (lastElement > ord) {
- int offset = offsets[ord];
- int length = ord == lastElement - 1 ? currentOffset - offset
- : offsets[ord + 1] - offset;
+ public BytesRef get(BytesRef spare, int index) {
+ if (lastElement > index) {
+ int offset = offsets[index];
+ int length = index == lastElement - 1 ? currentOffset - offset
+ : offsets[index + 1] - offset;
assert spare.offset == 0;
spare.grow(length);
spare.length = length;
pool.readBytes(offset, spare.bytes, spare.offset, spare.length);
return spare;
}
- throw new IndexOutOfBoundsException("index " + ord
+ throw new IndexOutOfBoundsException("index " + index
+ " must be less than the size: " + lastElement);
}
@@ -120,7 +120,7 @@ public final class BytesRefArray {
for (int i = 0; i < orderedEntries.length; i++) {
orderedEntries[i] = i;
}
- new SorterTemplate() {
+ new IntroSorter() {
@Override
protected void swap(int i, int j) {
final int o = orderedEntries[i];
@@ -130,25 +130,25 @@ public final class BytesRefArray {
@Override
protected int compare(int i, int j) {
- final int ord1 = orderedEntries[i], ord2 = orderedEntries[j];
- return comp.compare(get(scratch1, ord1), get(scratch2, ord2));
+ final int idx1 = orderedEntries[i], idx2 = orderedEntries[j];
+ return comp.compare(get(scratch1, idx1), get(scratch2, idx2));
}
@Override
protected void setPivot(int i) {
- final int ord = orderedEntries[i];
- get(pivot, ord);
+ final int index = orderedEntries[i];
+ get(pivot, index);
}
@Override
protected int comparePivot(int j) {
- final int ord = orderedEntries[j];
- return comp.compare(pivot, get(scratch2, ord));
+ final int index = orderedEntries[j];
+ return comp.compare(pivot, get(scratch2, index));
}
private final BytesRef pivot = new BytesRef(), scratch1 = new BytesRef(),
scratch2 = new BytesRef();
- }.quickSort(0, size() - 1);
+ }.sort(0, size());
return orderedEntries;
}
@@ -176,14 +176,14 @@ public final class BytesRefArray {
public BytesRefIterator iterator(final Comparator<BytesRef> comp) {
final BytesRef spare = new BytesRef();
final int size = size();
- final int[] ords = comp == null ? null : sort(comp);
+ final int[] indices = comp == null ? null : sort(comp);
return new BytesRefIterator() {
int pos = 0;
@Override
public BytesRef next() {
if (pos < size) {
- return get(spare, ords == null ? pos++ : ords[pos++]);
+ return get(spare, indices == null ? pos++ : indices[pos++]);
}
return null;
}
Modified: lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/lucene4258/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Tue May 7 11:20:55 2013
@@ -75,9 +75,9 @@ import org.apache.lucene.util.fst.Util;
* example, if you use an analyzer removing stop words,
* then the partial text "ghost chr..." could see the
* suggestion "The Ghost of Christmas Past". Note that
- * your {@code StopFilter} instance must NOT preserve
- * position increments for this example to work, so you should call
- * {@code setEnablePositionIncrements(false)} on it.
+ * position increments MUST NOT be preserved for this example
+ * to work, so you should call
+ * {@link #setPreservePositionIncrements(boolean) setPreservePositionIncrements(false)}.
*
* <p>
* If SynonymFilter is used to map wifi and wireless network to
@@ -185,6 +185,9 @@ public class AnalyzingSuggester extends
private static final int PAYLOAD_SEP = '\u001f';
+ /** Whether position holes should appear in the automaton. */
+ private boolean preservePositionIncrements;
+
/**
* Calls {@link #AnalyzingSuggester(Analyzer,Analyzer,int,int,int)
* AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST |
@@ -241,6 +244,13 @@ public class AnalyzingSuggester extends
throw new IllegalArgumentException("maxGraphExpansions must -1 (no limit) or > 0 (got: " + maxGraphExpansions + ")");
}
this.maxGraphExpansions = maxGraphExpansions;
+ preservePositionIncrements = true;
+ }
+
+ /** Whether to take position holes (position increment > 1) into account when
+ * building the automaton, <code>true</code> by default. */
+ public void setPreservePositionIncrements(boolean preservePositionIncrements) {
+ this.preservePositionIncrements = preservePositionIncrements;
}
/** Returns byte size of the underlying FST. */
@@ -327,13 +337,16 @@ public class AnalyzingSuggester extends
}
TokenStreamToAutomaton getTokenStreamToAutomaton() {
+ final TokenStreamToAutomaton tsta;
if (preserveSep) {
- return new EscapingTokenStreamToAutomaton();
+ tsta = new EscapingTokenStreamToAutomaton();
} else {
// When we're not preserving sep, we don't steal 0xff
// byte, so we don't need to do any escaping:
- return new TokenStreamToAutomaton();
+ tsta = new TokenStreamToAutomaton();
}
+ tsta.setPreservePositionIncrements(preservePositionIncrements);
+ return tsta;
}
private static class AnalyzingComparator implements Comparator<BytesRef> {
Modified: lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefArray.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefArray.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefArray.java (original)
+++ lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestBytesRefArray.java Tue May 7 11:20:55 2013
@@ -39,11 +39,12 @@ public class TestBytesRefArray extends L
}
int entries = atLeast(500);
BytesRef spare = new BytesRef();
+ int initSize = list.size();
for (int i = 0; i < entries; i++) {
String randomRealisticUnicodeString = _TestUtil
.randomRealisticUnicodeString(random);
spare.copyChars(randomRealisticUnicodeString);
- list.append(spare);
+ assertEquals(i+initSize, list.append(spare));
stringList.add(randomRealisticUnicodeString);
}
for (int i = 0; i < entries; i++) {
@@ -81,11 +82,12 @@ public class TestBytesRefArray extends L
}
int entries = atLeast(500);
BytesRef spare = new BytesRef();
+ final int initSize = list.size();
for (int i = 0; i < entries; i++) {
String randomRealisticUnicodeString = _TestUtil
.randomRealisticUnicodeString(random);
spare.copyChars(randomRealisticUnicodeString);
- list.append(spare);
+ assertEquals(initSize + i, list.append(spare));
stringList.add(randomRealisticUnicodeString);
}
Modified: lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java Tue May 7 11:20:55 2013
@@ -164,8 +164,9 @@ public class AnalyzingSuggesterTest exte
new TermFreq("the ghost of christmas past", 50),
};
- Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+ Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
+ suggester.setPreservePositionIncrements(false);
suggester.build(new TermFreqArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
@@ -187,7 +188,7 @@ public class AnalyzingSuggesterTest exte
}
public void testEmpty() throws Exception {
- Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+ Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
AnalyzingSuggester suggester = new AnalyzingSuggester(standard);
suggester.build(new TermFreqArrayIterator(new TermFreq[0]));
Modified: lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java (original)
+++ lucene/dev/branches/lucene4258/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/FuzzySuggesterTest.java Tue May 7 11:20:55 2013
@@ -153,8 +153,9 @@ public class FuzzySuggesterTest extends
new TermFreq("the ghost of christmas past", 50),
};
- Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+ Analyzer standard = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
FuzzySuggester suggester = new FuzzySuggester(standard);
+ suggester.setPreservePositionIncrements(false);
suggester.build(new TermFreqArrayIterator(keys));
List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("the ghost of chris", random()), false, 1);
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/ivy.xml?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/ivy.xml (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/ivy.xml Tue May 7 11:20:55 2013
@@ -32,8 +32,8 @@
<dependency org="org.apache.ant" name="ant" rev="1.8.2" transitive="false" />
<dependency org="junit" name="junit" rev="4.10" transitive="false" conf="default->*;junit4-stdalone->*" />
- <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="2.0.8" transitive="false" conf="default->*;junit4-stdalone->*" />
- <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="2.0.8" transitive="false" conf="default->*;junit4-stdalone->*" />
+ <dependency org="com.carrotsearch.randomizedtesting" name="junit4-ant" rev="2.0.9" transitive="false" conf="default->*;junit4-stdalone->*" />
+ <dependency org="com.carrotsearch.randomizedtesting" name="randomizedtesting-runner" rev="2.0.9" transitive="false" conf="default->*;junit4-stdalone->*" />
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/analysis/MockAnalyzer.java Tue May 7 11:20:55 2013
@@ -17,7 +17,6 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
-import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
@@ -46,7 +45,6 @@ public final class MockAnalyzer extends
private final CharacterRunAutomaton runAutomaton;
private final boolean lowerCase;
private final CharacterRunAutomaton filter;
- private final boolean enablePositionIncrements;
private int positionIncrementGap;
private final Random random;
private Map<String,Integer> previousMappings = new HashMap<String,Integer>();
@@ -60,30 +58,28 @@ public final class MockAnalyzer extends
* @param runAutomaton DFA describing how tokenization should happen (e.g. [a-zA-Z]+)
* @param lowerCase true if the tokenizer should lowercase terms
* @param filter DFA describing how terms should be filtered (set of stopwords, etc)
- * @param enablePositionIncrements true if position increments should reflect filtered terms.
*/
- public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
+ public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter) {
super(new PerFieldReuseStrategy());
// TODO: this should be solved in a different way; Random should not be shared (!).
this.random = new Random(random.nextLong());
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.filter = filter;
- this.enablePositionIncrements = enablePositionIncrements;
}
/**
- * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean)
+ * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton)
* MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false}).
*/
public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
- this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, true);
+ this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET);
}
/**
* Create a Whitespace-lowercasing analyzer with no stopwords removal.
* <p>
- * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton, boolean)
+ * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, boolean, CharacterRunAutomaton)
* MockAnalyzer(random, MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET, false}).
*/
public MockAnalyzer(Random random) {
@@ -95,7 +91,6 @@ public final class MockAnalyzer extends
MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase, maxTokenLength);
tokenizer.setEnableChecks(enableChecks);
MockTokenFilter filt = new MockTokenFilter(tokenizer, filter);
- filt.setEnablePositionIncrements(enablePositionIncrements);
return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
}
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java Tue May 7 11:20:55 2013
@@ -55,7 +55,6 @@ public final class MockTokenFilter exten
makeString("with"))));
private final CharacterRunAutomaton filter;
- private boolean enablePositionIncrements = true;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@@ -80,9 +79,7 @@ public final class MockTokenFilter exten
int skippedPositions = 0;
while (input.incrementToken()) {
if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
- if (enablePositionIncrements) {
- posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
- }
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
return true;
}
skippedPositions += posIncrAtt.getPositionIncrement();
@@ -90,20 +87,4 @@ public final class MockTokenFilter exten
// reached EOS -- return false
return false;
}
-
- /**
- * @see #setEnablePositionIncrements(boolean)
- */
- public boolean getEnablePositionIncrements() {
- return enablePositionIncrements;
- }
-
- /**
- * If <code>true</code>, this Filter will preserve
- * positions of the incoming tokens (ie, accumulate and
- * set position increments of the removed stop tokens).
- */
- public void setEnablePositionIncrements(boolean enable) {
- this.enablePositionIncrements = enable;
- }
}
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/codecs/cheapbastard/CheapBastardDocValuesProducer.java Tue May 7 11:20:55 2013
@@ -17,6 +17,10 @@ package org.apache.lucene.codecs.cheapba
* limitations under the License.
*/
+import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.DELTA_COMPRESSED;
+import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.GCD_COMPRESSED;
+import static org.apache.lucene.codecs.diskdv.DiskDocValuesConsumer.TABLE_COMPRESSED;
+
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
@@ -37,6 +41,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.BlockPackedReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
+import org.apache.lucene.util.packed.PackedInts;
class CheapBastardDocValuesProducer extends DocValuesProducer {
private final Map<Integer,NumericEntry> numerics;
@@ -50,15 +55,17 @@ class CheapBastardDocValuesProducer exte
// read in the entries from the metadata file.
IndexInput in = state.directory.openInput(metaName, state.context);
boolean success = false;
+ final int version;
try {
- CodecUtil.checkHeader(in, metaCodec,
- DiskDocValuesFormat.VERSION_START,
- DiskDocValuesFormat.VERSION_START);
+ version = CodecUtil.checkHeader(in, metaCodec,
+ DiskDocValuesFormat.VERSION_START,
+ DiskDocValuesFormat.VERSION_CURRENT);
numerics = new HashMap<Integer,NumericEntry>();
ords = new HashMap<Integer,NumericEntry>();
ordIndexes = new HashMap<Integer,NumericEntry>();
binaries = new HashMap<Integer,BinaryEntry>();
readFields(in);
+
success = true;
} finally {
if (success) {
@@ -67,12 +74,25 @@ class CheapBastardDocValuesProducer exte
IOUtils.closeWhileHandlingException(in);
}
}
-
- String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
- data = state.directory.openInput(dataName, state.context);
- CodecUtil.checkHeader(data, dataCodec,
- DiskDocValuesFormat.VERSION_START,
- DiskDocValuesFormat.VERSION_START);
+
+ success = false;
+ try {
+ String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+ data = state.directory.openInput(dataName, state.context);
+ final int version2 = CodecUtil.checkHeader(data, dataCodec,
+ DiskDocValuesFormat.VERSION_START,
+ DiskDocValuesFormat.VERSION_CURRENT);
+ if (version != version2) {
+ throw new CorruptIndexException("Versions mismatch");
+ }
+
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(this.data);
+ }
+ }
+
}
private void readFields(IndexInput meta) throws IOException {
@@ -140,10 +160,34 @@ class CheapBastardDocValuesProducer exte
static NumericEntry readNumericEntry(IndexInput meta) throws IOException {
NumericEntry entry = new NumericEntry();
+ entry.format = meta.readVInt();
entry.packedIntsVersion = meta.readVInt();
entry.offset = meta.readLong();
entry.count = meta.readVLong();
entry.blockSize = meta.readVInt();
+ switch(entry.format) {
+ case GCD_COMPRESSED:
+ entry.minValue = meta.readLong();
+ entry.gcd = meta.readLong();
+ break;
+ case TABLE_COMPRESSED:
+ if (entry.count > Integer.MAX_VALUE) {
+ throw new CorruptIndexException("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
+ }
+ final int uniqueValues = meta.readVInt();
+ if (uniqueValues > 256) {
+ throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
+ }
+ entry.table = new long[uniqueValues];
+ for (int i = 0; i < uniqueValues; ++i) {
+ entry.table[i] = meta.readLong();
+ }
+ break;
+ case DELTA_COMPRESSED:
+ break;
+ default:
+ throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
+ }
return entry;
}
@@ -171,13 +215,38 @@ class CheapBastardDocValuesProducer exte
final IndexInput data = this.data.clone();
data.seek(entry.offset);
- final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
- return new LongNumericDocValues() {
- @Override
- public long get(long id) {
- return reader.get(id);
- }
- };
+ switch (entry.format) {
+ case DELTA_COMPRESSED:
+ final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
+ return new LongNumericDocValues() {
+ @Override
+ public long get(long id) {
+ return reader.get(id);
+ }
+ };
+ case GCD_COMPRESSED:
+ final long min = entry.minValue;
+ final long mult = entry.gcd;
+ final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, entry.blockSize, entry.count, true);
+ return new LongNumericDocValues() {
+ @Override
+ public long get(long id) {
+ return min + mult * quotientReader.get(id);
+ }
+ };
+ case TABLE_COMPRESSED:
+ final long[] table = entry.table;
+ final int bitsRequired = PackedInts.bitsRequired(table.length - 1);
+ final PackedInts.Reader ords = PackedInts.getDirectReaderNoHeader(data, PackedInts.Format.PACKED, entry.packedIntsVersion, (int) entry.count, bitsRequired);
+ return new LongNumericDocValues() {
+ @Override
+ public long get(long id) {
+ return table[(int) ords.get((int) id)];
+ }
+ };
+ default:
+ throw new AssertionError();
+ }
}
@Override
@@ -315,9 +384,14 @@ class CheapBastardDocValuesProducer exte
static class NumericEntry {
long offset;
+ int format;
int packedIntsVersion;
long count;
int blockSize;
+
+ long minValue;
+ long gcd;
+ long table[];
}
static class BinaryEntry {
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java Tue May 7 11:20:55 2013
@@ -400,11 +400,8 @@ public final class RAMOnlyPostingsFormat
}
@Override
- public int advance(int targetDocID) {
- do {
- nextDoc();
- } while (upto < ramTerm.docs.size() && current.docID < targetDocID);
- return NO_MORE_DOCS;
+ public int advance(int targetDocID) throws IOException {
+ return slowAdvance(targetDocID);
}
// TODO: override bulk read, for better perf
@@ -453,11 +450,8 @@ public final class RAMOnlyPostingsFormat
}
@Override
- public int advance(int targetDocID) {
- do {
- nextDoc();
- } while (upto < ramTerm.docs.size() && current.docID < targetDocID);
- return NO_MORE_DOCS;
+ public int advance(int targetDocID) throws IOException {
+ return slowAdvance(targetDocID);
}
// TODO: override bulk read, for better perf
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java Tue May 7 11:20:55 2013
@@ -84,7 +84,7 @@ public class AssertingAtomicReader exten
@Override
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws IOException {
- TermsEnum termsEnum = super.intersect(automaton, bytes);
+ TermsEnum termsEnum = in.intersect(automaton, bytes);
assert termsEnum != null;
assert bytes == null || bytes.isValid();
return new AssertingTermsEnum(termsEnum);
@@ -223,45 +223,63 @@ public class AssertingAtomicReader exten
}
static enum DocsEnumState { START, ITERATING, FINISHED };
- static class AssertingDocsEnum extends FilterDocsEnum {
+
+ /** Wraps a docsenum with additional checks */
+ public static class AssertingDocsEnum extends FilterDocsEnum {
private DocsEnumState state = DocsEnumState.START;
+ private int doc;
public AssertingDocsEnum(DocsEnum in) {
+ this(in, true);
+ }
+
+ public AssertingDocsEnum(DocsEnum in, boolean failOnUnsupportedDocID) {
super(in);
- int docid = in.docID();
- assert docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS : "invalid initial doc id: " + docid;
+ try {
+ int docid = in.docID();
+ assert docid == -1 : in.getClass() + ": invalid initial doc id: " + docid;
+ } catch (UnsupportedOperationException e) {
+ if (failOnUnsupportedDocID) {
+ throw e;
+ }
+ }
+ doc = -1;
}
@Override
public int nextDoc() throws IOException {
assert state != DocsEnumState.FINISHED : "nextDoc() called after NO_MORE_DOCS";
int nextDoc = super.nextDoc();
- assert nextDoc >= 0 : "invalid doc id: " + nextDoc;
+ assert nextDoc > doc : "backwards nextDoc from " + doc + " to " + nextDoc + " " + in;
if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) {
state = DocsEnumState.FINISHED;
} else {
state = DocsEnumState.ITERATING;
}
- return nextDoc;
+ assert super.docID() == nextDoc;
+ return doc = nextDoc;
}
@Override
public int advance(int target) throws IOException {
assert state != DocsEnumState.FINISHED : "advance() called after NO_MORE_DOCS";
+ assert target > doc : "target must be > docID(), got " + target + " <= " + doc;
int advanced = super.advance(target);
- assert advanced >= 0 : "invalid doc id: " + advanced;
assert advanced >= target : "backwards advance from: " + target + " to: " + advanced;
if (advanced == DocIdSetIterator.NO_MORE_DOCS) {
state = DocsEnumState.FINISHED;
} else {
state = DocsEnumState.ITERATING;
}
- return advanced;
+ assert super.docID() == advanced;
+ return doc = advanced;
}
- // NOTE: We don't assert anything for docId(). Specifically DocsEnum javadocs
- // are ambiguous with DocIdSetIterator here, DocIdSetIterator says its ok
- // to call this method before nextDoc(), just that it must be -1 or NO_MORE_DOCS!
+ @Override
+ public int docID() {
+ assert doc == super.docID() : " invalid docID() in " + in.getClass() + " " + super.docID() + " instead of " + doc;
+ return doc;
+ }
@Override
public int freq() throws IOException {
@@ -277,18 +295,20 @@ public class AssertingAtomicReader exten
private DocsEnumState state = DocsEnumState.START;
private int positionMax = 0;
private int positionCount = 0;
+ private int doc;
public AssertingDocsAndPositionsEnum(DocsAndPositionsEnum in) {
super(in);
int docid = in.docID();
- assert docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS : "invalid initial doc id: " + docid;
+ assert docid == -1 : "invalid initial doc id: " + docid;
+ doc = -1;
}
@Override
public int nextDoc() throws IOException {
assert state != DocsEnumState.FINISHED : "nextDoc() called after NO_MORE_DOCS";
int nextDoc = super.nextDoc();
- assert nextDoc >= 0 : "invalid doc id: " + nextDoc;
+ assert nextDoc > doc : "backwards nextDoc from " + doc + " to " + nextDoc;
positionCount = 0;
if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) {
state = DocsEnumState.FINISHED;
@@ -297,14 +317,15 @@ public class AssertingAtomicReader exten
state = DocsEnumState.ITERATING;
positionMax = super.freq();
}
- return nextDoc;
+ assert super.docID() == nextDoc;
+ return doc = nextDoc;
}
@Override
public int advance(int target) throws IOException {
assert state != DocsEnumState.FINISHED : "advance() called after NO_MORE_DOCS";
+ assert target > doc : "target must be > docID(), got " + target + " <= " + doc;
int advanced = super.advance(target);
- assert advanced >= 0 : "invalid doc id: " + advanced;
assert advanced >= target : "backwards advance from: " + target + " to: " + advanced;
positionCount = 0;
if (advanced == DocIdSetIterator.NO_MORE_DOCS) {
@@ -314,7 +335,14 @@ public class AssertingAtomicReader exten
state = DocsEnumState.ITERATING;
positionMax = super.freq();
}
- return advanced;
+ assert super.docID() == advanced;
+ return doc = advanced;
+ }
+
+ @Override
+ public int docID() {
+ assert doc == super.docID() : " invalid docID() in " + in.getClass() + " " + super.docID() + " instead of " + doc;
+ return doc;
}
@Override
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/AssertingDirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/AssertingDirectoryReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/AssertingDirectoryReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/AssertingDirectoryReader.java Tue May 7 11:20:55 2013
@@ -17,68 +17,29 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.List;
-
/**
* A {@link DirectoryReader} that wraps all its subreaders with
* {@link AssertingAtomicReader}
*/
-public class AssertingDirectoryReader extends DirectoryReader {
- protected DirectoryReader in;
+public class AssertingDirectoryReader extends FilterDirectoryReader {
- public AssertingDirectoryReader(DirectoryReader in) {
- super(in.directory(), wrap(in.getSequentialSubReaders()));
- this.in = in;
- }
-
- private static AtomicReader[] wrap(List<? extends AtomicReader> readers) {
- AtomicReader[] wrapped = new AtomicReader[readers.size()];
- for (int i = 0; i < readers.size(); i++) {
- wrapped[i] = new AssertingAtomicReader(readers.get(i));
+ static class AssertingSubReaderWrapper extends SubReaderWrapper {
+ @Override
+ public AtomicReader wrap(AtomicReader reader) {
+ return new AssertingAtomicReader(reader);
}
- return wrapped;
- }
-
- @Override
- protected DirectoryReader doOpenIfChanged() throws IOException {
- DirectoryReader d = in.doOpenIfChanged();
- return d == null ? null : new AssertingDirectoryReader(d);
- }
-
- @Override
- protected DirectoryReader doOpenIfChanged(IndexCommit commit) throws IOException {
- DirectoryReader d = in.doOpenIfChanged(commit);
- return d == null ? null : new AssertingDirectoryReader(d);
- }
-
- @Override
- protected DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws IOException {
- DirectoryReader d = in.doOpenIfChanged(writer, applyAllDeletes);
- return d == null ? null : new AssertingDirectoryReader(d);
- }
-
- @Override
- public long getVersion() {
- return in.getVersion();
}
- @Override
- public boolean isCurrent() throws IOException {
- return in.isCurrent();
+ public AssertingDirectoryReader(DirectoryReader in) {
+ super(in, new AssertingSubReaderWrapper());
}
@Override
- public IndexCommit getIndexCommit() throws IOException {
- return in.getIndexCommit();
+ protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) {
+ return new AssertingDirectoryReader(in);
}
@Override
- protected void doClose() throws IOException {
- in.doClose();
- }
-
- @Override
public Object getCoreCacheKey() {
return in.getCoreCacheKey();
}
@@ -87,4 +48,5 @@ public class AssertingDirectoryReader ex
public Object getCombinedCoreAndDeletesKey() {
return in.getCombinedCoreAndDeletesKey();
}
+
}
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java Tue May 7 11:20:55 2013
@@ -25,15 +25,13 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
-import java.util.Map.Entry;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesFormat;
-import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -44,7 +42,6 @@ import org.apache.lucene.document.Sorted
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -1122,8 +1119,21 @@ public abstract class BaseDocValuesForma
w.close();
dir.close();
}
-
- private void doTestNumericsVsStoredFields(long minValue, long maxValue) throws Exception {
+
+ static abstract class LongProducer {
+ abstract long next();
+ }
+
+ private void doTestNumericsVsStoredFields(final long minValue, final long maxValue) throws Exception {
+ doTestNumericsVsStoredFields(new LongProducer() {
+ @Override
+ long next() {
+ return _TestUtil.nextLong(random(), minValue, maxValue);
+ }
+ });
+ }
+
+ private void doTestNumericsVsStoredFields(LongProducer longs) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
@@ -1136,10 +1146,13 @@ public abstract class BaseDocValuesForma
doc.add(dvField);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
+ // numDocs should be always > 256 so that in case of a codec that optimizes
+ // for numbers of values <= 256, all storage layouts are tested
+ assert numDocs > 256;
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
- long value = _TestUtil.nextLong(random(), minValue, maxValue);
+ long value = longs.next();
storedField.setStringValue(Long.toString(value));
dvField.setLongValue(value);
writer.addDocument(doc);
@@ -1154,6 +1167,11 @@ public abstract class BaseDocValuesForma
int id = random().nextInt(numDocs);
writer.deleteDocuments(new Term("id", Integer.toString(id)));
}
+
+ // merge some segments and ensure that at least one of them has more than
+ // 256 values
+ writer.forceMerge(numDocs / 256);
+
writer.close();
// compare
@@ -1218,7 +1236,7 @@ public abstract class BaseDocValuesForma
doc.add(dvField);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
@@ -1289,7 +1307,7 @@ public abstract class BaseDocValuesForma
doc.add(dvField);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
idField.setStringValue(Integer.toString(i));
final int length;
@@ -1801,7 +1819,7 @@ public abstract class BaseDocValuesForma
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
@@ -1922,7 +1940,7 @@ public abstract class BaseDocValuesForma
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
// index some docs
- int numDocs = atLeast(1000);
+ int numDocs = atLeast(300);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
Field idField = new StringField("id", Integer.toString(i), Field.Store.NO);
@@ -2007,4 +2025,39 @@ public abstract class BaseDocValuesForma
doTestSortedSetVsUninvertedField(1, 10);
}
}
+
+ public void testGCDCompression() throws Exception {
+ int numIterations = atLeast(1);
+ for (int i = 0; i < numIterations; i++) {
+ final long min = - (((long) random().nextInt(1 << 30)) << 32);
+ final long mul = random().nextInt() & 0xFFFFFFFFL;
+ final LongProducer longs = new LongProducer() {
+ @Override
+ long next() {
+ return min + mul * random().nextInt(1 << 20);
+ }
+ };
+ doTestNumericsVsStoredFields(longs);
+ }
+ }
+
+ public void testZeros() throws Exception {
+ doTestNumericsVsStoredFields(0, 0);
+ }
+
+ public void testZeroOrMin() throws Exception {
+ // try to make GCD compression fail if the format did not anticipate that
+ // the GCD of 0 and MIN_VALUE is negative
+ int numIterations = atLeast(1);
+ for (int i = 0; i < numIterations; i++) {
+ final LongProducer longs = new LongProducer() {
+ @Override
+ long next() {
+ return random().nextBoolean() ? 0 : Long.MIN_VALUE;
+ }
+ };
+ doTestNumericsVsStoredFields(longs);
+ }
+ }
+
}
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java Tue May 7 11:20:55 2013
@@ -66,9 +66,6 @@ import org.junit.BeforeClass;
// TODO test when you reuse after skipping a term or two, eg the block reuse case
-// TODO hmm contract says .doc() can return NO_MORE_DOCS
-// before nextDoc too...?
-
/* TODO
- threads
- assert doc=-1 before any nextDoc
@@ -275,10 +272,8 @@ public abstract class BasePostingsFormat
}
@Override
- public int advance(int target) {
- while(nextDoc() < target) {
- }
- return docID;
+ public int advance(int target) throws IOException {
+ return slowAdvance(target);
}
@Override
@@ -701,7 +696,7 @@ public abstract class BasePostingsFormat
assertNotNull("null DocsEnum", docsEnum);
int initialDocID = docsEnum.docID();
- assertTrue("inital docID should be -1 or NO_MORE_DOCS: " + docsEnum, initialDocID == -1 || initialDocID == DocsEnum.NO_MORE_DOCS);
+ assertEquals("inital docID should be -1" + docsEnum, -1, initialDocID);
if (VERBOSE) {
if (prevDocsEnum == null) {
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/BaseTermVectorsFormatTestCase.java Tue May 7 11:20:55 2013
@@ -520,7 +520,7 @@ public abstract class BaseTermVectorsFor
public void testRareVectors() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(10, 20);
for (Options options : validOptions()) {
- final int numDocs = _TestUtil.nextInt(random(), 10, 10000);
+ final int numDocs = atLeast(200);
final int docWithVectors = random().nextInt(numDocs);
final Document emptyDoc = new Document();
final Directory dir = newDirectory();
@@ -560,7 +560,7 @@ public abstract class BaseTermVectorsFor
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
- final RandomDocument doc = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 2), _TestUtil.nextInt(random(), 50000, 100000), options);
+ final RandomDocument doc = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 2), atLeast(20000), options);
writer.addDocument(doc.toDocument());
final IndexReader reader = writer.getReader();
assertEquals(doc, reader.getTermVectors(0));
@@ -575,7 +575,7 @@ public abstract class BaseTermVectorsFor
for (Options options : validOptions()) {
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
- final RandomDocument doc = docFactory.newDocument(_TestUtil.nextInt(random(), 500, 1000), 5, options);
+ final RandomDocument doc = docFactory.newDocument(atLeast(100), 5, options);
writer.addDocument(doc.toDocument());
final IndexReader reader = writer.getReader();
assertEquals(doc, reader.getTermVectors(0));
@@ -614,7 +614,7 @@ public abstract class BaseTermVectorsFor
public void testRandom() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
- final int numDocs = _TestUtil.nextInt(random(), 100, 1000);
+ final int numDocs = atLeast(100);
final RandomDocument[] docs = new RandomDocument[numDocs];
for (int i = 0; i < numDocs; ++i) {
docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), _TestUtil.nextInt(random(), 10, 50), randomOptions());
@@ -636,7 +636,7 @@ public abstract class BaseTermVectorsFor
public void testMerge() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
- final int numDocs = _TestUtil.nextInt(random(), 100, 500);
+ final int numDocs = atLeast(100);
final int numDeletes = random().nextInt(numDocs);
final Set<Integer> deletes = new HashSet<Integer>();
while (deletes.size() < numDeletes) {
@@ -645,7 +645,7 @@ public abstract class BaseTermVectorsFor
for (Options options : validOptions()) {
final RandomDocument[] docs = new RandomDocument[numDocs];
for (int i = 0; i < numDocs; ++i) {
- docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), _TestUtil.nextInt(random(), 10, 50), options);
+ docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), atLeast(10), options);
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
@@ -677,11 +677,11 @@ public abstract class BaseTermVectorsFor
// don't share mutable data
public void testClone() throws IOException, InterruptedException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
- final int numDocs = _TestUtil.nextInt(random(), 100, 1000);
+ final int numDocs = atLeast(100);
for (Options options : validOptions()) {
final RandomDocument[] docs = new RandomDocument[numDocs];
for (int i = 0; i < numDocs; ++i) {
- docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), _TestUtil.nextInt(random(), 10, 50), options);
+ docs[i] = docFactory.newDocument(_TestUtil.nextInt(random(), 1, 3), atLeast(10), options);
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Modified: lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java Tue May 7 11:20:55 2013
@@ -103,7 +103,7 @@ public class RandomIndexWriter implement
// Make sure we sometimes test indices that don't get
// any forced merges:
- doRandomForceMerge = r.nextBoolean();
+ doRandomForceMerge = !(c.getMergePolicy() instanceof NoMergePolicy) && r.nextBoolean();
}
/**