Posted to commits@lucene.apache.org by mi...@apache.org on 2016/03/28 17:09:23 UTC

lucene-solr:branch_6x: simplify base geo test class

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 81f32f272 -> 9cccffad4


simplify base geo test class


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9cccffad
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9cccffad
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9cccffad

Branch: refs/heads/branch_6x
Commit: 9cccffad41858998ad9e9d0ef926c75d59438bb9
Parents: 81f32f2
Author: Mike McCandless <mi...@apache.org>
Authored: Mon Mar 28 11:08:53 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Mon Mar 28 11:09:31 2016 -0400

----------------------------------------------------------------------
 .../spatial/util/BaseGeoPointTestCase.java      | 468 +++++++++++++------
 1 file changed, 327 insertions(+), 141 deletions(-)
----------------------------------------------------------------------
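
[Editor's note, not part of the commit: the refactor below inlines the old VerifyHits helper into three verifyRandom* methods and, when deciding whether a hit is expected, replaces the per-test `deleted` set with the reader's live-docs bitset. A minimal sketch of that deletion check follows, assuming the Lucene 6.x MultiFields/Bits API that appears in the diff; the helper name isDeleted is illustrative only.]

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.util.Bits;

    // Returns true if docID has been deleted. getLiveDocs returns null
    // when the reader has no deletions, in which case every doc is live.
    static boolean isDeleted(IndexReader reader, int docID) {
      Bits liveDocs = MultiFields.getLiveDocs(reader);
      return liveDocs != null && liveDocs.get(docID) == false;
    }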


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cccffad/lucene/spatial/src/test/org/apache/lucene/spatial/util/BaseGeoPointTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/spatial/src/test/org/apache/lucene/spatial/util/BaseGeoPointTestCase.java b/lucene/spatial/src/test/org/apache/lucene/spatial/util/BaseGeoPointTestCase.java
index ce908e7..c0633eb 100644
--- a/lucene/spatial/src/test/org/apache/lucene/spatial/util/BaseGeoPointTestCase.java
+++ b/lucene/spatial/src/test/org/apache/lucene/spatial/util/BaseGeoPointTestCase.java
@@ -45,6 +45,7 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SegmentReadState;
@@ -57,6 +58,7 @@ import org.apache.lucene.search.SimpleCollector;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
@@ -432,10 +434,9 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
     IndexReader r = w.getReader();
     w.close();
 
-    // We can't wrap with "exotic" readers because the BKD query must see the BKDDVFormat:
-    IndexSearcher s = newSearcher(r, false);
+    IndexSearcher s = newSearcher(r);
 
-    int iters = atLeast(75);
+    int iters = atLeast(25);
     for (int iter=0;iter<iters;iter++) {
       GeoRect rect = randomRect(small, small == false);
 
@@ -482,9 +483,9 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
         if (hits.get(docID) != expected) {
           String id = s.doc(docID).get("id");
           if (expected) {
-            System.out.println(Thread.currentThread().getName() + ": id=" + id + " docID=" + docID + " should match but did not");
+            System.out.println("TEST: id=" + id + " docID=" + docID + " should match but did not");
           } else {
-            System.out.println(Thread.currentThread().getName() + ": id=" + id + " docID=" + docID + " should not match but did");
+            System.out.println("TEST: id=" + id + " docID=" + docID + " should not match but did");
           }
           System.out.println("  rect=" + rect);
           System.out.println("  lat=" + latDoc1 + " lon=" + lonDoc1 + "\n  lat=" + latDoc2 + " lon=" + lonDoc2);
@@ -737,10 +738,77 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
     return result;
   }
 
-  private static abstract class VerifyHits {
+  private void verify(boolean small, double[] lats, double[] lons) throws Exception {
+    verifyRandomRectangles(small, lats, lons);
+    verifyRandomDistances(small, lats, lons);
+    verifyRandomPolygons(small, lats, lons);
+  }
+
+  protected void verifyRandomRectangles(boolean small, double[] lats, double[] lons) throws Exception {
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    // Else we can get O(N^2) merging:
+    int mbd = iwc.getMaxBufferedDocs();
+    if (mbd != -1 && mbd < lats.length/100) {
+      iwc.setMaxBufferedDocs(lats.length/100);
+    }
+    Directory dir;
+    if (lats.length > 100000) {
+      dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
+    } else {
+      dir = newDirectory();
+    }
+
+    Set<Integer> deleted = new HashSet<>();
+    // RandomIndexWriter is too slow here:
+    IndexWriter w = new IndexWriter(dir, iwc);
+    for(int id=0;id<lats.length;id++) {
+      Document doc = new Document();
+      doc.add(newStringField("id", ""+id, Field.Store.NO));
+      doc.add(new NumericDocValuesField("id", id));
+      if (Double.isNaN(lats[id]) == false) {
+        addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
+      }
+      w.addDocument(doc);
+      if (id > 0 && random().nextInt(100) == 42) {
+        int idToDelete = random().nextInt(id);
+        w.deleteDocuments(new Term("id", ""+idToDelete));
+        deleted.add(idToDelete);
+        if (VERBOSE) {
+          System.out.println("  delete id=" + idToDelete);
+        }
+      }
+    }
+
+    if (random().nextBoolean()) {
+      w.forceMerge(1);
+    }
+    final IndexReader r = DirectoryReader.open(w);
+    w.close();
+
+    IndexSearcher s = newSearcher(r);
+
+    int iters = atLeast(25);
+
+    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
+
+    Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
+    int maxDoc = s.getIndexReader().maxDoc();
+
+    for (int iter=0;iter<iters;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter + " s=" + s);
+      }
+      
+      // Rect: don't allow dateline crossing when testing small:
+      GeoRect rect = randomRect(small, small == false);
+
+      Query query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
+
+      if (VERBOSE) {
+        System.out.println("  query=" + query);
+      }
 
-    public void test(AtomicBoolean failed, boolean small, IndexSearcher s, NumericDocValues docIDToID, Set<Integer> deleted, Query query, double[] lats, double[] lons) throws Exception {
-      int maxDoc = s.getIndexReader().maxDoc();
       final FixedBitSet hits = new FixedBitSet(maxDoc);
       s.search(query, new SimpleCollector() {
 
@@ -763,60 +831,46 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
         });
 
       boolean fail = false;
-
-      // Change to false to see all wrong hits:
-      boolean failFast = true;
-
       for(int docID=0;docID<maxDoc;docID++) {
         int id = (int) docIDToID.get(docID);
         boolean expected;
-        if (deleted.contains(id)) {
+        if (liveDocs != null && liveDocs.get(docID) == false) {
+          // document is deleted
           expected = false;
         } else if (Double.isNaN(lats[id])) {
           expected = false;
         } else {
-          expected = shouldMatch(lats[id], lons[id]);
+          expected = rectContainsPoint(rect, lats[id], lons[id]);
         }
 
         if (hits.get(docID) != expected) {
+          StringBuilder b = new StringBuilder();
 
-          // Print only one failed hit; add a true || in here to see all failures:
-          if (failFast == false || failed.getAndSet(true) == false) {
-            if (expected) {
-              System.out.println(Thread.currentThread().getName() + ": id=" + id + " should match but did not");
-            } else {
-              System.out.println(Thread.currentThread().getName() + ": id=" + id + " should not match but did");
-            }
-            System.out.println("  small=" + small + " query=" + query +
-                               " docID=" + docID + "\n  lat=" + lats[id] + " lon=" + lons[id] +
-                               "\n  deleted?=" + deleted.contains(id));
-            if (Double.isNaN(lats[id]) == false) {
-              describe(docID, lats[id], lons[id]);
-            }
-            if (failFast) {
-              fail("wrong hit (first of possibly more)");
-            } else {
-              fail = true;
-            }
+          if (expected) {
+            b.append("FAIL: id=" + id + " should match but did not\n");
+          } else {
+            b.append("FAIL: id=" + id + " should not match but did\n");
+          }
+          b.append("  query=" + query + " docID=" + docID + "\n");
+          b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
+          b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
+          if (true) {
+            fail("wrong hit (first of possibly more):\n\n" + b);
+          } else {
+            System.out.println(b.toString());
+            fail = true;
           }
         }
       }
-
       if (fail) {
-        failed.set(true);
         fail("some hits were wrong");
       }
     }
 
-    /** Return true if we definitely should match, false if we definitely
-     *  should not match, and null if it's a borderline case which might
-     *  go either way. */
-    protected abstract boolean shouldMatch(double lat, double lon);
-
-    protected abstract void describe(int docID, double lat, double lon);
+    IOUtils.close(r, dir);
   }
 
-  protected void verify(boolean small, double[] lats, double[] lons) throws Exception {
+  protected void verifyRandomDistances(boolean small, double[] lats, double[] lons) throws Exception {
     IndexWriterConfig iwc = newIndexWriterConfig();
     // Else we can get O(N^2) merging:
     int mbd = iwc.getMaxBufferedDocs();
@@ -857,142 +911,274 @@ public abstract class BaseGeoPointTestCase extends LuceneTestCase {
     final IndexReader r = DirectoryReader.open(w);
     w.close();
 
-    // We can't wrap with "exotic" readers because the BKD query must see the BKDDVFormat:
-    IndexSearcher s = newSearcher(r, false);
+    IndexSearcher s = newSearcher(r);
 
-    final int iters = atLeast(75);
-
-    final AtomicBoolean failed = new AtomicBoolean();
+    int iters = atLeast(25);
 
     NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
 
-    for (int iter=0;iter<iters && failed.get() == false;iter++) {
+    Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
+    int maxDoc = s.getIndexReader().maxDoc();
+
+    for (int iter=0;iter<iters;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter + " s=" + s);
+      }
+
+      // Distance
+      final double centerLat = randomLat(small);
+      final double centerLon = randomLon(small);
+
+      final double radiusMeters;
+      if (small) {
+        // Approx 3 degrees lon at the equator:
+        radiusMeters = random().nextDouble() * 333000 + 1.0;
+      } else {
+        // So the query can cover at most 50% of the earth's surface:
+        radiusMeters = random().nextDouble() * GeoUtils.SEMIMAJOR_AXIS * Math.PI / 2.0 + 1.0;
+      }
+
+      if (VERBOSE) {
+        final DecimalFormat df = new DecimalFormat("#,###.00", DecimalFormatSymbols.getInstance(Locale.ENGLISH));
+        System.out.println("  radiusMeters = " + df.format(radiusMeters));
+      }
+
+      Query query = newDistanceQuery(FIELD_NAME, centerLat, centerLon, radiusMeters);
 
       if (VERBOSE) {
-        System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter + " s=" + s);
+        System.out.println("  query=" + query);
       }
-      Query query;
-      VerifyHits verifyHits;
 
-      if (random().nextBoolean()) {
-        // Rect: don't allow dateline crossing when testing small:
-        final GeoRect rect = randomRect(small, small == false);
+      final FixedBitSet hits = new FixedBitSet(maxDoc);
+      s.search(query, new SimpleCollector() {
 
-        query = newRectQuery(FIELD_NAME, rect.minLat, rect.maxLat, rect.minLon, rect.maxLon);
+          private int docBase;
 
-        verifyHits = new VerifyHits() {
           @Override
-          protected boolean shouldMatch(double pointLat, double pointLon) {
-            return rectContainsPoint(rect, pointLat, pointLon);
+          public boolean needsScores() {
+            return false;
           }
+
           @Override
-          protected void describe(int docID, double lat, double lon) {
+          protected void doSetNextReader(LeafReaderContext context) throws IOException {
+            docBase = context.docBase;
           }
-        };
 
-      } else if (random().nextBoolean()) {
-        // Distance
-        final double centerLat = randomLat(small);
-        final double centerLon = randomLon(small);
+          @Override
+          public void collect(int doc) {
+            hits.set(docBase+doc);
+          }
+        });
 
-        final double radiusMeters;
-        if (small) {
-          // Approx 3 degrees lon at the equator:
-          radiusMeters = random().nextDouble() * 333000 + 1.0;
+      boolean fail = false;
+      for(int docID=0;docID<maxDoc;docID++) {
+        int id = (int) docIDToID.get(docID);
+        boolean expected;
+        if (liveDocs != null && liveDocs.get(docID) == false) {
+          // document is deleted
+          expected = false;
+        } else if (Double.isNaN(lats[id])) {
+          expected = false;
         } else {
-          // So the query can cover at most 50% of the earth's surface:
-          radiusMeters = random().nextDouble() * GeoUtils.SEMIMAJOR_AXIS * Math.PI / 2.0 + 1.0;
+          expected = circleContainsPoint(centerLat, centerLon, radiusMeters, lats[id], lons[id]);
+        }
+
+        if (hits.get(docID) != expected) {
+          StringBuilder b = new StringBuilder();
+
+          if (expected) {
+            b.append("FAIL: id=" + id + " should match but did not\n");
+          } else {
+            b.append("FAIL: id=" + id + " should not match but did\n");
+          }
+          b.append("  query=" + query + " docID=" + docID + "\n");
+          b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
+          b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
+          if (Double.isNaN(lats[id]) == false) {
+            double distanceMeters = SloppyMath.haversinMeters(centerLat, centerLon, lats[id], lons[id]);
+            b.append("  centerLat=" + centerLat + " centerLon=" + centerLon + " distanceMeters=" + distanceMeters + " vs radiusMeters=" + radiusMeters);
+          }
+          if (true) {
+            fail("wrong hit (first of possibly more):\n\n" + b);
+          } else {
+            System.out.println(b.toString());
+            fail = true;
+          }
         }
+      }
+      if (fail) {
+        fail("some hits were wrong");
+      }
+    }
+
+    IOUtils.close(r, dir);
+  }
 
+  protected void verifyRandomPolygons(boolean small, double[] lats, double[] lons) throws Exception {
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    // Else we can get O(N^2) merging:
+    int mbd = iwc.getMaxBufferedDocs();
+    if (mbd != -1 && mbd < lats.length/100) {
+      iwc.setMaxBufferedDocs(lats.length/100);
+    }
+    Directory dir;
+    if (lats.length > 100000) {
+      dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
+    } else {
+      dir = newDirectory();
+    }
+
+    Set<Integer> deleted = new HashSet<>();
+    // RandomIndexWriter is too slow here:
+    IndexWriter w = new IndexWriter(dir, iwc);
+    for(int id=0;id<lats.length;id++) {
+      Document doc = new Document();
+      doc.add(newStringField("id", ""+id, Field.Store.NO));
+      doc.add(new NumericDocValuesField("id", id));
+      if (Double.isNaN(lats[id]) == false) {
+        addPointToDoc(FIELD_NAME, doc, lats[id], lons[id]);
+      }
+      w.addDocument(doc);
+      if (id > 0 && random().nextInt(100) == 42) {
+        int idToDelete = random().nextInt(id);
+        w.deleteDocuments(new Term("id", ""+idToDelete));
+        deleted.add(idToDelete);
         if (VERBOSE) {
-          final DecimalFormat df = new DecimalFormat("#,###.00", DecimalFormatSymbols.getInstance(Locale.ENGLISH));
-          System.out.println("  radiusMeters = " + df.format(radiusMeters));
+          System.out.println("  delete id=" + idToDelete);
         }
+      }
+    }
 
-        query = newDistanceQuery(FIELD_NAME, centerLat, centerLon, radiusMeters);
+    if (random().nextBoolean()) {
+      w.forceMerge(1);
+    }
+    final IndexReader r = DirectoryReader.open(w);
+    w.close();
 
-        verifyHits = new VerifyHits() {
-          @Override
-          protected boolean shouldMatch(double pointLat, double pointLon) {
-            return circleContainsPoint(centerLat, centerLon, radiusMeters, pointLat, pointLon);
-          }
+    // nocommit can we wrap again?
+    // We can't wrap with "exotic" readers because points needs to work:
+    IndexSearcher s = newSearcher(r, false);
 
-          @Override
-          protected void describe(int docID, double pointLat, double pointLon) {
-            double distanceMeters = SloppyMath.haversinMeters(centerLat, centerLon, pointLat, pointLon);
-            System.out.println("  docID=" + docID + " centerLat=" + centerLat + " centerLon=" + centerLon
-                + " pointLat=" + pointLat + " pointLon=" + pointLon + " distanceMeters=" + distanceMeters
-                + " vs radiusMeters=" + radiusMeters);
-          }
-        };
+    final int iters = atLeast(75);
 
-        // TODO: get poly query working with dateline crossing too (how?)!
-      } else {
+    NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
 
-        // TODO: poly query can't handle dateline crossing yet:
-        final GeoRect bbox = randomRect(small, false);
-
-        // Polygon
-        final double[] polyLats;
-        final double[] polyLons;
-        // TODO: factor this out, maybe if we add Polygon class?
-        switch (random().nextInt(3)) {
-          case 0:
-            // box
-            polyLats = new double[5];
-            polyLons = new double[5];
-            polyLats[0] = bbox.minLat;
-            polyLons[0] = bbox.minLon;
-            polyLats[1] = bbox.maxLat;
-            polyLons[1] = bbox.minLon;
-            polyLats[2] = bbox.maxLat;
-            polyLons[2] = bbox.maxLon;
-            polyLats[3] = bbox.minLat;
-            polyLons[3] = bbox.maxLon;
-            polyLats[4] = bbox.minLat;
-            polyLons[4] = bbox.minLon;
-            break;
-          case 1:
-            // right triangle
-            polyLats = new double[4];
-            polyLons = new double[4];
-            polyLats[0] = bbox.minLat;
-            polyLons[0] = bbox.minLon;
-            polyLats[1] = bbox.maxLat;
-            polyLons[1] = bbox.minLon;
-            polyLats[2] = bbox.maxLat;
-            polyLons[2] = bbox.maxLon;
-            polyLats[3] = bbox.minLat;
-            polyLons[3] = bbox.minLon;
-            break;
-          default:
-            // surprise me!
-            double[][] res = surpriseMePolygon();
-            polyLats = res[0];
-            polyLons = res[1];
-            break;
-        }
-        query = newPolygonQuery(FIELD_NAME, polyLats, polyLons);
+    Bits liveDocs = MultiFields.getLiveDocs(s.getIndexReader());
+    int maxDoc = s.getIndexReader().maxDoc();
+
+    for (int iter=0;iter<iters;iter++) {
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter + " s=" + s);
+      }
+
+      // TODO: poly query can't handle dateline crossing yet:
+      final GeoRect bbox = randomRect(small, false);
+
+      // Polygon
+      final double[] polyLats;
+      final double[] polyLons;
+      // TODO: factor this out, maybe if we add Polygon class?
+      switch (random().nextInt(3)) {
+      case 0:
+        // box
+        polyLats = new double[5];
+        polyLons = new double[5];
+        polyLats[0] = bbox.minLat;
+        polyLons[0] = bbox.minLon;
+        polyLats[1] = bbox.maxLat;
+        polyLons[1] = bbox.minLon;
+        polyLats[2] = bbox.maxLat;
+        polyLons[2] = bbox.maxLon;
+        polyLats[3] = bbox.minLat;
+        polyLons[3] = bbox.maxLon;
+        polyLats[4] = bbox.minLat;
+        polyLons[4] = bbox.minLon;
+        break;
+      case 1:
+        // right triangle
+        polyLats = new double[4];
+        polyLons = new double[4];
+        polyLats[0] = bbox.minLat;
+        polyLons[0] = bbox.minLon;
+        polyLats[1] = bbox.maxLat;
+        polyLons[1] = bbox.minLon;
+        polyLats[2] = bbox.maxLat;
+        polyLons[2] = bbox.maxLon;
+        polyLats[3] = bbox.minLat;
+        polyLons[3] = bbox.minLon;
+        break;
+      default:
+        // surprise me!
+        double[][] res = surpriseMePolygon();
+        polyLats = res[0];
+        polyLons = res[1];
+        break;
+      }
+      Query query = newPolygonQuery(FIELD_NAME, polyLats, polyLons);
+
+      if (VERBOSE) {
+        System.out.println("  query=" + query);
+      }
+
+      final FixedBitSet hits = new FixedBitSet(maxDoc);
+      s.search(query, new SimpleCollector() {
+
+          private int docBase;
 
-        verifyHits = new VerifyHits() {
           @Override
-          protected boolean shouldMatch(double pointLat, double pointLon) {
-            return polygonContainsPoint(polyLats, polyLons, pointLat, pointLon);
+          public boolean needsScores() {
+            return false;
           }
 
           @Override
-          protected void describe(int docID, double lat, double lon) {
+          protected void doSetNextReader(LeafReaderContext context) throws IOException {
+            docBase = context.docBase;
           }
-        };
-      }
 
-      if (query != null) {
+          @Override
+          public void collect(int doc) {
+            hits.set(docBase+doc);
+          }
+        });
 
-        if (VERBOSE) {
-          System.out.println("  query=" + query);
+      boolean fail = false;
+      for(int docID=0;docID<maxDoc;docID++) {
+        int id = (int) docIDToID.get(docID);
+        boolean expected;
+        if (liveDocs != null && liveDocs.get(docID) == false) {
+          // document is deleted
+          expected = false;
+        } else if (Double.isNaN(lats[id])) {
+          expected = false;
+        } else {
+          expected = polygonContainsPoint(polyLats, polyLons, lats[id], lons[id]);
         }
 
-        verifyHits.test(failed, small, s, docIDToID, deleted, query, lats, lons);
+        if (hits.get(docID) != expected) {
+          StringBuilder b = new StringBuilder();
+
+          if (expected) {
+            b.append("FAIL: id=" + id + " should match but did not\n");
+          } else {
+            b.append("FAIL: id=" + id + " should not match but did\n");
+          }
+          b.append("  query=" + query + " docID=" + docID + "\n");
+          b.append("  lat=" + lats[id] + " lon=" + lons[id] + "\n");
+          b.append("  deleted?=" + (liveDocs != null && liveDocs.get(docID) == false));
+          b.append("  polyLats=" + Arrays.toString(polyLats));
+          b.append("  polyLons=" + Arrays.toString(polyLons));
+          if (true) {
+            fail("wrong hit (first of possibly more):\n\n" + b);
+          } else {
+            System.out.println(b.toString());
+            fail = true;
+          }
+        }
+      }
+      if (fail) {
+        fail("some hits were wrong");
       }
     }