You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/02/25 19:41:56 UTC

[03/18] lucene-solr git commit: more tests; move factory method to IntPoint

more tests; move factory method to IntPoint


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1654818e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1654818e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1654818e

Branch: refs/heads/master
Commit: 1654818e9814b99a75d2d4f4ac590813fab2f10a
Parents: 96ed42b
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Feb 23 17:12:50 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Feb 23 17:12:50 2016 -0500

----------------------------------------------------------------------
 .../org/apache/lucene/document/IntPoint.java    | 40 +++++++++++
 .../apache/lucene/search/PointInSetQuery.java   | 66 ++++++++----------
 .../apache/lucene/search/TestPointQueries.java  | 70 ++++++++++++++++++--
 3 files changed, 133 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1654818e/lucene/core/src/java/org/apache/lucene/document/IntPoint.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/document/IntPoint.java b/lucene/core/src/java/org/apache/lucene/document/IntPoint.java
index be91bfd..42091d9 100644
--- a/lucene/core/src/java/org/apache/lucene/document/IntPoint.java
+++ b/lucene/core/src/java/org/apache/lucene/document/IntPoint.java
@@ -16,7 +16,12 @@
  */
 package org.apache.lucene.document;
 
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.search.PointInSetQuery;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.NumericUtils;
 
 /** An int field that is indexed dimensionally such that finding
@@ -88,6 +93,41 @@ public final class IntPoint extends Field {
     super(name, pack(point), getType(point.length));
   }
   
+  /** Returns a query efficiently finding all documents that indexed any of the provided 1D int values; {@code valuesIn} is cloned before sorting, so the caller's array is never modified. */
+  public static PointInSetQuery newSetQuery(String field, int... valuesIn) throws IOException {
+
+    // Don't unexpectedly change the user's incoming array:
+    int[] values = valuesIn.clone();
+
+    Arrays.sort(values);  // presumably PointInSetQuery expects its points in sorted order -- confirm
+
+    final BytesRef value = new BytesRef(new byte[Integer.BYTES]);  // one scratch buffer, overwritten and handed back by every next() call
+    value.length = Integer.BYTES;
+
+    return new PointInSetQuery(field, 1, Integer.BYTES,
+                               new BytesRefIterator() {
+
+                                 int upto;  // index into the sorted values of the next one to emit
+
+                                 @Override
+                                 public BytesRef next() {
+                                   if (upto == values.length) {
+                                     return null;  // every value has been handed out
+                                   } else {
+                                     IntPoint.encodeDimension(values[upto], value.bytes, 0);
+                                     upto++;
+                                     return value;
+                                   }
+                                 }
+                               }) {
+      @Override
+      protected String toString(byte[] value) {
+        assert value.length == Integer.BYTES;
+        return Integer.toString(decodeDimension(value, 0));  // show the int the packed bytes decode to rather than the raw bytes
+      }
+    };
+  }
+
   @Override
   public String toString() {
     StringBuilder result = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1654818e/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
index bbc9a54..adb9c53 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
@@ -80,40 +80,6 @@ public class PointInSetQuery extends Query {
     sortedPackedPointsHashCode = sortedPackedPoints.hashCode();
   }
 
-  /** Use in the 1D case when you indexed 1D int values using {@link org.apache.lucene.document.IntPoint} */
-  public static PointInSetQuery newIntSet(String field, int... valuesIn) {
-
-    // Don't unexpectedly change the user's incoming array:
-    int[] values = valuesIn.clone();
-
-    Arrays.sort(values);
-
-    final BytesRef value = new BytesRef(new byte[Integer.BYTES]);
-    value.length = Integer.BYTES;
-
-    try {
-      return new PointInSetQuery(field, 1, Integer.BYTES,
-                                 new BytesRefIterator() {
-
-                                   int upto;
-
-                                   @Override
-                                   public BytesRef next() {
-                                     if (upto == values.length) {
-                                       return null;
-                                     } else {
-                                       IntPoint.encodeDimension(values[upto], value.bytes, 0);
-                                       upto++;
-                                       return value;
-                                     }
-                                   }
-                                 });
-    } catch (IOException bogus) {
-      // Should never happen ;)
-      throw new RuntimeException(bogus);
-    }
-  }
-
   @Override
   public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
 
@@ -354,18 +320,44 @@ public class PointInSetQuery extends Query {
     sb.append(getClass().getSimpleName());
     sb.append(':');
     if (this.field.equals(field) == false) {
-      sb.append("field=");
+      sb.append(" field=");
       sb.append(this.field);
       sb.append(':');
     }
 
+    sb.append(" points:");
+
     TermIterator iterator = sortedPackedPoints.iterator();
+    byte[] pointBytes = new byte[numDims * bytesPerDim];
     for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
       sb.append(' ');
-      // nocommit fix me to convert back to the numbers/etc.:
-      sb.append(point);
+      System.arraycopy(point.bytes, point.offset, pointBytes, 0, pointBytes.length);
+      sb.append(toString(pointBytes));
     }
 
     return sb.toString();
   }
+
+  /**
+   * Returns a string of a single value in a human-readable format for debugging.
+   * This is used by {@link #toString()}, which calls it once for each point in the set.
+   *
+   * <p>The default implementation returns {@code binary(..)} holding each byte as unpadded
+   * hex, separated by single spaces; subclasses may override it to decode the value instead.
+   * @param value single packed value ({@code numDims * bytesPerDim} bytes), never null
+   * @return human readable value for debugging
+   */
+  protected String toString(byte[] value) {
+    assert value != null;
+    StringBuilder sb = new StringBuilder();
+    sb.append("binary(");
+    for (int i = 0; i < value.length; i++) {
+      if (i > 0) {
+        sb.append(' ');
+      }
+      sb.append(Integer.toHexString(value[i] & 0xFF));  // mask to 0..255 so a negative byte is not sign-extended to eight hex digits
+    }
+    sb.append(')');
+    return sb.toString();
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1654818e/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
index ea432f7..c48cacb 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
@@ -1139,12 +1139,70 @@ public class TestPointQueries extends LuceneTestCase {
 
     IndexReader r = DirectoryReader.open(w);
     IndexSearcher s = newSearcher(r);
-    assertEquals(0, s.count(PointInSetQuery.newIntSet("int", 16)));
-    assertEquals(1, s.count(PointInSetQuery.newIntSet("int", 17)));
-    assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 97, 42)));
-    assertEquals(3, s.count(PointInSetQuery.newIntSet("int", -7, 17, 42, 97)));
-    assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 20, 42, 97)));
-    assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 105, 42, 97)));
+    assertEquals(0, s.count(IntPoint.newSetQuery("int", 16)));
+    assertEquals(1, s.count(IntPoint.newSetQuery("int", 17)));
+    assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 97, 42)));
+    assertEquals(3, s.count(IntPoint.newSetQuery("int", -7, 17, 42, 97)));
+    assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 20, 42, 97)));
+    assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 105, 42, 97)));
+    w.close();
+    r.close();
+    dir.close();
+  }
+
+  public void testPointInSetQueryManyEqualValues() throws Exception {  // set queries must count correctly when 10000 docs share only two distinct values
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    iwc.setCodec(getCodec());
+    IndexWriter w = new IndexWriter(dir, iwc);
+
+    int zeroCount = 0;  // how many of the indexed docs received the value 0
+    for(int i=0;i<10000;i++) {
+      int x = random().nextInt(2);  // each doc gets either 0 or 1
+      if (x == 0) {
+        zeroCount++;
+      }
+      Document doc = new Document();
+      doc.add(new IntPoint("int", x));
+      w.addDocument(doc);
+    }
+
+    IndexReader r = DirectoryReader.open(w);
+    IndexSearcher s = newSearcher(r);
+    assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0)));
+    assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7)));  // -7 is never indexed, so it must add no hits
+    assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0)));  // 7 is never indexed either; values are passed out of order here
+    assertEquals(10000-zeroCount, s.count(IntPoint.newSetQuery("int", 1)));  // every doc that is not 0 holds 1
+    assertEquals(0, s.count(IntPoint.newSetQuery("int", 2)));  // 2 is never indexed
+    w.close();
+    r.close();
+    dir.close();
+  }
+
+  public void testPointInSetQueryManyEqualValuesBigGap() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig();
+    iwc.setCodec(getCodec());
+    IndexWriter w = new IndexWriter(dir, iwc);
+
+    int zeroCount = 0;
+    for(int i=0;i<10000;i++) {
+      int x = 200 * random().nextInt(2);
+      if (x == 0) {
+        zeroCount++;
+      }
+      Document doc = new Document();
+      doc.add(new IntPoint("int", x));
+      w.addDocument(doc);
+    }
+
+    IndexReader r = DirectoryReader.open(w);
+    IndexSearcher s = newSearcher(r);
+    assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0)));
+    assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7)));
+    assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0)));
+    assertEquals(10000-zeroCount, s.count(IntPoint.newSetQuery("int", 200)));
+    assertEquals(0, s.count(IntPoint.newSetQuery("int", 2)));
     w.close();
     r.close();
     dir.close();