You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/02/25 19:41:56 UTC
[03/18] lucene-solr git commit: more tests;
move factory method to IntPoint
more tests; move factory method to IntPoint
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1654818e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1654818e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1654818e
Branch: refs/heads/master
Commit: 1654818e9814b99a75d2d4f4ac590813fab2f10a
Parents: 96ed42b
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Feb 23 17:12:50 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Feb 23 17:12:50 2016 -0500
----------------------------------------------------------------------
.../org/apache/lucene/document/IntPoint.java | 40 +++++++++++
.../apache/lucene/search/PointInSetQuery.java | 66 ++++++++----------
.../apache/lucene/search/TestPointQueries.java | 70 ++++++++++++++++++--
3 files changed, 133 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1654818e/lucene/core/src/java/org/apache/lucene/document/IntPoint.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/document/IntPoint.java b/lucene/core/src/java/org/apache/lucene/document/IntPoint.java
index be91bfd..42091d9 100644
--- a/lucene/core/src/java/org/apache/lucene/document/IntPoint.java
+++ b/lucene/core/src/java/org/apache/lucene/document/IntPoint.java
@@ -16,7 +16,12 @@
*/
package org.apache.lucene.document;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.NumericUtils;
/** An int field that is indexed dimensionally such that finding
@@ -88,6 +93,41 @@ public final class IntPoint extends Field {
super(name, pack(point), getType(point.length));
}
+  /**
+   * Returns a query efficiently finding all documents that indexed any of the provided 1D int values.
+   *
+   * <p>The incoming values are cloned and the clone is sorted, so the caller's array is never
+   * reordered. The sorted values are handed to {@link PointInSetQuery} one at a time through a
+   * {@link BytesRefIterator} that re-encodes each value into one shared {@link BytesRef}.
+   *
+   * @param field field name passed through to the {@link PointInSetQuery} constructor
+   * @param valuesIn the int values to look up; cloned before sorting
+   * @return a {@link PointInSetQuery} whose per-value {@code toString} prints the decoded int
+   * @throws IOException declared by this method; presumably propagated from the
+   *         {@link PointInSetQuery} constructor
+   */
+  public static PointInSetQuery newSetQuery(String field, int... valuesIn) throws IOException {
+
+    // Sort a private copy so the caller's array keeps its original order:
+    final int[] sortedValues = valuesIn.clone();
+    Arrays.sort(sortedValues);
+
+    // One scratch buffer, overwritten and returned again on every next() call:
+    final BytesRef scratch = new BytesRef(new byte[Integer.BYTES]);
+    scratch.length = Integer.BYTES;
+
+    final BytesRefIterator encodedValues = new BytesRefIterator() {
+
+      int nextIndex;
+
+      @Override
+      public BytesRef next() {
+        if (nextIndex == sortedValues.length) {
+          // Exhausted: null signals the end of iteration
+          return null;
+        }
+        IntPoint.encodeDimension(sortedValues[nextIndex], scratch.bytes, 0);
+        nextIndex++;
+        return scratch;
+      }
+    };
+
+    return new PointInSetQuery(field, 1, Integer.BYTES, encodedValues) {
+      @Override
+      protected String toString(byte[] packedValue) {
+        assert packedValue.length == Integer.BYTES;
+        return Integer.toString(decodeDimension(packedValue, 0));
+      }
+    };
+  }
+
@Override
public String toString() {
StringBuilder result = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1654818e/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
index bbc9a54..adb9c53 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
@@ -80,40 +80,6 @@ public class PointInSetQuery extends Query {
sortedPackedPointsHashCode = sortedPackedPoints.hashCode();
}
- /** Use in the 1D case when you indexed 1D int values using {@link org.apache.lucene.document.IntPoint} */
- public static PointInSetQuery newIntSet(String field, int... valuesIn) {
-
- // Don't unexpectedly change the user's incoming array:
- int[] values = valuesIn.clone();
-
- Arrays.sort(values);
-
- final BytesRef value = new BytesRef(new byte[Integer.BYTES]);
- value.length = Integer.BYTES;
-
- try {
- return new PointInSetQuery(field, 1, Integer.BYTES,
- new BytesRefIterator() {
-
- int upto;
-
- @Override
- public BytesRef next() {
- if (upto == values.length) {
- return null;
- } else {
- IntPoint.encodeDimension(values[upto], value.bytes, 0);
- upto++;
- return value;
- }
- }
- });
- } catch (IOException bogus) {
- // Should never happen ;)
- throw new RuntimeException(bogus);
- }
- }
-
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
@@ -354,18 +320,44 @@ public class PointInSetQuery extends Query {
sb.append(getClass().getSimpleName());
sb.append(':');
if (this.field.equals(field) == false) {
- sb.append("field=");
+ sb.append(" field=");
sb.append(this.field);
sb.append(':');
}
+ sb.append(" points:");
+
TermIterator iterator = sortedPackedPoints.iterator();
+ byte[] pointBytes = new byte[numDims * bytesPerDim];
for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
sb.append(' ');
- // nocommit fix me to convert back to the numbers/etc.:
- sb.append(point);
+ System.arraycopy(point.bytes, point.offset, pointBytes, 0, pointBytes.length);
+ sb.append(toString(pointBytes));
}
return sb.toString();
}
+
+  /**
+   * Formats a single packed value as human-readable text for debugging; called once per point
+   * while this query's string form is being built.
+   *
+   * <p>The default implementation prints every byte as an unsigned hexadecimal number (without
+   * zero padding), separated by single spaces and wrapped in {@code binary(...)}. Subclasses may
+   * override it to decode the bytes back into their original values.
+   *
+   * @param value single value, never null
+   * @return human readable value for debugging
+   */
+  protected String toString(byte[] value) {
+    assert value != null;
+    StringBuilder hex = new StringBuilder("binary(");
+    // Empty before the first byte, a single space before every later one:
+    String separator = "";
+    for (byte b : value) {
+      hex.append(separator);
+      // Mask to 0..255 so negative bytes do not print as sign-extended ints
+      hex.append(Integer.toHexString(b & 0xFF));
+      separator = " ";
+    }
+    return hex.append(')').toString();
+  }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1654818e/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
index ea432f7..c48cacb 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
@@ -1139,12 +1139,70 @@ public class TestPointQueries extends LuceneTestCase {
IndexReader r = DirectoryReader.open(w);
IndexSearcher s = newSearcher(r);
- assertEquals(0, s.count(PointInSetQuery.newIntSet("int", 16)));
- assertEquals(1, s.count(PointInSetQuery.newIntSet("int", 17)));
- assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 97, 42)));
- assertEquals(3, s.count(PointInSetQuery.newIntSet("int", -7, 17, 42, 97)));
- assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 20, 42, 97)));
- assertEquals(3, s.count(PointInSetQuery.newIntSet("int", 17, 105, 42, 97)));
+ assertEquals(0, s.count(IntPoint.newSetQuery("int", 16)));
+ assertEquals(1, s.count(IntPoint.newSetQuery("int", 17)));
+ assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 97, 42)));
+ assertEquals(3, s.count(IntPoint.newSetQuery("int", -7, 17, 42, 97)));
+ assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 20, 42, 97)));
+ assertEquals(3, s.count(IntPoint.newSetQuery("int", 17, 105, 42, 97)));
+ w.close();
+ r.close();
+ dir.close();
+ }
+
+  /**
+   * Indexes 10000 documents, each with one {@code IntPoint} in field "int" whose value is
+   * {@code random().nextInt(2)}, then checks the hit counts of several set queries against the
+   * number of zeros that were indexed.
+   */
+  public void testPointInSetQueryManyEqualValues() throws Exception {
+    Directory directory = newDirectory();
+    IndexWriterConfig config = newIndexWriterConfig();
+    config.setCodec(getCodec());
+    IndexWriter writer = new IndexWriter(directory, config);
+
+    final int docCount = 10000;
+    int zeroCount = 0;
+    for (int docUpto = 0; docUpto < docCount; docUpto++) {
+      int pointValue = random().nextInt(2);
+      zeroCount += (pointValue == 0) ? 1 : 0;
+
+      Document doc = new Document();
+      doc.add(new IntPoint("int", pointValue));
+      writer.addDocument(doc);
+    }
+
+    IndexReader reader = DirectoryReader.open(writer);
+    IndexSearcher searcher = newSearcher(reader);
+
+    // Values that were never indexed (-7, 7) must not add hits:
+    assertEquals(zeroCount, searcher.count(IntPoint.newSetQuery("int", 0)));
+    assertEquals(zeroCount, searcher.count(IntPoint.newSetQuery("int", 0, -7)));
+    assertEquals(zeroCount, searcher.count(IntPoint.newSetQuery("int", 7, 0)));
+    // Every document that is not a zero is a one:
+    assertEquals(docCount - zeroCount, searcher.count(IntPoint.newSetQuery("int", 1)));
+    assertEquals(0, searcher.count(IntPoint.newSetQuery("int", 2)));
+
+    writer.close();
+    reader.close();
+    directory.close();
+  }
+
+ public void testPointInSetQueryManyEqualValuesBigGap() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ iwc.setCodec(getCodec());
+ IndexWriter w = new IndexWriter(dir, iwc);
+
+ int zeroCount = 0;
+ for(int i=0;i<10000;i++) {
+ int x = 200 * random().nextInt(2);
+ if (x == 0) {
+ zeroCount++;
+ }
+ Document doc = new Document();
+ doc.add(new IntPoint("int", x));
+ w.addDocument(doc);
+ }
+
+ IndexReader r = DirectoryReader.open(w);
+ IndexSearcher s = newSearcher(r);
+ assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0)));
+ assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 0, -7)));
+ assertEquals(zeroCount, s.count(IntPoint.newSetQuery("int", 7, 0)));
+ assertEquals(10000-zeroCount, s.count(IntPoint.newSetQuery("int", 200)));
+ assertEquals(0, s.count(IntPoint.newSetQuery("int", 2)));
w.close();
r.close();
dir.close();