You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/08/01 09:48:54 UTC
svn commit: r1693682 [2/2] - in /lucene/dev/trunk/lucene: ./
sandbox/src/java/org/apache/lucene/rangetree/
sandbox/src/resources/META-INF/services/
sandbox/src/test/org/apache/lucene/rangetree/
Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SortedSetRangeTreeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SortedSetRangeTreeQuery.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SortedSetRangeTreeQuery.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/SortedSetRangeTreeQuery.java Sat Aug 1 07:48:53 2015
@@ -0,0 +1,219 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.ToStringUtils;
+
+import java.io.IOException;
+
+/** Finds all previously indexed values that fall within the specified {@link BytesRef} range.
+ *
+ * <p>The field must be indexed with {@link RangeTreeDocValuesFormat}, and {@link SortedSetDocValuesField} added per document.
+ *
+ * @lucene.experimental */
+
+public class SortedSetRangeTreeQuery extends Query {
+ final String field;
+ final BytesRef minValue;
+ final BytesRef maxValue;
+ final boolean minInclusive;
+ final boolean maxInclusive;
+
+ /** Matches all values in the specified {@link BytesRef} range. */
+ public SortedSetRangeTreeQuery(String field, BytesRef minValue, boolean minInclusive, BytesRef maxValue, boolean maxInclusive) {
+ this.field = field;
+ this.minInclusive = minInclusive;
+ this.minValue = minValue;
+ this.maxInclusive = maxInclusive;
+ this.maxValue = maxValue;
+ }
+
+ @Override
+ public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+
+ // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
+ // used in the first pass:
+
+ return new ConstantScoreWeight(this) {
+
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ LeafReader reader = context.reader();
+ final SortedSetDocValues ssdv = reader.getSortedSetDocValues(field);
+ if (ssdv == null) {
+ // No docs in this segment had this field
+ return null;
+ }
+
+ if (ssdv instanceof RangeTreeSortedSetDocValues == false) {
+ throw new IllegalStateException("field \"" + field + "\" was not indexed with RangeTreeDocValuesFormat: got: " + ssdv);
+ }
+ RangeTreeSortedSetDocValues treeDV = (RangeTreeSortedSetDocValues) ssdv;
+ RangeTreeReader tree = treeDV.getRangeTreeReader();
+
+ /*
+ for(int i=0;i<treeDV.getValueCount();i++) {
+ System.out.println(" ord " + i + " -> " + treeDV.lookupOrd(i));
+ }
+ */
+
+ // lower
+ final long minOrdIncl;
+ if (minValue == null) {
+ minOrdIncl = 0;
+ } else {
+ long ord = ssdv.lookupTerm(minValue);
+ if (ord >= 0) {
+ // Exact match
+ if (minInclusive) {
+ minOrdIncl = ord;
+ } else {
+ minOrdIncl = ord+1;
+ }
+ } else {
+ minOrdIncl = -ord-1;
+ }
+ }
+
+ // upper
+ final long maxOrdIncl;
+ if (maxValue == null) {
+ maxOrdIncl = Long.MAX_VALUE;
+ } else {
+ long ord = ssdv.lookupTerm(maxValue);
+ if (ord >= 0) {
+ // Exact match
+ if (maxInclusive) {
+ maxOrdIncl = ord;
+ } else {
+ maxOrdIncl = ord-1;
+ }
+ } else {
+ maxOrdIncl = -ord-2;
+ }
+ }
+
+ if (maxOrdIncl < minOrdIncl) {
+ // This can happen when the requested range lies entirely between 2 adjacent ords:
+ return null;
+ }
+
+ //System.out.println(reader + ": ORD: " + minOrdIncl + "-" + maxOrdIncl + "; " + minValue + " - " + maxValue);
+
+ // Just a "view" of only the ords from the SSDV, as an SNDV. Maybe we
+ // have this view implemented somewhere else already? It's not so bad that
+ // we are inefficient here (making 2 passes over the ords): this is only
+ // used in at most 2 leaf cells (the boundary cells).
+ SortedNumericDocValues ords = new SortedNumericDocValues() {
+
+ private long[] ords = new long[2];
+ private int count;
+
+ @Override
+ public void setDocument(int doc) {
+ ssdv.setDocument(doc);
+ long ord;
+ count = 0;
+ while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ if (count == ords.length) {
+ ords = ArrayUtil.grow(ords, count+1);
+ }
+ ords[count++] = ord;
+ }
+ }
+
+ @Override
+ public int count() {
+ return count;
+ }
+
+ @Override
+ public long valueAt(int index) {
+ return ords[index];
+ }
+ };
+
+ DocIdSet result = tree.intersect(minOrdIncl, maxOrdIncl, ords, context.reader().maxDoc());
+
+ final DocIdSetIterator disi = result.iterator();
+
+ return new ConstantScoreScorer(this, score(), disi);
+ }
+ };
+ }
+
+ @Override
+ public int hashCode() {
+ int hash = super.hashCode();
+ if (minValue != null) hash += minValue.hashCode()^0x14fa55fb;
+ if (maxValue != null) hash += maxValue.hashCode()^0x733fa5fe;
+ return hash +
+ (Boolean.valueOf(minInclusive).hashCode()^0x14fa55fb)+
+ (Boolean.valueOf(maxInclusive).hashCode()^0x733fa5fe);
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (super.equals(other)) {
+ final SortedSetRangeTreeQuery q = (SortedSetRangeTreeQuery) other;
+ return (
+ (q.minValue == null ? minValue == null : q.minValue.equals(minValue)) &&
+ (q.maxValue == null ? maxValue == null : q.maxValue.equals(maxValue)) &&
+ minInclusive == q.minInclusive &&
+ maxInclusive == q.maxInclusive
+ );
+ }
+
+ return false;
+ }
+
+ @Override
+ public String toString(String field) {
+ final StringBuilder sb = new StringBuilder();
+ sb.append(getClass().getSimpleName());
+ sb.append(':');
+ if (this.field.equals(field) == false) {
+ sb.append("field=");
+ sb.append(this.field);
+ sb.append(':');
+ }
+
+ return sb.append(minInclusive ? '[' : '{')
+ .append((minValue == null) ? "*" : minValue.toString())
+ .append(" TO ")
+ .append((maxValue == null) ? "*" : maxValue.toString())
+ .append(maxInclusive ? ']' : '}')
+ .append(ToStringUtils.boost(getBoost()))
+ .toString();
+ }
+}
Added: lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/package.html?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/package.html (added)
+++ lucene/dev/trunk/lucene/sandbox/src/java/org/apache/lucene/rangetree/package.html Sat Aug 1 07:48:53 2015
@@ -0,0 +1,28 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- not a package-info.java, because we already defined this package in core/ -->
+
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+This package contains a numeric tree implementation for indexing long values enabling fast range searching.
+</body>
+</html>
Modified: lucene/dev/trunk/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat?rev=1693682&r1=1693681&r2=1693682&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat (original)
+++ lucene/dev/trunk/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat Sat Aug 1 07:48:53 2015
@@ -14,4 +14,5 @@
# limitations under the License.
org.apache.lucene.bkdtree.BKDTreeDocValuesFormat
+org.apache.lucene.rangetree.RangeTreeDocValuesFormat
Added: lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/rangetree/TestRangeTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/rangetree/TestRangeTree.java?rev=1693682&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/rangetree/TestRangeTree.java (added)
+++ lucene/dev/trunk/lucene/sandbox/src/test/org/apache/lucene/rangetree/TestRangeTree.java Sat Aug 1 07:48:53 2015
@@ -0,0 +1,764 @@
+package org.apache.lucene.rangetree;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.lucene53.Lucene53Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.Accountables;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.junit.BeforeClass;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+// TODO: can test framework assert we don't leak temp files?
+
+public class TestRangeTree extends LuceneTestCase {
+
+ // Controls what range of values we randomly generate, so we sometimes test narrow ranges:
+ static long valueMid;
+ static int valueRange;
+
+ @BeforeClass
+ public static void beforeClass() {
+ if (random().nextBoolean()) {
+ valueMid = random().nextLong();
+ if (random().nextBoolean()) {
+ // Wide range
+ valueRange = TestUtil.nextInt(random(), 1, Integer.MAX_VALUE);
+ } else {
+ // Narrow range
+ valueRange = TestUtil.nextInt(random(), 1, 100000);
+ }
+ if (VERBOSE) {
+ System.out.println("TEST: will generate long values " + valueMid + " +/- " + valueRange);
+ }
+ } else {
+ // All longs
+ valueRange = 0;
+ if (VERBOSE) {
+ System.out.println("TEST: will generate all long values");
+ }
+ }
+ }
+
+ public void testAllEqual() throws Exception {
+ int numValues = atLeast(10000);
+ long value = randomValue();
+ long[] values = new long[numValues];
+ FixedBitSet missing = new FixedBitSet(numValues);
+
+ if (VERBOSE) {
+ System.out.println("TEST: use same value=" + value);
+ }
+
+ for(int docID=0;docID<numValues;docID++) {
+ int x = random().nextInt(20);
+ if (x == 17) {
+ // Some docs don't have a point:
+ missing.set(docID);
+ if (VERBOSE) {
+ System.out.println(" doc=" + docID + " is missing");
+ }
+ continue;
+ }
+ values[docID] = value;
+ }
+
+ verify(missing, values);
+ }
+
+ public void testMultiValued() throws Exception {
+ int numValues = atLeast(10000);
+ // Every doc has 2 values:
+ long[] values = new long[2*numValues];
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+
+ // We rely on docID order:
+ iwc.setMergePolicy(newLogMergePolicy());
+ int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
+ int maxPointsSortInHeap = TestUtil.nextInt(random(), 1024, 1024*1024);
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap));
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ for (int docID=0;docID<numValues;docID++) {
+ Document doc = new Document();
+ values[2*docID] = randomValue();
+ doc.add(new SortedNumericDocValuesField("value", values[2*docID]));
+ values[2*docID+1] = randomValue();
+ doc.add(new SortedNumericDocValuesField("value", values[2*docID+1]));
+ w.addDocument(doc);
+ }
+
+ if (random().nextBoolean()) {
+ w.forceMerge(1);
+ }
+ IndexReader r = w.getReader();
+ w.close();
+ // We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+
+ int iters = atLeast(100);
+ for (int iter=0;iter<iters;iter++) {
+ long lower = randomValue();
+ long upper = randomValue();
+
+ if (upper < lower) {
+ long x = lower;
+ lower = upper;
+ upper = x;
+ }
+
+ if (VERBOSE) {
+ System.out.println("\nTEST: iter=" + iter + " value=" + lower + " TO " + upper);
+ }
+
+ boolean includeLower = random().nextBoolean();
+ boolean includeUpper = random().nextBoolean();
+ Query query = new NumericRangeTreeQuery("value", lower, includeLower, upper, includeUpper);
+
+ final FixedBitSet hits = new FixedBitSet(r.maxDoc());
+ s.search(query, new SimpleCollector() {
+
+ private int docBase;
+
+ @Override
+ public boolean needsScores() {
+ return false;
+ }
+
+ @Override
+ protected void doSetNextReader(LeafReaderContext context) throws IOException {
+ docBase = context.docBase;
+ }
+
+ @Override
+ public void collect(int doc) {
+ hits.set(docBase+doc);
+ }
+ });
+
+ for(int docID=0;docID<values.length/2;docID++) {
+ long docValue1 = values[2*docID];
+ long docValue2 = values[2*docID+1];
+ boolean expected = matches(lower, includeLower, upper, includeUpper, docValue1) ||
+ matches(lower, includeLower, upper, includeUpper, docValue2);
+
+ if (hits.get(docID) != expected) {
+ fail("docID=" + docID + " docValue1=" + docValue1 + " docValue2=" + docValue2 + " expected " + expected + " but got: " + hits.get(docID));
+ }
+ }
+ }
+ r.close();
+ dir.close();
+ }
+
+ public void testMultiValuedSortedSet() throws Exception {
+ int numValues = atLeast(10000);
+ // Every doc has 2 values:
+ long[] values = new long[2*numValues];
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+
+ // We rely on docID order:
+ iwc.setMergePolicy(newLogMergePolicy());
+ int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
+ int maxPointsSortInHeap = TestUtil.nextInt(random(), 1024, 1024*1024);
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap));
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ for (int docID=0;docID<numValues;docID++) {
+ Document doc = new Document();
+ values[2*docID] = randomValue();
+ doc.add(new SortedSetDocValuesField("value", longToBytes(values[2*docID])));
+ values[2*docID+1] = randomValue();
+ doc.add(new SortedSetDocValuesField("value", longToBytes(values[2*docID+1])));
+ w.addDocument(doc);
+ }
+
+ if (random().nextBoolean()) {
+ w.forceMerge(1);
+ }
+ IndexReader r = w.getReader();
+ w.close();
+ // We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+
+ int iters = atLeast(100);
+ for (int iter=0;iter<iters;iter++) {
+ long lower = randomValue();
+ long upper = randomValue();
+
+ if (upper < lower) {
+ long x = lower;
+ lower = upper;
+ upper = x;
+ }
+
+ if (VERBOSE) {
+ System.out.println("\nTEST: iter=" + iter + " value=" + lower + " TO " + upper);
+ }
+
+ boolean includeLower = random().nextBoolean();
+ boolean includeUpper = random().nextBoolean();
+ Query query = new SortedSetRangeTreeQuery("value", longToBytes(lower), includeLower, longToBytes(upper), includeUpper);
+
+ final FixedBitSet hits = new FixedBitSet(r.maxDoc());
+ s.search(query, new SimpleCollector() {
+
+ private int docBase;
+
+ @Override
+ public boolean needsScores() {
+ return false;
+ }
+
+ @Override
+ protected void doSetNextReader(LeafReaderContext context) throws IOException {
+ docBase = context.docBase;
+ }
+
+ @Override
+ public void collect(int doc) {
+ hits.set(docBase+doc);
+ }
+ });
+
+ for(int docID=0;docID<values.length/2;docID++) {
+ long docValue1 = values[2*docID];
+ long docValue2 = values[2*docID+1];
+ boolean expected = matches(lower, includeLower, upper, includeUpper, docValue1) ||
+ matches(lower, includeLower, upper, includeUpper, docValue2);
+
+ if (hits.get(docID) != expected) {
+ fail("docID=" + docID + " docValue1=" + docValue1 + " docValue2=" + docValue2 + " expected " + expected + " but got: " + hits.get(docID));
+ }
+ }
+ }
+ r.close();
+ dir.close();
+ }
+
+ public void testRandomTiny() throws Exception {
+ // Make sure single-leaf-node case is OK:
+ doTestRandom(10);
+ }
+
+ public void testRandomMedium() throws Exception {
+ doTestRandom(10000);
+ }
+
+ @Nightly
+ public void testRandomBig() throws Exception {
+ doTestRandom(200000);
+ }
+
+ private void doTestRandom(int count) throws Exception {
+
+ int numValues = atLeast(count);
+
+ if (VERBOSE) {
+ System.out.println("TEST: numValues=" + numValues);
+ }
+
+ long[] values = new long[numValues];
+ FixedBitSet missing = new FixedBitSet(numValues);
+
+ boolean haveRealDoc = false;
+
+ for (int docID=0;docID<numValues;docID++) {
+ int x = random().nextInt(20);
+ if (x == 17) {
+ // Some docs don't have a point:
+ missing.set(docID);
+ if (VERBOSE) {
+ System.out.println(" doc=" + docID + " is missing");
+ }
+ continue;
+ }
+
+ if (docID > 0 && x == 0 && haveRealDoc) {
+ int oldDocID;
+ while (true) {
+ oldDocID = random().nextInt(docID);
+ if (missing.get(oldDocID) == false) {
+ break;
+ }
+ }
+
+ // Identical to old value
+ values[docID] = values[oldDocID];
+ if (VERBOSE) {
+ System.out.println(" doc=" + docID + " value=" + values[docID] + " bytes=" + longToBytes(values[docID]) + " (same as doc=" + oldDocID + ")");
+ }
+ } else {
+ values[docID] = randomValue();
+ haveRealDoc = true;
+ if (VERBOSE) {
+ System.out.println(" doc=" + docID + " value=" + values[docID] + " bytes=" + longToBytes(values[docID]));
+ }
+ }
+ }
+
+ verify(missing, values);
+ }
+
+ private static void verify(Bits missing, long[] values) throws Exception {
+ int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
+ int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
+ IndexWriterConfig iwc = newIndexWriterConfig();
+
+ // Else we can get O(N^2) merging:
+ int mbd = iwc.getMaxBufferedDocs();
+ if (mbd != -1 && mbd < values.length/100) {
+ iwc.setMaxBufferedDocs(values.length/100);
+ }
+ final DocValuesFormat dvFormat = new RangeTreeDocValuesFormat(maxPointsInLeaf, maxPointsSortInHeap);
+ Codec codec = new Lucene53Codec() {
+ @Override
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ if (field.equals("sn_value") || field.equals("ss_value")) {
+ return dvFormat;
+ } else {
+ return super.getDocValuesFormatForField(field);
+ }
+ }
+ };
+ iwc.setCodec(codec);
+ Directory dir;
+ if (values.length > 100000) {
+ dir = newFSDirectory(createTempDir("TestRangeTree"));
+ } else {
+ dir = newDirectory();
+ }
+ Set<Integer> deleted = new HashSet<>();
+ // RandomIndexWriter is too slow here:
+ IndexWriter w = new IndexWriter(dir, iwc);
+ for(int id=0;id<values.length;id++) {
+ Document doc = new Document();
+ doc.add(newStringField("id", ""+id, Field.Store.NO));
+ doc.add(new NumericDocValuesField("id", id));
+ if (missing.get(id) == false) {
+ doc.add(new SortedNumericDocValuesField("sn_value", values[id]));
+ doc.add(new SortedSetDocValuesField("ss_value", longToBytes(values[id])));
+ }
+ w.addDocument(doc);
+ if (id > 0 && random().nextInt(100) == 42) {
+ int idToDelete = random().nextInt(id);
+ w.deleteDocuments(new Term("id", ""+idToDelete));
+ deleted.add(idToDelete);
+ if (VERBOSE) {
+ System.out.println(" delete id=" + idToDelete);
+ }
+ }
+ }
+ if (random().nextBoolean()) {
+ w.forceMerge(1);
+ }
+ final IndexReader r = DirectoryReader.open(w, true);
+ w.close();
+
+ // We can't wrap with "exotic" readers because the NumericRangeTreeQuery must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+
+ int numThreads = TestUtil.nextInt(random(), 2, 5);
+
+ if (VERBOSE) {
+ System.out.println("TEST: use " + numThreads + " query threads");
+ }
+
+ List<Thread> threads = new ArrayList<>();
+ final int iters = atLeast(100);
+
+ final CountDownLatch startingGun = new CountDownLatch(1);
+ final AtomicBoolean failed = new AtomicBoolean();
+
+ for(int i=0;i<numThreads;i++) {
+ Thread thread = new Thread() {
+ @Override
+ public void run() {
+ try {
+ _run();
+ } catch (Exception e) {
+ failed.set(true);
+ throw new RuntimeException(e);
+ }
+ }
+
+ private void _run() throws Exception {
+ startingGun.await();
+
+ NumericDocValues docIDToID = MultiDocValues.getNumericValues(r, "id");
+
+ for (int iter=0;iter<iters && failed.get() == false;iter++) {
+ long lower = randomValue();
+ long upper = randomValue();
+
+ if (upper < lower) {
+ long x = lower;
+ lower = upper;
+ upper = x;
+ }
+
+ if (VERBOSE) {
+ System.out.println("\n" + Thread.currentThread().getName() + ": TEST: iter=" + iter + " value=" + lower + " TO " + upper);
+ }
+
+ boolean includeLower = random().nextBoolean();
+ boolean includeUpper = random().nextBoolean();
+ Query query;
+ if (random().nextBoolean()) {
+ query = new NumericRangeTreeQuery("sn_value", lower, includeLower, upper, includeUpper);
+ } else {
+ query = new SortedSetRangeTreeQuery("ss_value", longToBytes(lower), includeLower, longToBytes(upper), includeUpper);
+ }
+
+ if (VERBOSE) {
+ System.out.println(Thread.currentThread().getName() + ": using query: " + query);
+ }
+
+ final FixedBitSet hits = new FixedBitSet(r.maxDoc());
+ s.search(query, new SimpleCollector() {
+
+ private int docBase;
+
+ @Override
+ public boolean needsScores() {
+ return false;
+ }
+
+ @Override
+ protected void doSetNextReader(LeafReaderContext context) throws IOException {
+ docBase = context.docBase;
+ }
+
+ @Override
+ public void collect(int doc) {
+ hits.set(docBase+doc);
+ }
+ });
+
+ if (VERBOSE) {
+ System.out.println(Thread.currentThread().getName() + ": hitCount: " + hits.cardinality());
+ }
+
+ for(int docID=0;docID<r.maxDoc();docID++) {
+ int id = (int) docIDToID.get(docID);
+ boolean expected = missing.get(id) == false && deleted.contains(id) == false && matches(lower, includeLower, upper, includeUpper, values[id]);
+ if (hits.get(docID) != expected) {
+ // We do exact quantized comparison so the bbox query should never disagree:
+ fail(Thread.currentThread().getName() + ": iter=" + iter + " id=" + id + " docID=" + docID + " value=" + values[id] + " (range: " + lower + " TO " + upper + ") expected " + expected + " but got: " + hits.get(docID) + " deleted?=" + deleted.contains(id) + " query=" + query);
+ }
+ }
+ }
+ }
+ };
+ thread.setName("T" + i);
+ thread.start();
+ threads.add(thread);
+ }
+ startingGun.countDown();
+ for(Thread thread : threads) {
+ thread.join();
+ }
+ IOUtils.close(r, dir);
+ }
+
+ private static boolean matches(long lower, boolean includeLower, long upper, boolean includeUpper, long value) {
+ if (value > lower && value < upper) {
+ return true;
+ }
+ if (value == lower && includeLower) {
+ return true;
+ }
+ if (value == upper && includeUpper) {
+ return true;
+ }
+ return false;
+ }
+
+ private static long randomValue() {
+ if (valueRange == 0) {
+ return random().nextLong();
+ } else {
+ return valueMid + TestUtil.nextInt(random(), -valueRange, valueRange);
+ }
+ }
+
+ public void testAccountableHasDelegate() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedNumericDocValuesField("value", 187));
+ w.addDocument(doc);
+ IndexReader r = w.getReader();
+
+ // We can't wrap with "exotic" readers because the query must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+ // Need to run a query so the DV field is really loaded:
+ TopDocs hits = s.search(new NumericRangeTreeQuery("value", -30L, true, 187L, true), 1);
+ assertEquals(1, hits.totalHits);
+ assertTrue(Accountables.toString((Accountable) r.leaves().get(0).reader()).contains("delegate"));
+ IOUtils.close(r, w, dir);
+ }
+
+ public void testMinMaxLong() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedNumericDocValuesField("value", Long.MIN_VALUE));
+ w.addDocument(doc);
+
+ doc = new Document();
+ doc.add(new SortedNumericDocValuesField("value", Long.MAX_VALUE));
+ w.addDocument(doc);
+
+ IndexReader r = w.getReader();
+
+ // We can't wrap with "exotic" readers because the query must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+
+ assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, 0L, true)));
+ assertEquals(1, s.count(new NumericRangeTreeQuery("value", 0L, true, Long.MAX_VALUE, true)));
+ assertEquals(2, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true)));
+
+ IOUtils.close(r, w, dir);
+ }
+
+ public void testBasicSortedSet() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesField("value", new BytesRef("abc")));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new SortedSetDocValuesField("value", new BytesRef("def")));
+ w.addDocument(doc);
+
+ IndexReader r = w.getReader();
+
+ // We can't wrap with "exotic" readers because the query must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+
+ assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("aaa"), true, new BytesRef("bbb"), true)));
+ assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("c"), true, new BytesRef("e"), true)));
+ assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("z"), true)));
+
+ assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", null, true, new BytesRef("abc"), true)));
+ assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("abc"), true)));
+ assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("a"), true, new BytesRef("abc"), false)));
+
+ assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), true, null, false)));
+ assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), true, new BytesRef("z"), true)));
+ assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("def"), false, new BytesRef("z"), true)));
+
+ IOUtils.close(r, w, dir);
+ }
+
+ public void testLongMinMaxNumeric() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedNumericDocValuesField("value", Long.MIN_VALUE));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new SortedNumericDocValuesField("value", Long.MAX_VALUE));
+ w.addDocument(doc);
+
+ IndexReader r = w.getReader();
+
+ // We can't wrap with "exotic" readers because the query must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+
+ assertEquals(2, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, true)));
+ assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, true, Long.MAX_VALUE, false)));
+ assertEquals(1, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, true)));
+ assertEquals(0, s.count(new NumericRangeTreeQuery("value", Long.MIN_VALUE, false, Long.MAX_VALUE, false)));
+
+ assertEquals(2, s.count(new NumericRangeTreeQuery("value", null, true, null, true)));
+
+ IOUtils.close(r, w, dir);
+ }
+
+ public void testLongMinMaxSortedSet() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesField("value", longToBytes(Long.MIN_VALUE)));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new SortedSetDocValuesField("value", longToBytes(Long.MAX_VALUE)));
+ w.addDocument(doc);
+
+ IndexReader r = w.getReader();
+
+ // We can't wrap with "exotic" readers because the query must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+
+ assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), true, longToBytes(Long.MAX_VALUE), true)));
+ assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), true, longToBytes(Long.MAX_VALUE), false)));
+ assertEquals(1, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), false, longToBytes(Long.MAX_VALUE), true)));
+ assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", longToBytes(Long.MIN_VALUE), false, longToBytes(Long.MAX_VALUE), false)));
+
+ assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", null, true, null, true)));
+
+ IOUtils.close(r, w, dir);
+ }
+
+ public void testSortedSetNoOrdsMatch() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedSetDocValuesField("value", new BytesRef("a")));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new SortedSetDocValuesField("value", new BytesRef("z")));
+ w.addDocument(doc);
+
+ IndexReader r = w.getReader();
+
+ // We can't wrap with "exotic" readers because the query must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+ assertEquals(0, s.count(new SortedSetRangeTreeQuery("value", new BytesRef("m"), true, new BytesRef("n"), false)));
+
+ assertEquals(2, s.count(new SortedSetRangeTreeQuery("value", null, true, null, true)));
+
+ IOUtils.close(r, w, dir);
+ }
+
+ public void testNumericNoValuesMatch() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = new Document();
+ doc.add(new SortedNumericDocValuesField("value", 17));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new SortedNumericDocValuesField("value", 22));
+ w.addDocument(doc);
+
+ IndexReader r = w.getReader();
+
+ // We can't wrap with "exotic" readers because the query must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+ assertEquals(0, s.count(new NumericRangeTreeQuery("value", 17L, true, 13L, false)));
+
+ IOUtils.close(r, w, dir);
+ }
+
+ public void testNoDocs() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ Codec codec = TestUtil.alwaysDocValuesFormat(new RangeTreeDocValuesFormat());
+ iwc.setCodec(codec);
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ w.addDocument(new Document());
+
+ IndexReader r = w.getReader();
+
+ // We can't wrap with "exotic" readers because the query must see the NumericTreeDVFormat:
+ IndexSearcher s = newSearcher(r, false);
+ assertEquals(0, s.count(new NumericRangeTreeQuery("value", 17L, true, 13L, false)));
+
+ IOUtils.close(r, w, dir);
+ }
+
+ private static BytesRef longToBytes(long v) {
+ // Flip the sign bit so negative longs sort before positive longs:
+ v ^= 0x8000000000000000L;
+ byte[] bytes = new byte[8];
+ bytes[0] = (byte) (v >> 56);
+ bytes[1] = (byte) (v >> 48);
+ bytes[2] = (byte) (v >> 40);
+ bytes[3] = (byte) (v >> 32);
+ bytes[4] = (byte) (v >> 24);
+ bytes[5] = (byte) (v >> 16);
+ bytes[6] = (byte) (v >> 8);
+ bytes[7] = (byte) v;
+ return new BytesRef(bytes);
+ }
+
+ /*
+ private static long bytesToLong(BytesRef bytes) {
+ long v = ((bytes.bytes[bytes.offset]&0xFFL) << 56) |
+ ((bytes.bytes[bytes.offset+1]&0xFFL) << 48) |
+ ((bytes.bytes[bytes.offset+2]&0xFFL) << 40) |
+ ((bytes.bytes[bytes.offset+3]&0xFFL) << 32) |
+ ((bytes.bytes[bytes.offset+4]&0xFFL) << 24) |
+ ((bytes.bytes[bytes.offset+5]&0xFFL) << 16) |
+ ((bytes.bytes[bytes.offset+6]&0xFFL) << 8) |
+ (bytes.bytes[bytes.offset+7]&0xFFL);
+ // Flip the sign bit back:
+ return v ^ 0x8000000000000000L;
+ }
+ */
+}