You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2008/11/29 12:50:28 UTC
svn commit: r721663 - in /lucene/java/trunk: ./
src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/
Author: mikemccand
Date: Sat Nov 29 03:50:28 2008
New Revision: 721663
URL: http://svn.apache.org/viewvc?rev=721663&view=rev
Log:
LUCENE-1461: added FieldCacheRangeFilter, which speeds up creation of RangeFilter by using the FieldCache
Added:
lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (with props)
lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java (with props)
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java
lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java
Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=721663&r1=721662&r2=721663&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sat Nov 29 03:50:28 2008
@@ -96,6 +96,15 @@
Deprecated ConstantScoreRangeQuery (Mark Miller via Mike
McCandless)
+ 7. LUCENE-1461: Added FieldCacheRangeFilter, a RangeFilter for
+ single-term fields that uses FieldCache to compute the filter. If
+ your field has a single term per document, and you need to create
+ many RangeFilters with varying lower/upper bounds, then this is
+ likely a much faster way to create the filters than RangeFilter.
+ However, it comes at the expense of added RAM consumption and
+ slower first-time usage due to populating the FieldCache. (Tim
+ Sturge via Mike McCandless)
+
Optimizations
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java?rev=721663&r1=721662&r2=721663&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCache.java Sat Nov 29 03:50:28 2008
@@ -38,7 +38,29 @@
/** Expert: Stores term text values and document ordering data. */
public static class StringIndex {
-
+
+    /**
+     * Binary-searches {@link #lookup} for <code>key</code>.
+     *
+     * Slot 0 of <code>lookup</code> is never searched: a <code>null</code> key
+     * is answered with 0 directly, and the search itself covers slots
+     * <code>1 .. lookup.length-1</code> only.
+     *
+     * @param key the term text to locate; may be <code>null</code>
+     * @return 0 if <code>key</code> is <code>null</code>; the index (always
+     *         &gt;= 1) of <code>key</code> if it is present; otherwise
+     *         <code>-(insertionPoint + 1)</code>, where insertionPoint is the
+     *         index at which <code>key</code> would have to be inserted (the
+     *         same encoding java.util.Arrays.binarySearch uses)
+     */
+    public int binarySearchLookup(String key) {
+      // this special case is the reason that Arrays.binarySearch() isn't useful.
+      if (key == null)
+        return 0;
+
+      int low = 1;
+      int high = lookup.length-1;
+
+      while (low <= high) {
+        // unsigned shift: the midpoint stays non-negative even if low + high overflows int
+        int mid = (low + high) >>> 1;
+        int cmp = lookup[mid].compareTo(key);
+
+        if (cmp < 0)
+          low = mid + 1;
+        else if (cmp > 0)
+          high = mid - 1;
+        else
+          return mid; // key found
+      }
+      return -(low + 1);  // key not found.
+    }
+
/** All the term values, in natural order. */
public final String[] lookup;
Added: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java?rev=721663&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java Sat Nov 29 03:50:28 2008
@@ -0,0 +1,182 @@
+package org.apache.lucene.search;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+
+/**
+ * A range filter built on top of a cached single term field (in FieldCache).
+ *
+ * FieldCacheRangeFilter builds a single cache for the field the first time it is used.
+ *
+ * Each subsequent FieldCacheRangeFilter on the same field then reuses this cache,
+ * even if the range itself changes.
+ *
+ * This means that FieldCacheRangeFilter is much faster (sometimes more than 100x as fast)
+ * than building a RangeFilter (or ConstantScoreRangeQuery on a RangeFilter) for each query.
+ * However, if the range never changes it is slower (around 2x as slow) than building a
+ * CachingWrapperFilter on top of a single RangeFilter.
+ *
+ * As with all FieldCache based functionality, FieldCacheRangeFilter is only valid for
+ * fields which contain zero or one terms for each document. Thus it works on dates,
+ * prices and other single value fields but will not work on regular text fields. It is
+ * preferable to use an UN_TOKENIZED field to ensure that there is only a single term.
+ *
+ * Also, collation is done at the time the FieldCache is built; to change
+ * collation you need to override the getFieldCache() method to change the underlying cache.
+ *
+ * A <code>null</code> lower or upper value leaves that end of the range open. Documents
+ * that have no term in the field are never matched.
+ */
+
+public class FieldCacheRangeFilter extends Filter {
+  private final String field;
+  private final String lowerVal;
+  private final String upperVal;
+  private final boolean includeLower;
+  private final boolean includeUpper;
+
+  /**
+   * @param field the field whose cached terms are range-checked
+   * @param lowerVal lower end of the range, or <code>null</code> for no lower limit
+   * @param upperVal upper end of the range, or <code>null</code> for no upper limit
+   * @param includeLower whether a term equal to <code>lowerVal</code> matches
+   * @param includeUpper whether a term equal to <code>upperVal</code> matches
+   */
+  public FieldCacheRangeFilter(
+        String field,
+        String lowerVal,
+        String upperVal,
+        boolean includeLower,
+        boolean includeUpper) {
+    this.field = field;
+    this.lowerVal = lowerVal;
+    this.upperVal = upperVal;
+    this.includeLower = includeLower;
+    this.includeUpper = includeUpper;
+  }
+
+  /** Returns the FieldCache to read from; override to plug in a different cache. */
+  public FieldCache getFieldCache() {
+    return FieldCache.DEFAULT;
+  }
+
+  /** Returns the documents of <code>reader</code> whose term for the field lies in the range. */
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+    return new RangeMultiFilterDocIdSet(getFieldCache().getStringIndex(reader, field));
+  }
+
+  public String toString() {
+    StringBuffer buffer = new StringBuffer();
+    buffer.append(field);
+    buffer.append(":");
+    buffer.append(includeLower ? "[" : "{");
+    if (null != lowerVal) {
+      buffer.append(lowerVal);
+    }
+    buffer.append("-");
+    if (null != upperVal) {
+      buffer.append(upperVal);
+    }
+    buffer.append(includeUpper ? "]" : "}");
+    return buffer.toString();
+  }
+
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (!(o instanceof FieldCacheRangeFilter)) return false;
+    FieldCacheRangeFilter other = (FieldCacheRangeFilter) o;
+
+    if (!this.field.equals(other.field)
+        || this.includeLower != other.includeLower
+        || this.includeUpper != other.includeUpper
+       ) { return false; }
+    if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false;
+    if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false;
+    return true;
+  }
+
+  public int hashCode() {
+    int h = field.hashCode();
+    h ^= lowerVal != null ? lowerVal.hashCode() : 550356204;
+    h = (h << 1) | (h >>> 31);  // rotate to distinguish lower from upper
+    h ^= (upperVal != null ? (upperVal.hashCode()) : -1674416163);
+    h ^= (includeLower ? 1549299360 : -365038026)
+      ^ (includeUpper ? 1721088258 : 1948649653);
+
+    return h;
+  }
+
+  /**
+   * The set of documents whose term ordinal in the StringIndex falls inside
+   * [inclusiveLowerPoint, inclusiveUpperPoint].
+   */
+  protected class RangeMultiFilterDocIdSet extends DocIdSet {
+    private int inclusiveLowerPoint;
+    private int inclusiveUpperPoint;
+    private FieldCache.StringIndex fcsi;
+
+    public RangeMultiFilterDocIdSet(FieldCache.StringIndex fcsi) {
+      this.fcsi = fcsi;
+      initialize();
+    }
+
+    /** Translates the string bounds of the enclosing filter into inclusive ordinal bounds. */
+    private void initialize() {
+      if (lowerVal == null) {
+        // Open lower end. binarySearchLookup(null) would answer 0, the slot it
+        // never searches (in FieldCache.StringIndex that ordinal belongs to
+        // documents without a term), so start at the first real term instead.
+        inclusiveLowerPoint = 1;
+      } else {
+        int lowerPoint = fcsi.binarySearchLookup(lowerVal);
+        if (lowerPoint < 0) {
+          // not found: -lowerPoint-1 is the insertion point, i.e. the first term above lowerVal
+          inclusiveLowerPoint = -lowerPoint - 1;
+        } else {
+          inclusiveLowerPoint = includeLower ? lowerPoint : lowerPoint + 1;
+        }
+      }
+
+      if (upperVal == null) {
+        // Open upper end. Using the 0 that binarySearchLookup(null) answers
+        // would make the range empty, so admit every ordinal instead.
+        inclusiveUpperPoint = Integer.MAX_VALUE;
+      } else {
+        int upperPoint = fcsi.binarySearchLookup(upperVal);
+        if (upperPoint < 0) {
+          // not found: -upperPoint-2 is the last term below upperVal
+          inclusiveUpperPoint = -upperPoint - 2;
+        } else {
+          inclusiveUpperPoint = includeUpper ? upperPoint : upperPoint - 1;
+        }
+      }
+    }
+
+    public DocIdSetIterator iterator() {
+      return new RangeMultiFilterIterator();
+    }
+
+    /** Walks the doc ids in increasing order, stopping on those whose ordinal is in range. */
+    protected class RangeMultiFilterIterator extends DocIdSetIterator {
+      private int doc = -1;
+
+      public int doc() {
+        return doc;
+      }
+
+      public boolean next() {
+        // once exhausted, doc is Integer.MAX_VALUE and doc + 1 wraps negative,
+        // which advanceTo() treats as "still exhausted"
+        return advanceTo(doc + 1);
+      }
+
+      public boolean skipTo(int target) {
+        return advanceTo(target);
+      }
+
+      /**
+       * Positions the iterator on the first matching doc id at or after
+       * <code>candidate</code>. When there is none, or <code>candidate</code>
+       * is negative, sets doc to Integer.MAX_VALUE and returns false.
+       */
+      private boolean advanceTo(int candidate) {
+        final int maxDoc = fcsi.order.length;
+        if (candidate >= 0) {
+          for (int i = candidate; i < maxDoc; i++) {
+            final int ord = fcsi.order[i];
+            if (ord >= inclusiveLowerPoint && ord <= inclusiveUpperPoint) {
+              doc = i;
+              return true;
+            }
+          }
+        }
+        doc = Integer.MAX_VALUE;
+        return false;
+      }
+    }
+  }
+}
Propchange: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java?rev=721663&r1=721662&r2=721663&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/RangeFilter.java Sat Nov 29 03:50:28 2008
@@ -31,6 +31,9 @@
* This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter
*
* </p>
+ *
+ * If you construct a large number of range filters with different ranges but on the
+ * same field, {@link FieldCacheRangeFilter} may have significantly better performance.
*/
public class RangeFilter extends Filter {
Added: lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java?rev=721663&view=auto
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java (added)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java Sat Nov 29 03:50:28 2008
@@ -0,0 +1,379 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.text.Collator;
+import java.util.Locale;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * A basic 'positive' Unit test class for the FieldCacheRangeFilter class.
+ *
+ * <p>
+ * The non-collating tests (testRangeFilterId, testRangeFilterRand) build
+ * FieldCacheRangeFilter instances. The collating tests and testFarsi build
+ * RangeFilter, because FieldCacheRangeFilter has no constructor that takes a
+ * Collator.
+ *
+ * <p>
+ * NOTE: at the moment, this class only tests for 'positive' results,
+ * it does not verify the results to ensure there are no 'false positives',
+ * nor does it adequately test 'negative' results.  It also does not test
+ * that garbage in results in an Exception.
+ */
+public class TestFieldCacheRangeFilter extends BaseTestRangeFilter {
+
+  public TestFieldCacheRangeFilter(String name) {
+    super(name);
+  }
+  public TestFieldCacheRangeFilter() {
+    super();
+  }
+
+  /** Ranges over the "id" field of the signed index, filtered through FieldCacheRangeFilter. */
+  public void testRangeFilterId() throws IOException {
+
+    IndexReader reader = IndexReader.open(signedIndex.index);
+    IndexSearcher search = new IndexSearcher(reader);
+
+    int medId = ((maxId - minId) / 2);
+
+    String minIP = pad(minId);
+    String maxIP = pad(maxId);
+    String medIP = pad(medId);
+
+    int numDocs = reader.numDocs();
+
+    assertEquals("num of docs", numDocs, 1+ maxId - minId);
+
+    ScoreDoc[] result;
+    Query q = new TermQuery(new Term("body","body"));
+
+    // test id, bounded on both ends
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,maxIP,T,T), numDocs).scoreDocs;
+    assertEquals("find all", numDocs, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,maxIP,T,F), numDocs).scoreDocs;
+    assertEquals("all but last", numDocs-1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,maxIP,F,T), numDocs).scoreDocs;
+    assertEquals("all but first", numDocs-1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,maxIP,F,F), numDocs).scoreDocs;
+    assertEquals("all but ends", numDocs-2, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",medIP,maxIP,T,T), numDocs).scoreDocs;
+    assertEquals("med and up", 1+ maxId-medId, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,medIP,T,T), numDocs).scoreDocs;
+    assertEquals("up to med", 1+ medId-minId, result.length);
+
+    // unbounded id
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,null,T,F), numDocs).scoreDocs;
+    assertEquals("min and up", numDocs, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",null,maxIP,F,T), numDocs).scoreDocs;
+    assertEquals("max and down", numDocs, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,null,F,F), numDocs).scoreDocs;
+    assertEquals("not min, but up", numDocs-1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",null,maxIP,F,F), numDocs).scoreDocs;
+    assertEquals("not max, but down", numDocs-1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",medIP,maxIP,T,F), numDocs).scoreDocs;
+    assertEquals("med and up, not max", maxId-medId, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,medIP,F,T), numDocs).scoreDocs;
+    assertEquals("not min, up to med", medId-minId, result.length);
+
+    // very small sets
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,minIP,F,F), numDocs).scoreDocs;
+    assertEquals("min,min,F,F", 0, result.length);
+    result = search.search(q,new FieldCacheRangeFilter("id",medIP,medIP,F,F), numDocs).scoreDocs;
+    assertEquals("med,med,F,F", 0, result.length);
+    result = search.search(q,new FieldCacheRangeFilter("id",maxIP,maxIP,F,F), numDocs).scoreDocs;
+    assertEquals("max,max,F,F", 0, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",minIP,minIP,T,T), numDocs).scoreDocs;
+    assertEquals("min,min,T,T", 1, result.length);
+    result = search.search(q,new FieldCacheRangeFilter("id",null,minIP,F,T), numDocs).scoreDocs;
+    assertEquals("nul,min,F,T", 1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",maxIP,maxIP,T,T), numDocs).scoreDocs;
+    assertEquals("max,max,T,T", 1, result.length);
+    result = search.search(q,new FieldCacheRangeFilter("id",maxIP,null,T,F), numDocs).scoreDocs;
+    assertEquals("max,nul,T,T", 1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("id",medIP,medIP,T,T), numDocs).scoreDocs;
+    assertEquals("med,med,T,T", 1, result.length);
+
+  }
+
+  /**
+   * Collating ranges over the "id" field. Uses RangeFilter: FieldCacheRangeFilter
+   * has no constructor that accepts a Collator.
+   */
+  public void testRangeFilterIdCollating() throws IOException {
+
+    IndexReader reader = IndexReader.open(signedIndex.index);
+    IndexSearcher search = new IndexSearcher(reader);
+
+    Collator c = Collator.getInstance(Locale.ENGLISH);
+
+    int medId = ((maxId - minId) / 2);
+
+    String minIP = pad(minId);
+    String maxIP = pad(maxId);
+    String medIP = pad(medId);
+
+    int numDocs = reader.numDocs();
+
+    assertEquals("num of docs", numDocs, 1+ maxId - minId);
+
+    Hits result;
+    Query q = new TermQuery(new Term("body","body"));
+
+    // test id, bounded on both ends
+
+    result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T,c));
+    assertEquals("find all", numDocs, result.length());
+
+    result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F,c));
+    assertEquals("all but last", numDocs-1, result.length());
+
+    result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T,c));
+    assertEquals("all but first", numDocs-1, result.length());
+
+    result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F,c));
+    assertEquals("all but ends", numDocs-2, result.length());
+
+    result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T,c));
+    assertEquals("med and up", 1+ maxId-medId, result.length());
+
+    result = search.search(q,new RangeFilter("id",minIP,medIP,T,T,c));
+    assertEquals("up to med", 1+ medId-minId, result.length());
+
+    // unbounded id
+
+    result = search.search(q,new RangeFilter("id",minIP,null,T,F,c));
+    assertEquals("min and up", numDocs, result.length());
+
+    result = search.search(q,new RangeFilter("id",null,maxIP,F,T,c));
+    assertEquals("max and down", numDocs, result.length());
+
+    result = search.search(q,new RangeFilter("id",minIP,null,F,F,c));
+    assertEquals("not min, but up", numDocs-1, result.length());
+
+    result = search.search(q,new RangeFilter("id",null,maxIP,F,F,c));
+    assertEquals("not max, but down", numDocs-1, result.length());
+
+    result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F,c));
+    assertEquals("med and up, not max", maxId-medId, result.length());
+
+    result = search.search(q,new RangeFilter("id",minIP,medIP,F,T,c));
+    assertEquals("not min, up to med", medId-minId, result.length());
+
+    // very small sets
+
+    result = search.search(q,new RangeFilter("id",minIP,minIP,F,F,c));
+    assertEquals("min,min,F,F", 0, result.length());
+    result = search.search(q,new RangeFilter("id",medIP,medIP,F,F,c));
+    assertEquals("med,med,F,F", 0, result.length());
+    result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F,c));
+    assertEquals("max,max,F,F", 0, result.length());
+
+    result = search.search(q,new RangeFilter("id",minIP,minIP,T,T,c));
+    assertEquals("min,min,T,T", 1, result.length());
+    result = search.search(q,new RangeFilter("id",null,minIP,F,T,c));
+    assertEquals("nul,min,F,T", 1, result.length());
+
+    result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T,c));
+    assertEquals("max,max,T,T", 1, result.length());
+    result = search.search(q,new RangeFilter("id",maxIP,null,T,F,c));
+    assertEquals("max,nul,T,T", 1, result.length());
+
+    result = search.search(q,new RangeFilter("id",medIP,medIP,T,T,c));
+    assertEquals("med,med,T,T", 1, result.length());
+  }
+
+  /** Ranges over the "rand" field of the signed index, filtered through FieldCacheRangeFilter. */
+  public void testRangeFilterRand() throws IOException {
+
+    IndexReader reader = IndexReader.open(signedIndex.index);
+    IndexSearcher search = new IndexSearcher(reader);
+
+    String minRP = pad(signedIndex.minR);
+    String maxRP = pad(signedIndex.maxR);
+
+    int numDocs = reader.numDocs();
+
+    assertEquals("num of docs", numDocs, 1+ maxId - minId);
+
+    ScoreDoc[] result;
+    Query q = new TermQuery(new Term("body","body"));
+
+    // test extremes, bounded on both ends
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",minRP,maxRP,T,T), numDocs).scoreDocs;
+    assertEquals("find all", numDocs, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",minRP,maxRP,T,F), numDocs).scoreDocs;
+    assertEquals("all but biggest", numDocs-1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",minRP,maxRP,F,T), numDocs).scoreDocs;
+    assertEquals("all but smallest", numDocs-1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",minRP,maxRP,F,F), numDocs).scoreDocs;
+    assertEquals("all but extremes", numDocs-2, result.length);
+
+    // unbounded
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",minRP,null,T,F), numDocs).scoreDocs;
+    assertEquals("smallest and up", numDocs, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",null,maxRP,F,T), numDocs).scoreDocs;
+    assertEquals("biggest and down", numDocs, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",minRP,null,F,F), numDocs).scoreDocs;
+    assertEquals("not smallest, but up", numDocs-1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",null,maxRP,F,F), numDocs).scoreDocs;
+    assertEquals("not biggest, but down", numDocs-1, result.length);
+
+    // very small sets
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",minRP,minRP,F,F), numDocs).scoreDocs;
+    assertEquals("min,min,F,F", 0, result.length);
+    result = search.search(q,new FieldCacheRangeFilter("rand",maxRP,maxRP,F,F), numDocs).scoreDocs;
+    assertEquals("max,max,F,F", 0, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",minRP,minRP,T,T), numDocs).scoreDocs;
+    assertEquals("min,min,T,T", 1, result.length);
+    result = search.search(q,new FieldCacheRangeFilter("rand",null,minRP,F,T), numDocs).scoreDocs;
+    assertEquals("nul,min,F,T", 1, result.length);
+
+    result = search.search(q,new FieldCacheRangeFilter("rand",maxRP,maxRP,T,T), numDocs).scoreDocs;
+    assertEquals("max,max,T,T", 1, result.length);
+    result = search.search(q,new FieldCacheRangeFilter("rand",maxRP,null,T,F), numDocs).scoreDocs;
+    assertEquals("max,nul,T,T", 1, result.length);
+
+  }
+
+  /**
+   * Collating ranges over the "rand" field. Uses RangeFilter: FieldCacheRangeFilter
+   * has no constructor that accepts a Collator.
+   */
+  public void testRangeFilterRandCollating() throws IOException {
+
+    // using the unsigned index because collation seems to ignore hyphens
+    IndexReader reader = IndexReader.open(unsignedIndex.index);
+    IndexSearcher search = new IndexSearcher(reader);
+
+    Collator c = Collator.getInstance(Locale.ENGLISH);
+
+    String minRP = pad(unsignedIndex.minR);
+    String maxRP = pad(unsignedIndex.maxR);
+
+    int numDocs = reader.numDocs();
+
+    assertEquals("num of docs", numDocs, 1+ maxId - minId);
+
+    Hits result;
+    Query q = new TermQuery(new Term("body","body"));
+
+    // test extremes, bounded on both ends
+
+    result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T,c));
+    assertEquals("find all", numDocs, result.length());
+
+    result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F,c));
+    assertEquals("all but biggest", numDocs-1, result.length());
+
+    result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T,c));
+    assertEquals("all but smallest", numDocs-1, result.length());
+
+    result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F,c));
+    assertEquals("all but extremes", numDocs-2, result.length());
+
+    // unbounded
+
+    result = search.search(q,new RangeFilter("rand",minRP,null,T,F,c));
+    assertEquals("smallest and up", numDocs, result.length());
+
+    result = search.search(q,new RangeFilter("rand",null,maxRP,F,T,c));
+    assertEquals("biggest and down", numDocs, result.length());
+
+    result = search.search(q,new RangeFilter("rand",minRP,null,F,F,c));
+    assertEquals("not smallest, but up", numDocs-1, result.length());
+
+    result = search.search(q,new RangeFilter("rand",null,maxRP,F,F,c));
+    assertEquals("not biggest, but down", numDocs-1, result.length());
+
+    // very small sets
+
+    result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F,c));
+    assertEquals("min,min,F,F", 0, result.length());
+    result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F,c));
+    assertEquals("max,max,F,F", 0, result.length());
+
+    result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T,c));
+    assertEquals("min,min,T,T", 1, result.length());
+    result = search.search(q,new RangeFilter("rand",null,minRP,F,T,c));
+    assertEquals("nul,min,F,T", 1, result.length());
+
+    result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T,c));
+    assertEquals("max,max,T,T", 1, result.length());
+    result = search.search(q,new RangeFilter("rand",maxRP,null,T,F,c));
+    assertEquals("max,nul,T,T", 1, result.length());
+  }
+
+  /**
+   * Collation-sensitive range check on a single Farsi term. Uses RangeFilter:
+   * FieldCacheRangeFilter has no constructor that accepts a Collator.
+   */
+  public void testFarsi() throws Exception {
+
+    /* build an index */
+    RAMDirectory farsiIndex = new RAMDirectory();
+    IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T,
+                                         IndexWriter.MaxFieldLength.LIMITED);
+    Document doc = new Document();
+    doc.add(new Field("content","\u0633\u0627\u0628",
+                      Field.Store.YES, Field.Index.UN_TOKENIZED));
+    doc.add(new Field("body", "body",
+                      Field.Store.YES, Field.Index.UN_TOKENIZED));
+    writer.addDocument(doc);
+
+    writer.optimize();
+    writer.close();
+
+    IndexReader reader = IndexReader.open(farsiIndex);
+    IndexSearcher search = new IndexSearcher(reader);
+    Query q = new TermQuery(new Term("body","body"));
+
+    // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
+    // RuleBasedCollator.  However, the Arabic Locale seems to order the Farsi
+    // characters properly.
+    Collator collator = Collator.getInstance(new Locale("ar"));
+
+    // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
+    // orders the U+0698 character before the U+0633 character, so the single
+    // index Term below should NOT be returned by a RangeFilter with a Farsi
+    // Collator (or an Arabic one for the case when Farsi is not supported).
+    Hits result = search.search
+      (q, new RangeFilter("content", "\u062F", "\u0698", T, T, collator));
+    assertEquals("The index Term should not be included.", 0, result.length());
+
+    result = search.search
+      (q, new RangeFilter("content", "\u0633", "\u0638", T, T, collator));
+    assertEquals("The index Term should be included.", 1, result.length());
+    search.close();
+  }
+}
Propchange: lucene/java/trunk/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java
------------------------------------------------------------------------------
svn:eol-style = native