You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by eh...@apache.org on 2004/11/23 15:17:18 UTC
cvs commit: jakarta-lucene/src/java/org/apache/lucene/document DateField.java
ehatcher 2004/11/23 06:17:18
Modified: . CHANGES.txt
src/java/org/apache/lucene/search DateFilter.java
src/java/org/apache/lucene/document DateField.java
Added: src/java/org/apache/lucene/search RangeFilter.java
src/test/org/apache/lucene/search BaseTestRangeFilter.java
TestRangeFilter.java
Log:
Added RangeFilter and tests contributed by Chris M Hostetter.
Deprecated DateFilter and DateField.
Revision Changes Path
1.125 +4 -1 jakarta-lucene/CHANGES.txt
Index: CHANGES.txt
===================================================================
RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
retrieving revision 1.124
retrieving revision 1.125
diff -u -r1.124 -r1.125
--- CHANGES.txt 19 Nov 2004 21:04:17 -0000 1.124
+++ CHANGES.txt 23 Nov 2004 14:17:18 -0000 1.125
@@ -47,6 +47,8 @@
9. Added javadocs-internal to build.xml - bug #30360
(Paul Elschot via Otis)
+
+10. Added RangeFilter. (Chris M Hostetter via Erik)
API Changes
@@ -67,6 +69,7 @@
4. Add a serializable Parameter Class to standardize parameter enum
classes in BooleanClause and Field. (Christoph)
+
Bug fixes
1.11 +4 -1 jakarta-lucene/src/java/org/apache/lucene/search/DateFilter.java
Index: DateFilter.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/DateFilter.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- DateFilter.java 29 Mar 2004 22:48:03 -0000 1.10
+++ DateFilter.java 23 Nov 2004 14:17:18 -0000 1.11
@@ -30,7 +30,10 @@
* A Filter that restricts search results to a range of time.
*
* <p>For this to work, documents must have been indexed with a
- * {@link DateField}.
+ * {@link DateField}.</p>
+ *
+ * @deprecated Instead, use {@link RangeFilter} combined with
+ * {@link org.apache.lucene.document.DateTools}.
*/
public class DateFilter extends Filter {
String field;
1.1 jakarta-lucene/src/java/org/apache/lucene/search/RangeFilter.java
Index: RangeFilter.java
===================================================================
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.BitSet;
import java.io.IOException;
import org.apache.lucene.search.Filter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexReader;
/**
 * A Filter that restricts search results to documents containing a term,
 * in a given field, whose text falls within a range of values.
 *
 * <p>
 * Either bound may be <code>null</code>, meaning the range is open on that
 * side; at least one bound must be given.  Bounds are compared against term
 * text with {@link String#compareTo(String)}.
 * </p>
 *
 * <p>
 * This code borrows heavily from {@link RangeQuery}, but implemented as a
 * Filter (much like {@link DateFilter}).
 * </p>
 */
public class RangeFilter extends Filter {

  private String fieldName;
  private String lowerTerm;
  private String upperTerm;
  private boolean includeLower;
  private boolean includeUpper;

  /**
   * Creates a filter over the given field and bounds.
   *
   * @param fieldName The field this range applies to
   * @param lowerTerm The lower bound on this range, or <code>null</code>
   *                  for no lower bound
   * @param upperTerm The upper bound on this range, or <code>null</code>
   *                  for no upper bound
   * @param includeLower Does this range include the lower bound?
   * @param includeUpper Does this range include the upper bound?
   * @throws IllegalArgumentException if both bounds are <code>null</code>,
   *         or if a bound is flagged as inclusive but is <code>null</code>
   */
  public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
                     boolean includeLower, boolean includeUpper) {
    final boolean noLower = (lowerTerm == null);
    final boolean noUpper = (upperTerm == null);

    if (noLower && noUpper) {
      throw new IllegalArgumentException
        ("At least one value must be non-null");
    }
    if (includeLower && noLower) {
      throw new IllegalArgumentException
        ("The lower bound must be non-null to be inclusive");
    }
    if (includeUpper && noUpper) {
      throw new IllegalArgumentException
        ("The upper bound must be non-null to be inclusive");
    }

    this.fieldName = fieldName;
    this.lowerTerm = lowerTerm;
    this.upperTerm = upperTerm;
    this.includeLower = includeLower;
    this.includeUpper = includeUpper;
  }

  /**
   * Constructs a filter for field <code>fieldName</code> matching
   * terms less than or equal to <code>upperTerm</code>.
   */
  public static RangeFilter Less(String fieldName, String upperTerm) {
    return new RangeFilter(fieldName, null, upperTerm, false, true);
  }

  /**
   * Constructs a filter for field <code>fieldName</code> matching
   * terms greater than or equal to <code>lowerTerm</code>.
   */
  public static RangeFilter More(String fieldName, String lowerTerm) {
    return new RangeFilter(fieldName, lowerTerm, null, true, false);
  }

  /**
   * Returns a BitSet with true for documents which should be
   * permitted in search results, and false for those that should
   * not.
   *
   * @param reader the index whose terms and documents are examined
   * @return a BitSet sized by <code>reader.maxDoc()</code> with one bit set
   *         per document containing a term inside the range
   * @throws IOException if reading from the index fails
   */
  public BitSet bits(IndexReader reader) throws IOException {
    final BitSet accepted = new BitSet(reader.maxDoc());

    // With no lower bound, start from the empty string in this field.
    final String startText = (lowerTerm == null) ? "" : lowerTerm;
    final TermEnum terms = reader.terms(new Term(fieldName, startText));
    try {
      if (terms.term() == null) {
        return accepted;
      }

      // True while an exclusive lower bound may still need to be skipped.
      boolean lowerPending = !includeLower;

      final TermDocs docs = reader.termDocs();
      try {
        // Examine the current term first, then advance; equivalent to a
        // do/while over terms.next().
        for (boolean more = true; more; more = terms.next()) {
          final Term current = terms.term();
          if (current == null || !current.field().equals(fieldName)) {
            break; // ran off the end of this field's terms
          }

          final String text = current.text();
          if (lowerPending && lowerTerm != null
              && text.compareTo(lowerTerm) <= 0) {
            continue; // not yet past the exclusive lower bound
          }
          lowerPending = false;

          if (upperTerm != null) {
            final int cmp = upperTerm.compareTo(text);
            // stop once beyond the upper term, or when sitting exactly on
            // an exclusive upper term
            if (cmp < 0 || (cmp == 0 && !includeUpper)) {
              break;
            }
          }

          // the term is inside the range: mark every document containing it
          docs.seek(current);
          while (docs.next()) {
            accepted.set(docs.doc());
          }
        }
      } finally {
        docs.close();
      }
    } finally {
      terms.close();
    }
    return accepted;
  }

  /**
   * Renders the filter as <code>field:[lower-upper]</code>, using square
   * brackets for inclusive bounds, curly braces for exclusive bounds, and
   * nothing in place of a <code>null</code> bound.
   */
  public String toString() {
    final StringBuffer out = new StringBuffer();
    out.append(fieldName).append(":");
    out.append(includeLower ? "[" : "{");
    if (lowerTerm != null) {
      out.append(lowerTerm);
    }
    out.append("-");
    if (upperTerm != null) {
      out.append(upperTerm);
    }
    out.append(includeUpper ? "]" : "}");
    return out.toString();
  }
}
1.1 jakarta-lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java
Index: BaseTestRangeFilter.java
===================================================================
package org.apache.lucene.search;
import java.util.Random;
import junit.framework.TestCase;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.RAMDirectory;
/**
 * Shared fixture for range filter tests: builds an in-memory index whose
 * documents carry a sequential, padded "id" keyword, a random, padded "rand"
 * keyword, and a constant "body" keyword.
 */
public class BaseTestRangeFilter extends TestCase {

  public static final boolean F = false;
  public static final boolean T = true;

  RAMDirectory index = new RAMDirectory();
  Random rand = new Random(101); // use a fixed seed so the test is deterministic

  /** Largest and smallest random values indexed; updated by build(). */
  int maxR = Integer.MIN_VALUE;
  int minR = Integer.MAX_VALUE;

  /** Inclusive bounds of the document ids that are indexed. */
  int minId = 0;
  int maxId = 10000;

  static final int intLength = Integer.toString(Integer.MAX_VALUE).length();

  /**
   * A simple padding function that should work with any int.
   *
   * <p>
   * The result is a one-character sign prefix ("-" for negative values,
   * "0" otherwise) followed by the value left-padded with zeros to
   * <code>intLength + 1</code> digits.  Negative values are shifted by
   * <code>Integer.MAX_VALUE + 1</code> into the non-negative range so that
   * a larger (less negative) input yields larger digits.
   * </p>
   *
   * @param n the value to encode
   * @return the fixed-width encoding of <code>n</code>
   */
  public static String pad(int n) {
    StringBuffer b = new StringBuffer(40);
    String p = "0";
    if (n < 0) {
      p = "-";
      n = Integer.MAX_VALUE + n + 1;
    }
    b.append(p);
    String s = Integer.toString(n);
    for (int i = s.length(); i <= intLength; i++) {
      b.append("0");
    }
    b.append(s);
    return b.toString();
  }

  public BaseTestRangeFilter(String name) {
    super(name);
    build();
  }

  public BaseTestRangeFilter() {
    build();
  }

  /**
   * Populates {@link #index} with one document per id in
   * [minId, maxId] and records the extremes of the random values used.
   *
   * @throws RuntimeException wrapping any failure while building the index
   */
  private void build() {
    try {
      /* build an index */
      IndexWriter writer = new IndexWriter(index,
                                           new SimpleAnalyzer(), T);
      try {
        for (int d = minId; d <= maxId; d++) {
          Document doc = new Document();
          doc.add(Field.Keyword("id", pad(d)));

          int r = rand.nextInt();
          if (maxR < r) {
            maxR = r;
          }
          if (r < minR) {
            minR = r;
          }
          doc.add(Field.Keyword("rand", pad(r)));
          doc.add(Field.Keyword("body", "body"));
          writer.addDocument(doc);
        }
        writer.optimize();
      } finally {
        // always release the writer, even if indexing failed part-way
        writer.close();
      }
    } catch (Exception e) {
      throw new RuntimeException("can't build index", e);
    }
  }

  /**
   * Checks that pad() yields strings of equal length whose String ordering
   * matches the numeric ordering of an ascending list of sample values.
   */
  public void testPad() {
    int[] tests = new int[] {
      -9999999, -99560, -100, -3, -1, 0, 3, 9, 10, 1000, 999999999
    };
    for (int i = 0; i < tests.length - 1; i++) {
      int a = tests[i];
      int b = tests[i+1];
      String aa = pad(a);
      String bb = pad(b);
      String label = a + ":" + aa + " vs " + b + ":" + bb;
      assertEquals("length of " + label, aa.length(), bb.length());
      assertTrue("compare less than " + label, aa.compareTo(bb) < 0);
    }
  }
}
1.1 jakarta-lucene/src/test/org/apache/lucene/search/TestRangeFilter.java
Index: TestRangeFilter.java
===================================================================
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
/**
 * A basic 'positive' unit test class for the RangeFilter class.
 *
 * <p>
 * NOTE: at the moment, this class only tests for 'positive' results;
 * it does not verify the results to ensure there are no 'false positives',
 * nor does it adequately test 'negative' results.  It also does not test
 * that garbage in results in an Exception.
 * </p>
 */
public class TestRangeFilter extends BaseTestRangeFilter {

  public TestRangeFilter(String name) {
    super(name);
  }

  public TestRangeFilter() {
    super();
  }

  /**
   * Exercises RangeFilter against the sequential "id" field using the
   * minimum, median and maximum ids as bounds.
   */
  public void testRangeFilterId() throws IOException {
    IndexReader reader = IndexReader.open(index);
    // NOTE(review): the searcher is layered on this reader; closing the
    // reader is assumed to release everything it holds -- confirm against
    // IndexSearcher.close() semantics for the Lucene version in use.
    try {
      IndexSearcher search = new IndexSearcher(reader);

      // midpoint of [minId, maxId]; the offset matters if minId is not 0
      int medId = minId + ((maxId - minId) / 2);

      String minIP = pad(minId);
      String maxIP = pad(maxId);
      String medIP = pad(medId);

      int numDocs = reader.numDocs();
      assertEquals("num of docs", 1 + maxId - minId, numDocs);

      Hits result;
      Query q = new TermQuery(new Term("body","body"));

      // test id, bounded on both ends
      result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T));
      assertEquals("find all", numDocs, result.length());
      result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F));
      assertEquals("all but last", numDocs-1, result.length());
      result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T));
      assertEquals("all but first", numDocs-1, result.length());
      result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F));
      assertEquals("all but ends", numDocs-2, result.length());
      result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T));
      assertEquals("med and up", 1+ maxId-medId, result.length());
      result = search.search(q,new RangeFilter("id",minIP,medIP,T,T));
      assertEquals("up to med", 1+ medId-minId, result.length());

      // unbounded id
      result = search.search(q,new RangeFilter("id",minIP,null,T,F));
      assertEquals("min and up", numDocs, result.length());
      result = search.search(q,new RangeFilter("id",null,maxIP,F,T));
      assertEquals("max and down", numDocs, result.length());
      result = search.search(q,new RangeFilter("id",minIP,null,F,F));
      assertEquals("not min, but up", numDocs-1, result.length());
      result = search.search(q,new RangeFilter("id",null,maxIP,F,F));
      assertEquals("not max, but down", numDocs-1, result.length());
      result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F));
      assertEquals("med and up, not max", maxId-medId, result.length());
      result = search.search(q,new RangeFilter("id",minIP,medIP,F,T));
      assertEquals("not min, up to med", medId-minId, result.length());

      // very small sets
      result = search.search(q,new RangeFilter("id",minIP,minIP,F,F));
      assertEquals("min,min,F,F", 0, result.length());
      result = search.search(q,new RangeFilter("id",medIP,medIP,F,F));
      assertEquals("med,med,F,F", 0, result.length());
      result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F));
      assertEquals("max,max,F,F", 0, result.length());
      result = search.search(q,new RangeFilter("id",minIP,minIP,T,T));
      assertEquals("min,min,T,T", 1, result.length());
      result = search.search(q,new RangeFilter("id",null,minIP,F,T));
      assertEquals("nul,min,F,T", 1, result.length());
      result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T));
      assertEquals("max,max,T,T", 1, result.length());
      result = search.search(q,new RangeFilter("id",maxIP,null,T,F));
      assertEquals("max,nul,T,F", 1, result.length());
      result = search.search(q,new RangeFilter("id",medIP,medIP,T,T));
      assertEquals("med,med,T,T", 1, result.length());
    } finally {
      reader.close();
    }
  }

  /**
   * Exercises RangeFilter against the random "rand" field using the
   * smallest and largest indexed random values as bounds.
   */
  public void testRangeFilterRand() throws IOException {
    IndexReader reader = IndexReader.open(index);
    // NOTE(review): see testRangeFilterId() regarding closing only the reader.
    try {
      IndexSearcher search = new IndexSearcher(reader);

      String minRP = pad(minR);
      String maxRP = pad(maxR);

      int numDocs = reader.numDocs();
      assertEquals("num of docs", 1 + maxId - minId, numDocs);

      Hits result;
      Query q = new TermQuery(new Term("body","body"));

      // test extremes, bounded on both ends
      result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T));
      assertEquals("find all", numDocs, result.length());
      result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F));
      assertEquals("all but biggest", numDocs-1, result.length());
      result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T));
      assertEquals("all but smallest", numDocs-1, result.length());
      result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F));
      assertEquals("all but extremes", numDocs-2, result.length());

      // unbounded
      result = search.search(q,new RangeFilter("rand",minRP,null,T,F));
      assertEquals("smallest and up", numDocs, result.length());
      result = search.search(q,new RangeFilter("rand",null,maxRP,F,T));
      assertEquals("biggest and down", numDocs, result.length());
      result = search.search(q,new RangeFilter("rand",minRP,null,F,F));
      assertEquals("not smallest, but up", numDocs-1, result.length());
      result = search.search(q,new RangeFilter("rand",null,maxRP,F,F));
      assertEquals("not biggest, but down", numDocs-1, result.length());

      // very small sets
      result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F));
      assertEquals("min,min,F,F", 0, result.length());
      result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F));
      assertEquals("max,max,F,F", 0, result.length());
      result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T));
      assertEquals("min,min,T,T", 1, result.length());
      result = search.search(q,new RangeFilter("rand",null,minRP,F,T));
      assertEquals("nul,min,F,T", 1, result.length());
      result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T));
      assertEquals("max,max,T,T", 1, result.length());
      result = search.search(q,new RangeFilter("rand",maxRP,null,T,F));
      assertEquals("max,nul,T,F", 1, result.length());
    } finally {
      reader.close();
    }
  }
}
1.9 +2 -0 jakarta-lucene/src/java/org/apache/lucene/document/DateField.java
Index: DateField.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/document/DateField.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- DateField.java 5 Sep 2004 21:27:29 -0000 1.8
+++ DateField.java 23 Nov 2004 14:17:18 -0000 1.9
@@ -36,6 +36,8 @@
* Note: dates before 1970 cannot be used, and therefore cannot be
* indexed when using this class. See {@link DateTools} for an
* alternative without such a limitation.
+ *
+ * @deprecated Use {@link DateTools} instead.
*/
public class DateField {
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org
Re: cvs commit: jakarta-lucene/src/java/org/apache/lucene/document DateField.java
Posted by Erik Hatcher <er...@ehatchersolutions.com>.
On Nov 23, 2004, at 4:02 PM, Daniel Naber wrote:
> On Tuesday 23 November 2004 15:17, ehatcher@apache.org wrote:
>
>> + *
>> + * @deprecated Use {@link DateTools} instead.
>
> The new DateTools class isn't compatible with DateField, i.e. it returns
> different Strings. If we force people to use the new class it means
> they
> have to re-index. It might be okay to deprecate DateField, but then it
> should not be removed for 2.0 (like the other deprecated classes).
Ah, good point. The plot thickens.
Thanks for fixing up the licenses and other corrections.
Erik
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org
Re: cvs commit: jakarta-lucene/src/java/org/apache/lucene/document DateField.java
Posted by Daniel Naber <da...@t-online.de>.
On Tuesday 23 November 2004 15:17, ehatcher@apache.org wrote:
> + *
> + * @deprecated Use {@link DateTools} instead.
The new DateTools class isn't compatible with DateField, i.e. it returns
different Strings. If we force people to use the new class it means they
have to re-index. It might be okay to deprecate DateField, but then it
should not be removed for 2.0 (like the other deprecated classes).
Regards
Daniel
--
http://www.danielnaber.de
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org