You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by eh...@apache.org on 2004/11/23 15:17:18 UTC

cvs commit: jakarta-lucene/src/java/org/apache/lucene/document DateField.java

ehatcher    2004/11/23 06:17:18

  Modified:    .        CHANGES.txt
               src/java/org/apache/lucene/search DateFilter.java
               src/java/org/apache/lucene/document DateField.java
  Added:       src/java/org/apache/lucene/search RangeFilter.java
               src/test/org/apache/lucene/search BaseTestRangeFilter.java
                        TestRangeFilter.java
  Log:
  Added RangeFilter and tests contributed by Chris M Hostetter.

Deprecated DateFilter and DateField.
  
  Revision  Changes    Path
  1.125     +4 -1      jakarta-lucene/CHANGES.txt
  
  Index: CHANGES.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
  retrieving revision 1.124
  retrieving revision 1.125
  diff -u -r1.124 -r1.125
  --- CHANGES.txt	19 Nov 2004 21:04:17 -0000	1.124
  +++ CHANGES.txt	23 Nov 2004 14:17:18 -0000	1.125
  @@ -47,6 +47,8 @@
   
    9. Added javadocs-internal to build.xml - bug #30360
       (Paul Elschot via Otis)
  +    
  +10. Added RangeFilter. (Chris M Hostetter via Erik)
   
   API Changes
   
  @@ -67,6 +69,7 @@
       
    4. Add a serializable Parameter Class to standardize parameter enum
       classes in BooleanClause and Field. (Christoph)
  + 
   
   Bug fixes
   
  
  
  
  1.11      +4 -1      jakarta-lucene/src/java/org/apache/lucene/search/DateFilter.java
  
  Index: DateFilter.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/DateFilter.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- DateFilter.java	29 Mar 2004 22:48:03 -0000	1.10
  +++ DateFilter.java	23 Nov 2004 14:17:18 -0000	1.11
  @@ -30,7 +30,10 @@
    * A Filter that restricts search results to a range of time.
    *
    * <p>For this to work, documents must have been indexed with a
  - * {@link DateField}.
  + * {@link DateField}.</p>
  + * 
  + * @deprecated Instead, use {@link RangeFilter} combined with 
  + *             {@link org.apache.lucene.document.DateTools}.
    */
   public class DateFilter extends Filter {
     String field;
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/RangeFilter.java
  
  Index: RangeFilter.java
  ===================================================================
  package org.apache.lucene.search;
  
  
  
  /**
   * Copyright 2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  import java.util.BitSet;
  import java.io.IOException;
  
  import org.apache.lucene.search.Filter;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.index.TermDocs;
  import org.apache.lucene.index.TermEnum;
  import org.apache.lucene.index.IndexReader;
  
  /**
   * A Filter that admits only those documents containing at least one term,
   * in a single named field, whose text lies within a range of values.
   * 
   * <p>
   * The term-walking logic is adapted from {@link RangeQuery}, but packaged
   * as a Filter (in the same spirit as {@link DateFilter}).  Term texts are
   * compared to the bounds with {@link String#compareTo(String)}.
   * </p>
   */
  public class RangeFilter extends Filter {
      
      private String fieldName;
      private String lowerTerm;
      private String upperTerm;
      private boolean includeLower;
      private boolean includeUpper;
  
      /**
       * Creates a range filter over the terms of one field.  Either bound
       * (but not both) may be <code>null</code> to leave that end open.
       *
       * @param fieldName The field this range applies to
       * @param lowerTerm The lower bound on this range, or null for none
       * @param upperTerm The upper bound on this range, or null for none
       * @param includeLower Does this range include the lower bound?
       * @param includeUpper Does this range include the upper bound?
       * @throws IllegalArgumentException if both bounds are null, or if a
       *         null bound is flagged as inclusive
       */
      public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
                         boolean includeLower, boolean includeUpper) {
          // reject unusable combinations before storing anything
          if (lowerTerm == null && upperTerm == null) {
              throw new IllegalArgumentException
                  ("At least one value must be non-null");
          }
          if (lowerTerm == null && includeLower) {
              throw new IllegalArgumentException
                  ("The lower bound must be non-null to be inclusive");
          }
          if (upperTerm == null && includeUpper) {
              throw new IllegalArgumentException
                  ("The upper bound must be non-null to be inclusive");
          }
          
          this.fieldName = fieldName;
          this.lowerTerm = lowerTerm;
          this.upperTerm = upperTerm;
          this.includeLower = includeLower;
          this.includeUpper = includeUpper;
      }
      
      /**
       * Constructs a filter for field <code>fieldName</code> matching
       * terms less than or equal to <code>upperTerm</code> (no lower bound).
       */
      public static RangeFilter Less(String fieldName, String upperTerm) {
          return new RangeFilter(fieldName, null, upperTerm, false, true);
      }
  
      /**
       * Constructs a filter for field <code>fieldName</code> matching
       * terms greater than or equal to <code>lowerTerm</code> (no upper bound).
       */
      public static RangeFilter More(String fieldName, String lowerTerm) {
          return new RangeFilter(fieldName, lowerTerm, null, true, false);
      }
      
      /**
       * Returns a BitSet in which a bit is set for every document that has
       * a term of this filter's field inside the range, and left clear for
       * every other document.
       */
      public BitSet bits(IndexReader reader) throws IOException {
          BitSet bits = new BitSet(reader.maxDoc());
          
          // start enumerating at the lower bound, or at the empty text
          // when the range has no lower bound
          String startText = (lowerTerm != null) ? lowerTerm : "";
          TermEnum enumerator = reader.terms(new Term(fieldName, startText));
          
          try {
              if (enumerator.term() == null) {
                  return bits;
              }
              
              // an exclusive lower bound means leading terms must be tested
              boolean checkLower = !includeLower;
              
              TermDocs termDocs = reader.termDocs();
              try {
                  do {
                      Term current = enumerator.term();
                      if (current == null
                          || !current.field().equals(fieldName)) {
                          // ran off the end of this field's terms
                          break;
                      }
                      
                      String text = current.text();
                      if (checkLower && lowerTerm != null
                          && text.compareTo(lowerTerm) <= 0) {
                          // not yet strictly past the exclusive lower bound
                          continue;
                      }
                      // once one term has passed, later terms need no check
                      checkLower = false;
                      
                      if (upperTerm != null) {
                          int cmp = upperTerm.compareTo(text);
                          /* stop when beyond the upper term, or when equal
                           * to it and the upper bound is exclusive */
                          if (cmp < 0 || (cmp == 0 && !includeUpper)) {
                              break;
                          }
                      }
                      
                      /* the term is in range: mark each of its documents */
                      termDocs.seek(current);
                      while (termDocs.next()) {
                          bits.set(termDocs.doc());
                      }
                  } while (enumerator.next());
                  
              } finally {
                  termDocs.close();
              }
          } finally {
              enumerator.close();
          }
  
          return bits;
      }
      
      /**
       * Renders the filter as <code>field:[lower-upper]</code>, using square
       * brackets for inclusive ends, curly braces for exclusive ends, and
       * nothing in place of a null bound.
       */
      public String toString() {
          StringBuffer text = new StringBuffer();
          text.append(fieldName).append(":");
          text.append(includeLower ? "[" : "{");
          if (lowerTerm != null) {
              text.append(lowerTerm);
          }
          text.append("-");
          if (upperTerm != null) {
              text.append(upperTerm);
          }
          text.append(includeUpper ? "]" : "}");
          return text.toString();
      }
  }
  
  
  
  1.1                  jakarta-lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java
  
  Index: BaseTestRangeFilter.java
  ===================================================================
  package org.apache.lucene.search;
  
  
  import java.util.Random;
  
  import junit.framework.TestCase;
  
  import org.apache.lucene.analysis.SimpleAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.store.RAMDirectory;
  
  /**
   * Shared fixture for range filter tests: builds an in-memory index of
   * documents carrying a sequential padded "id", a pseudo-random padded
   * "rand" value and a constant "body" keyword.
   */
  public class BaseTestRangeFilter extends TestCase {
  
      public static final boolean F = false;
      public static final boolean T = true;
      
      RAMDirectory index = new RAMDirectory();
      Random rand = new Random(101); // fixed seed so the test is deterministic
      
      // extremes of the random values actually indexed (set by build())
      int maxR = Integer.MIN_VALUE;
      int minR = Integer.MAX_VALUE;
  
      // inclusive range of document ids to index
      int minId = 0;
      int maxId = 10000;
  
      static final int intLength = Integer.toString(Integer.MAX_VALUE).length();
      
      /**
       * Renders any int as a fixed-width string: a one-character sign
       * marker ("-" for negatives, "0" otherwise) followed by zero-padded
       * digits.  Negative values are first shifted by adding
       * <code>Integer.MAX_VALUE + 1</code> so that their digits are
       * non-negative.
       */
      public static String pad(int n) {
          StringBuffer padded = new StringBuffer(40);
          if (n < 0) {
              padded.append("-");
              n = Integer.MAX_VALUE + n + 1;
          } else {
              padded.append("0");
          }
          
          String digits = Integer.toString(n);
          // the digit section is always intLength + 1 characters wide
          int zeros = intLength + 1 - digits.length();
          while (zeros > 0) {
              padded.append("0");
              zeros--;
          }
          
          return padded.append(digits).toString();
      }
  
      public BaseTestRangeFilter(String name) {
          super(name);
          build();
      }
      public BaseTestRangeFilter() {
          build();
      }
      
      /**
       * Populates {@link #index} with one document per id from minId to
       * maxId inclusive, recording the smallest and largest random value
       * in minR and maxR along the way.
       */
      private void build() {
          try {
              
              IndexWriter writer =
                  new IndexWriter(index, new SimpleAnalyzer(), T);
  
              for (int id = minId; id <= maxId; id++) {
                  int r = rand.nextInt();
                  maxR = Math.max(maxR, r);
                  minR = Math.min(minR, r);
                  
                  Document doc = new Document();
                  doc.add(Field.Keyword("id", pad(id)));
                  doc.add(Field.Keyword("rand", pad(r)));
                  doc.add(Field.Keyword("body", "body"));
                  writer.addDocument(doc);
              }
              
              writer.optimize();
              writer.close();
  
          } catch (Exception e) {
              throw new RuntimeException("can't build index", e);
          }
  
      }
  
      /**
       * Checks that padding an ascending list of ints yields strings of
       * equal length that are also in ascending String order.
       */
      public void testPad() {
  
          int[] tests = new int[] {
              -9999999, -99560, -100, -3, -1, 0, 3, 9, 10, 1000, 999999999
          };
          // compare each value with the one immediately before it
          for (int i = 1; i < tests.length; i++) {
              int a = tests[i-1];
              int b = tests[i];
              String aa = pad(a);
              String bb = pad(b);
              String label = a + ":" + aa + " vs " + b + ":" + bb;
              assertEquals("length of " + label, aa.length(), bb.length());
              assertTrue("compare less than " + label, aa.compareTo(bb) < 0);
          }
  
      }
  
  }
  
  
  
  1.1                  jakarta-lucene/src/test/org/apache/lucene/search/TestRangeFilter.java
  
  Index: TestRangeFilter.java
  ===================================================================
  package org.apache.lucene.search;
  
  
  import java.io.IOException;
  
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.Term;
  
  /**
   * A basic 'positive' unit test class for the RangeFilter class.
   *
   * <p>
   * NOTE: at the moment, this class only tests for 'positive' results;
   * it does not verify the results to ensure there are no 'false positives',
   * nor does it adequately test 'negative' results.  It also does not test
   * that garbage in results in an Exception.
   * </p>
   */
  public class TestRangeFilter extends BaseTestRangeFilter {
  
      public TestRangeFilter(String name) {
          super(name);
      }
      public TestRangeFilter() {
          super();
      }
  
      /**
       * Exercises RangeFilter against the sequential, padded "id" field
       * using bounded, half-open and single-value ranges.
       */
      public void testRangeFilterId() throws IOException {
  
          IndexReader reader = IndexReader.open(index);
          try {
              IndexSearcher search = new IndexSearcher(reader);
  
              // midpoint of the id range, offset by minId so it stays
              // correct if the range ever stops starting at zero
              int medId = minId + ((maxId - minId) / 2);
              
              String minIP = pad(minId);
              String maxIP = pad(maxId);
              String medIP = pad(medId);
          
              int numDocs = reader.numDocs();
              
              assertEquals("num of docs", 1+ maxId - minId, numDocs);
              
              Hits result;
              Query q = new TermQuery(new Term("body","body"));
  
              // test id, bounded on both ends
              
              result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T));
              assertEquals("find all", numDocs, result.length());
  
              result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F));
              assertEquals("all but last", numDocs-1, result.length());
  
              result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T));
              assertEquals("all but first", numDocs-1, result.length());
              
              result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F));
              assertEquals("all but ends", numDocs-2, result.length());
          
              result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T));
              assertEquals("med and up", 1+ maxId-medId, result.length());
              
              result = search.search(q,new RangeFilter("id",minIP,medIP,T,T));
              assertEquals("up to med", 1+ medId-minId, result.length());
  
              // unbounded id
  
              result = search.search(q,new RangeFilter("id",minIP,null,T,F));
              assertEquals("min and up", numDocs, result.length());
  
              result = search.search(q,new RangeFilter("id",null,maxIP,F,T));
              assertEquals("max and down", numDocs, result.length());
  
              result = search.search(q,new RangeFilter("id",minIP,null,F,F));
              assertEquals("not min, but up", numDocs-1, result.length());
              
              result = search.search(q,new RangeFilter("id",null,maxIP,F,F));
              assertEquals("not max, but down", numDocs-1, result.length());
              
              result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F));
              assertEquals("med and up, not max", maxId-medId, result.length());
              
              result = search.search(q,new RangeFilter("id",minIP,medIP,F,T));
              assertEquals("not min, up to med", medId-minId, result.length());
  
              // very small sets
  
              result = search.search(q,new RangeFilter("id",minIP,minIP,F,F));
              assertEquals("min,min,F,F", 0, result.length());
              result = search.search(q,new RangeFilter("id",medIP,medIP,F,F));
              assertEquals("med,med,F,F", 0, result.length());
              result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F));
              assertEquals("max,max,F,F", 0, result.length());
                           
              result = search.search(q,new RangeFilter("id",minIP,minIP,T,T));
              assertEquals("min,min,T,T", 1, result.length());
              result = search.search(q,new RangeFilter("id",null,minIP,F,T));
              assertEquals("nul,min,F,T", 1, result.length());
  
              result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T));
              assertEquals("max,max,T,T", 1, result.length());
              result = search.search(q,new RangeFilter("id",maxIP,null,T,F));
              assertEquals("max,nul,T,F", 1, result.length());
  
              result = search.search(q,new RangeFilter("id",medIP,medIP,T,T));
              assertEquals("med,med,T,T", 1, result.length());
              
          } finally {
              // the searcher wraps this reader, so closing the reader
              // releases everything opened by this test
              reader.close();
          }
      }
  
      /**
       * Exercises RangeFilter against the pseudo-random, padded "rand"
       * field using the smallest and largest values that were indexed.
       */
      public void testRangeFilterRand() throws IOException {
  
          IndexReader reader = IndexReader.open(index);
          try {
              IndexSearcher search = new IndexSearcher(reader);
  
              String minRP = pad(minR);
              String maxRP = pad(maxR);
          
              int numDocs = reader.numDocs();
              
              assertEquals("num of docs", 1+ maxId - minId, numDocs);
              
              Hits result;
              Query q = new TermQuery(new Term("body","body"));
  
              // test extremes, bounded on both ends
              
              result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T));
              assertEquals("find all", numDocs, result.length());
  
              result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F));
              assertEquals("all but biggest", numDocs-1, result.length());
  
              result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T));
              assertEquals("all but smallest", numDocs-1, result.length());
              
              result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F));
              assertEquals("all but extremes", numDocs-2, result.length());
          
              // unbounded
  
              result = search.search(q,new RangeFilter("rand",minRP,null,T,F));
              assertEquals("smallest and up", numDocs, result.length());
  
              result = search.search(q,new RangeFilter("rand",null,maxRP,F,T));
              assertEquals("biggest and down", numDocs, result.length());
  
              result = search.search(q,new RangeFilter("rand",minRP,null,F,F));
              assertEquals("not smallest, but up", numDocs-1, result.length());
              
              result = search.search(q,new RangeFilter("rand",null,maxRP,F,F));
              assertEquals("not biggest, but down", numDocs-1, result.length());
              
              // very small sets
  
              result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F));
              assertEquals("min,min,F,F", 0, result.length());
              result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F));
              assertEquals("max,max,F,F", 0, result.length());
                           
              result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T));
              assertEquals("min,min,T,T", 1, result.length());
              result = search.search(q,new RangeFilter("rand",null,minRP,F,T));
              assertEquals("nul,min,F,T", 1, result.length());
  
              result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T));
              assertEquals("max,max,T,T", 1, result.length());
              result = search.search(q,new RangeFilter("rand",maxRP,null,T,F));
              assertEquals("max,nul,T,F", 1, result.length());
              
          } finally {
              // the searcher wraps this reader, so closing the reader
              // releases everything opened by this test
              reader.close();
          }
      }
  
  }
  
  
  
  1.9       +2 -0      jakarta-lucene/src/java/org/apache/lucene/document/DateField.java
  
  Index: DateField.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/document/DateField.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- DateField.java	5 Sep 2004 21:27:29 -0000	1.8
  +++ DateField.java	23 Nov 2004 14:17:18 -0000	1.9
  @@ -36,6 +36,8 @@
    * Note: dates before 1970 cannot be used, and therefore cannot be
    * indexed when using this class. See {@link DateTools} for an
    * alternative without such a limitation.
  + * 
  + * @deprecated Use {@link DateTools} instead.
    */
   public class DateField {
     
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Re: cvs commit: jakarta-lucene/src/java/org/apache/lucene/document DateField.java

Posted by Erik Hatcher <er...@ehatchersolutions.com>.
On Nov 23, 2004, at 4:02 PM, Daniel Naber wrote:
> On Tuesday 23 November 2004 15:17, ehatcher@apache.org wrote:
>
>>   + *
>>   + * @deprecated Use {@link DateTools} instead.
>
> The new DateTools class isn't compatible with DateField, i.e. it returns
> different Strings. If we force people to use the new class it means 
> they
> have to re-index. It might be okay to deprecate DateField, but then it
> should not be removed for 2.0 (like the other deprecated classes).

Ah, good point.    The plot thickens.

Thanks for fixing up the licenses and other corrections.

	Erik


---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org


Re: cvs commit: jakarta-lucene/src/java/org/apache/lucene/document DateField.java

Posted by Daniel Naber <da...@t-online.de>.
On Tuesday 23 November 2004 15:17, ehatcher@apache.org wrote:

>   + *
>   + * @deprecated Use {@link DateTools} instead.

The new DateTools class isn't compatible with DateField, i.e. it returns 
different Strings. If we force people to use the new class it means they 
have to re-index. It might be okay to deprecate DateField, but then it 
should not be removed for 2.0 (like the other deprecated classes).

Regards
 Daniel

-- 
http://www.danielnaber.de

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org