You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by br...@apache.org on 2002/06/25 02:05:31 UTC

cvs commit: jakarta-lucene/src/test/org/apache/lucene/queryParser TestQueryParser.java

briangoetz    2002/06/24 17:05:31

  Modified:    src/java/org/apache/lucene/document DateField.java
                        Field.java
               src/java/org/apache/lucene/queryParser QueryParser.jj
               src/test/org/apache/lucene/queryParser TestQueryParser.java
  Log:
  Added support for the new range query syntax.  The delimiter is " TO ", but it
  is optional for backward compatibility with the previous syntax.  If both range
  arguments match the format supported by DateFormat.getDateInstance(DateFormat.SHORT),
  they are converted into the appropriate date strings, as DateField does.
  
  Added Field.Keyword "constructor" for Date-valued arguments.
  
  Optimized DateField.timeToString function.
  
  Submitted by:	Brian Goetz
  
  Revision  Changes    Path
  1.4       +7 -2      jakarta-lucene/src/java/org/apache/lucene/document/DateField.java
  
  Index: DateField.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/document/DateField.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- DateField.java	9 Jun 2002 20:47:22 -0000	1.3
  +++ DateField.java	25 Jun 2002 00:05:31 -0000	1.4
  @@ -105,8 +105,13 @@
       if (s.length() > DATE_LEN)
         throw new RuntimeException("time too late");
   
  -    while (s.length() < DATE_LEN)
  -      s = "0" + s;				  // pad with leading zeros
  +    // Pad with leading zeros
  +    if (s.length() < DATE_LEN) {
  +      StringBuffer sb = new StringBuffer(s);
  +      while (sb.length() < DATE_LEN)
  +        sb.insert(0, ' ');
  +      s = sb.toString();
  +    }
   
       return s;
     }
  
  
  
  1.3       +8 -0      jakarta-lucene/src/java/org/apache/lucene/document/Field.java
  
  Index: Field.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/document/Field.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- Field.java	20 Nov 2001 05:22:31 -0000	1.2
  +++ Field.java	25 Jun 2002 00:05:31 -0000	1.3
  @@ -55,6 +55,7 @@
    */
   
   import java.io.Reader;
  +import java.util.Date;
   
   /**
     A field is a section of a Document.  Each field has two parts, a name and a
  @@ -89,6 +90,13 @@
       fields, like "title" or "subject". */
     public static final Field Text(String name, String value) {
       return new Field(name, value, true, true, true);
  +  }
  +
  +  /** Constructs a Date-valued Field that is tokenized and indexed,
  +    and is stored in the index, for return with hits.  The Date value
  +    is converted to its string form with DateField.dateToString. */
  +  public static final Field Keyword(String name, Date value) {
  +    return new Field(name, DateField.dateToString(value), true, true, true);
     }
   
     /** Constructs a String-valued Field that is tokenized and indexed,
  
  
  
  1.18      +62 -33    jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj
  
  Index: QueryParser.jj
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -r1.17 -r1.18
  --- QueryParser.jj	20 May 2002 15:45:43 -0000	1.17
  +++ QueryParser.jj	25 Jun 2002 00:05:31 -0000	1.18
  @@ -65,8 +65,11 @@
   
   import java.util.Vector;
   import java.io.*;
  +import java.text.*;
  +import java.util.*;
   import org.apache.lucene.index.Term;
   import org.apache.lucene.analysis.*;
  +import org.apache.lucene.document.*;
   import org.apache.lucene.search.*;
   
   /**
  @@ -218,35 +221,30 @@
   
     private Query getRangeQuery(String field, 
                                 Analyzer analyzer, 
  -                              String queryText, 
  +                              String part1, 
  +                              String part2,
                                 boolean inclusive) 
     {
  -    // Use the analyzer to get all the tokens.  There should be 1 or 2.
  -    TokenStream source = analyzer.tokenStream(field, 
  -                                              new StringReader(queryText));
  -    Term[] terms = new Term[2];
  -    org.apache.lucene.analysis.Token t;
  +    boolean isDate = false, isNumber = false;
   
  -    for (int i = 0; i < 2; i++)
  -    {
  -      try 
  -      {
  -        t = source.next();
  -      } 
  -      catch (IOException e) 
  -      {
  -        t = null;
  -      }
  -      if (t != null)
  -      {
  -        String text = t.termText();
  -        if (!text.equalsIgnoreCase("NULL"))
  -        {
  -          terms[i] = new Term(field, text);
  -        }
  -      }
  +    try {
  +      DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
  +      df.setLenient(true);
  +      Date d1 = df.parse(part1);
  +      Date d2 = df.parse(part2);
  +      part1 = DateField.dateToString(d1);
  +      part2 = DateField.dateToString(d2);
  +      isDate = true;
       }
  -    return new RangeQuery(terms[0], terms[1], inclusive);
  +    catch (Exception e) { }
  +
  +    if (!isDate) {
  +      // @@@ Add number support
  +    }
  +
  +    return new RangeQuery(new Term(field, part1), 
  +                          new Term(field, part2), 
  +                          inclusive);
     }
   
     public static void main(String[] args) throws Exception {
  @@ -282,7 +280,7 @@
   | <#_WHITESPACE: ( " " | "\t" ) >
   }
   
  -<DEFAULT> SKIP : {
  +<DEFAULT, RangeIn, RangeEx> SKIP : {
     <<_WHITESPACE>>
   }
   
  @@ -303,14 +301,28 @@
   | <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
   | <WILDTERM:  <_TERM_START_CHAR> 
                 (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
  -| <RANGEIN:   "[" ( ~[ "]" ] )+ "]">
  -| <RANGEEX:   "{" ( ~[ "}" ] )+ "}">
  +| <RANGEIN_START: "[" > : RangeIn
  +| <RANGEEX_START: "{" > : RangeEx
   }
   
   <Boost> TOKEN : {
   <NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
   }
   
  +<RangeIn> TOKEN : {
  +<RANGEIN_TO: "TO">
  +| <RANGEIN_END: "]"> : DEFAULT
  +| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
  +| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
  +}
  +
  +<RangeEx> TOKEN : {
  +<RANGEEX_TO: "TO">
  +| <RANGEEX_END: "}"> : DEFAULT
  +| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
  +| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
  +}
  +
   // *   Query  ::= ( Clause )*
   // *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
   
  @@ -387,7 +399,7 @@
       
   
   Query Term(String field) : { 
  -  Token term, boost=null, slop=null;
  +  Token term, boost=null, slop=null, goop1, goop2;
     boolean prefix = false;
     boolean wildcard = false;
     boolean fuzzy = false;
  @@ -415,12 +427,29 @@
          else
            q = getFieldQuery(field, analyzer, term.image); 
        }
  -     | ( term=<RANGEIN> { rangein=true; } | term=<RANGEEX> )
  +     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
  +         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> ) 
  +         <RANGEIN_END> )
  +       [ <CARAT> boost=<NUMBER> ]
  +        {
  +          if (goop1.kind == RANGEIN_QUOTED)
  +            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
  +          if (goop2.kind == RANGEIN_QUOTED)
  +            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
  +
  +          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
  +        }
  +     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
  +         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> ) 
  +         <RANGEEX_END> )
          [ <CARAT> boost=<NUMBER> ]
           {
  -          q = getRangeQuery(field, analyzer, 
  -                            term.image.substring(1, term.image.length()-1), 
  -                            rangein);
  +          if (goop1.kind == RANGEEX_QUOTED)
  +            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
  +          if (goop2.kind == RANGEEX_QUOTED)
  +            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
  +
  +          q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
           }
        | term=<QUOTED> 
          [ slop=<SLOP> ]
  
  
  
  1.12      +25 -10    jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
  
  Index: TestQueryParser.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- TestQueryParser.java	6 May 2002 21:59:44 -0000	1.11
  +++ TestQueryParser.java	25 Jun 2002 00:05:31 -0000	1.12
  @@ -55,11 +55,14 @@
    */
   
   import java.io.*;
  +import java.text.*;
  +import java.util.*;
   import junit.framework.*;
   
   import org.apache.lucene.*;
   import org.apache.lucene.queryParser.*;
   import org.apache.lucene.search.*;
  +import org.apache.lucene.document.DateField;
   import org.apache.lucene.analysis.*;
   import org.apache.lucene.analysis.standard.*;
   import org.apache.lucene.analysis.Token;
  @@ -235,16 +238,28 @@
     }
   
     public void testRange() throws Exception {
  -    assertQueryEquals("[ a z]", null, "[a-z]");
  -    assertTrue(getQuery("[ a z]", null) instanceof RangeQuery);
  -    assertQueryEquals("[ a z ]", null, "[a-z]");
  -    assertQueryEquals("{ a z}", null, "{a-z}");
  -    assertQueryEquals("{ a z }", null, "{a-z}");
  -    assertQueryEquals("{ a z }^2.0", null, "{a-z}^2.0");
  -    assertQueryEquals("[ a z] OR bar", null, "[a-z] bar");
  -    assertQueryEquals("[ a z] AND bar", null, "+[a-z] +bar");
  -    assertQueryEquals("( bar blar { a z}) ", null, "bar blar {a-z}");
  -    assertQueryEquals("gack ( bar blar { a z}) ", null, "gack (bar blar {a-z})");
  +    assertQueryEquals("[ a TO z]", null, "[a-z]");
  +    assertTrue(getQuery("[ a TO z]", null) instanceof RangeQuery);
  +    assertQueryEquals("[ a TO z ]", null, "[a-z]");
  +    assertQueryEquals("{ a TO z}", null, "{a-z}");
  +    assertQueryEquals("{ a TO z }", null, "{a-z}");
  +    assertQueryEquals("{ a TO z }^2.0", null, "{a-z}^2.0");
  +    assertQueryEquals("[ a TO z] OR bar", null, "[a-z] bar");
  +    assertQueryEquals("[ a TO z] AND bar", null, "+[a-z] +bar");
  +    assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a-z}");
  +    assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a-z})");
  +  }
  +
  +  public String getDate(String s) throws Exception {
  +    DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
  +    return DateField.dateToString(df.parse(s));
  +  }
  +
  +  public void testDateRange() throws Exception {
  +    assertQueryEquals("[ 1/1/02 TO 1/4/02]", null, 
  +                      "[" + getDate("1/1/02") + "-" + getDate("1/4/02") + "]");
  +    assertQueryEquals("{  1/1/02    1/4/02   }", null, 
  +                      "{" + getDate("1/1/02") + "-" + getDate("1/4/02") + "}");
     }
   
     public void testEscaped() throws Exception {
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>