You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by go...@apache.org on 2004/09/15 11:38:09 UTC

cvs commit: jakarta-lucene/src/java/org/apache/lucene/search PhraseScorer.java ExactPhraseScorer.java SloppyPhraseScorer.java PhraseQuery.java

goller      2004/09/15 02:38:09

  Modified:    src/java/org/apache/lucene/search PhraseScorer.java
                        ExactPhraseScorer.java SloppyPhraseScorer.java
                        PhraseQuery.java
  Log:
  PhraseQuery and PhrasePrefixQuery are extended. It's now
  possible to specify the relative position of a term within
  a phrase. This allows gaps and multiple terms at the same
  position.
  
  Revision  Changes    Path
  1.16      +3 -2      jakarta-lucene/src/java/org/apache/lucene/search/PhraseScorer.java
  
  Index: PhraseScorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/PhraseScorer.java,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- PhraseScorer.java	6 Aug 2004 20:19:13 -0000	1.15
  +++ PhraseScorer.java	15 Sep 2004 09:38:09 -0000	1.16
  @@ -32,7 +32,8 @@
   
     private float freq;
   
  -  PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
  +
  +  PhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
                  byte[] norms) {
       super(similarity);
       this.norms = norms;
  @@ -41,7 +42,7 @@
   
       // convert tps to a list
       for (int i = 0; i < tps.length; i++) {
  -      PhrasePositions pp = new PhrasePositions(tps[i], i);
  +      PhrasePositions pp = new PhrasePositions(tps[i], positions[i]);
         if (last != null) {			  // add next to end of list
           last.next = pp;
         } else
  
  
  
  1.7       +2 -2      jakarta-lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java
  
  Index: ExactPhraseScorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/ExactPhraseScorer.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- ExactPhraseScorer.java	11 May 2004 17:18:03 -0000	1.6
  +++ ExactPhraseScorer.java	15 Sep 2004 09:38:09 -0000	1.7
  @@ -21,9 +21,9 @@
   
   final class ExactPhraseScorer extends PhraseScorer {
   
  -  ExactPhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
  +  ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
                       byte[] norms) throws IOException {
  -    super(weight, tps, similarity, norms);
  +    super(weight, tps, positions, similarity, norms);
     }
   
     protected final float phraseFreq() throws IOException {
  
  
  
  1.8       +2 -2      jakarta-lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
  
  Index: SloppyPhraseScorer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/SloppyPhraseScorer.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- SloppyPhraseScorer.java	6 Aug 2004 20:19:12 -0000	1.7
  +++ SloppyPhraseScorer.java	15 Sep 2004 09:38:09 -0000	1.8
  @@ -23,9 +23,9 @@
   final class SloppyPhraseScorer extends PhraseScorer {
       private int slop;
   
  -    SloppyPhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
  +    SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] positions, Similarity similarity,
                          int slop, byte[] norms) {
  -        super(weight, tps, similarity, norms);
  +        super(weight, tps, positions, similarity, norms);
           this.slop = slop;
       }
   
  
  
  
  1.16      +46 -12    jakarta-lucene/src/java/org/apache/lucene/search/PhraseQuery.java
  
  Index: PhraseQuery.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/PhraseQuery.java,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- PhraseQuery.java	29 Mar 2004 22:48:03 -0000	1.15
  +++ PhraseQuery.java	15 Sep 2004 09:38:09 -0000	1.16
  @@ -29,6 +29,7 @@
   public class PhraseQuery extends Query {
     private String field;
     private Vector terms = new Vector();
  +  private Vector positions = new Vector();
     private int slop = 0;
   
     /** Constructs an empty phrase query. */
  @@ -52,21 +53,52 @@
     /** Returns the slop.  See setSlop(). */
     public int getSlop() { return slop; }
   
  -  /** Adds a term to the end of the query phrase. */
  +  /**
  +   * Adds a term to the end of the query phrase.
  +   * The relative position of the term is the one immediately after the last term added.
  +   */
     public void add(Term term) {
  -    if (terms.size() == 0)
  -      field = term.field();
  -    else if (term.field() != field)
  -      throw new IllegalArgumentException
  -	("All phrase terms must be in the same field: " + term);
  -
  -    terms.addElement(term);
  +    int position = 0;
  +    if(positions.size() > 0)
  +        position = ((Integer) positions.lastElement()).intValue() + 1;
  +    
  +    add(term, position);
  +  }
  +  
  +  /**
  +   * Adds a term to the end of the query phrase.
  +   * The relative position of the term within the phrase is specified explicitly.
  +   * This allows e.g. phrases with more than one term at the same position
  +   * or phrases with gaps (e.g. in connection with stopwords).
  +   * 
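  +   * @param term the term to add; it must be in the same field as the terms already added
  +   * @param position the relative position of the term within the phrase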
  +   */
  +  public void add(Term term, int position) {
  +      if (terms.size() == 0)
  +          field = term.field();
  +      else if (term.field() != field)
  +          throw new IllegalArgumentException("All phrase terms must be in the same field: " + term);
  +      
  +      terms.addElement(term);
  +      positions.addElement(new Integer(position));
     }
   
     /** Returns the set of terms in this phrase. */
     public Term[] getTerms() {
       return (Term[])terms.toArray(new Term[0]);
     }
  +  
  +  /**
  +   * Returns the relative positions of terms in this phrase.
  +   * @return the relative position of each term, in the order the terms were added
  +   */
  +  public int[] getPositions() {
  +      int[] result = new int[positions.size()];
  +      for(int i = 0; i < positions.size(); i++)
  +          result[i] = ((Integer) positions.elementAt(i)).intValue();
  +      return result;
  +  }
   
     private class PhraseWeight implements Weight {
       private Searcher searcher;
  @@ -109,11 +141,11 @@
         }
   
         if (slop == 0)				  // optimize exact case
  -        return new ExactPhraseScorer(this, tps, getSimilarity(searcher),
  +        return new ExactPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
                                        reader.norms(field));
         else
           return
  -          new SloppyPhraseScorer(this, tps, getSimilarity(searcher), slop,
  +          new SloppyPhraseScorer(this, tps, getPositions(), getSimilarity(searcher), slop,
                                    reader.norms(field));
         
       }
  @@ -244,14 +276,16 @@
       PhraseQuery other = (PhraseQuery)o;
       return (this.getBoost() == other.getBoost())
         && (this.slop == other.slop)
  -      &&  this.terms.equals(other.terms);
  +      &&  this.terms.equals(other.terms)
  +      && this.positions.equals(other.positions);
     }
   
     /** Returns a hash code value for this object.*/
     public int hashCode() {
       return Float.floatToIntBits(getBoost())
         ^ Float.floatToIntBits(slop)
  -      ^ terms.hashCode();
  +      ^ terms.hashCode()
  +      ^ positions.hashCode();
     }
   
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org