You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by dn...@apache.org on 2004/10/10 17:27:16 UTC
cvs commit: jakarta-lucene/src/java/org/apache/lucene/search MultiPhraseQuery.java PhrasePrefixQuery.java

dnaber      2004/10/10 08:27:16

  Modified:    src/java/org/apache/lucene/search PhrasePrefixQuery.java
  Added:       src/java/org/apache/lucene/search MultiPhraseQuery.java
  Log:
  Deprecate PhrasePrefixQuery and copy it to MultiPhraseQuery; also fix toString() in MultiPhraseQuery
  
  Revision  Changes    Path
  1.18      +2 -1      jakarta-lucene/src/java/org/apache/lucene/search/PhrasePrefixQuery.java
  
  Index: PhrasePrefixQuery.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/PhrasePrefixQuery.java,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -r1.17 -r1.18
  --- PhrasePrefixQuery.java	17 Sep 2004 19:24:14 -0000	1.17
  +++ PhrasePrefixQuery.java	10 Oct 2004 15:27:15 -0000	1.18
  @@ -34,7 +34,8 @@
    * add(Term) on the term "Microsoft", then find all terms that has "app" as
    * prefix using IndexReader.terms(Term), and use PhrasePrefixQuery.add(Term[]
    * terms) to add them to the query.
  - *
  + * 
  + * @deprecated use {@link org.apache.lucene.search.MultiPhraseQuery} instead
    * @author Anders Nielsen
    * @version 1.0
    */
  
  
  
  1.1                  jakarta-lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
  
  Index: MultiPhraseQuery.java
  ===================================================================
  package org.apache.lucene.search;
  
  /**
   * Copyright 2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.Iterator;
  import java.util.Vector;
  
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.MultipleTermPositions;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.index.TermPositions;
  import org.apache.lucene.search.Query;
  
  /**
   * MultiPhraseQuery is a generalized version of PhraseQuery, with an added
   * method {@link #add(Term[])}.
   * To search for the phrase "Microsoft app*" with this class, first call
   * {@link #add(Term)} on the term "Microsoft", then find all terms that have
   * "app" as prefix using IndexReader.terms(Term), and pass them to
   * {@link #add(Term[])} to add them to the query.
   *
   * @author Anders Nielsen
   * @version 1.0
   */
  public class MultiPhraseQuery extends Query {
    /** Field shared by all terms; null until the first terms are added. */
    private String field;
    /** One Term[] per phrase position; any term of an array may match there. */
    private ArrayList termArrays = new ArrayList();
    /** Relative position (as Integer) of each entry of termArrays. */
    private Vector positions = new Vector();

    private int slop = 0;

    /** Sets the phrase slop for this query.
     * @see PhraseQuery#setSlop(int)
     */
    public void setSlop(int s) { slop = s; }

    /** Gets the phrase slop for this query.
     * @see PhraseQuery#getSlop()
     */
    public int getSlop() { return slop; }

    /** Add a single term at the next position in the phrase.
     * @see PhraseQuery#add(Term)
     */
    public void add(Term term) { add(new Term[]{term}); }

    /** Add multiple terms at the next position in the phrase.  Any of the terms
     * may match.  The next position is one past the most recently added
     * position, or 0 if nothing has been added yet.
     *
     * @see PhraseQuery#add(Term)
     */
    public void add(Term[] terms) {
      int position = 0;
      if (positions.size() > 0)
        position = ((Integer) positions.lastElement()).intValue() + 1;

      add(terms, position);
    }

    /**
     * Allows to specify the relative position of terms within the phrase.
     *
     * @see PhraseQuery#add(Term, int)
     * @param terms the alternative terms for this position; must contain at
     *        least one term, and all terms must be in the field used by the
     *        terms already added to this query
     * @param position the relative position of these terms within the phrase
     * @throws IllegalArgumentException if <code>terms</code> is empty or
     *         contains a term from a different field
     */
    public void add(Term[] terms, int position) {
      // An empty array can never be matched: scorer() and toString() both
      // read terms[0], so reject it here instead of failing later.
      if (terms.length == 0)
        throw new IllegalArgumentException(
            "At least one term is required for each phrase position");

      if (termArrays.size() == 0)
        field = terms[0].field();

      for (int i = 0; i < terms.length; i++) {
        // Compare by value: reference comparison is only correct when both
        // strings happen to be the same (interned) instance.
        if (!terms[i].field().equals(field)) {
          throw new IllegalArgumentException(
              "All phrase terms must be in the same field (" + field + "): "
                  + terms[i]);
        }
      }

      termArrays.add(terms);
      positions.addElement(new Integer(position));
    }

    /**
     * Returns the relative positions of terms in this phrase.
     */
    public int[] getPositions() {
      int[] result = new int[positions.size()];
      for (int i = 0; i < positions.size(); i++)
        result[i] = ((Integer) positions.elementAt(i)).intValue();
      return result;
    }

    /**
     * Weight implementation for the enclosing MultiPhraseQuery.  The idf used
     * is the sum of the idf values of every term at every phrase position.
     */
    private class MultiPhraseWeight implements Weight {
      private Searcher searcher;
      private float value;
      private float idf;
      private float queryNorm;
      private float queryWeight;

      public MultiPhraseWeight(Searcher searcher) {
        this.searcher = searcher;
      }

      public Query getQuery() { return MultiPhraseQuery.this; }
      public float getValue() { return value; }

      /**
       * Computes the summed idf of all terms and the resulting query weight
       * (idf * boost), and returns the square of that weight.
       */
      public float sumOfSquaredWeights() throws IOException {
        // Accumulate into a local so that calling this method more than once
        // does not keep adding to the previously computed idf.
        float idfSum = 0.0f;
        Iterator i = termArrays.iterator();
        while (i.hasNext()) {
          Term[] terms = (Term[])i.next();
          for (int j=0; j<terms.length; j++)
            idfSum += getSimilarity(searcher).idf(terms[j], searcher);
        }
        idf = idfSum;

        queryWeight = idf * getBoost();             // compute query weight
        return queryWeight * queryWeight;           // square it
      }

      /** Applies the query normalization factor and derives the final value. */
      public void normalize(float queryNorm) {
        this.queryNorm = queryNorm;
        queryWeight *= queryNorm;                   // normalize query weight
        value = queryWeight * idf;                  // idf for document 
      }

      /**
       * Builds a phrase scorer over one TermPositions per phrase position.
       *
       * @return an exact scorer when slop is 0, a sloppy scorer otherwise, or
       *         null if the query has no terms or a TermPositions could not
       *         be obtained
       */
      public Scorer scorer(IndexReader reader) throws IOException {
        if (termArrays.size() == 0)                  // optimize zero-term case
          return null;

        TermPositions[] tps = new TermPositions[termArrays.size()];
        for (int i=0; i<tps.length; i++) {
          Term[] terms = (Term[])termArrays.get(i);

          // several alternatives at this position: merge their positions
          TermPositions p;
          if (terms.length > 1)
            p = new MultipleTermPositions(reader, terms);
          else
            p = reader.termPositions(terms[0]);

          if (p == null)
            return null;

          tps[i] = p;
        }

        if (slop == 0)
          return new ExactPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
                                       reader.norms(field));
        else
          return new SloppyPhraseScorer(this, tps, getPositions(), getSimilarity(searcher),
                                        slop, reader.norms(field));
      }

      /**
       * Explains the score of <code>doc</code> as the product of the query
       * weight (boost * idf * queryNorm) and the field weight
       * (tf * idf * fieldNorm).  When the query weight is exactly 1.0 only the
       * field weight explanation is returned.
       */
      public Explanation explain(IndexReader reader, int doc)
        throws IOException {
        Explanation result = new Explanation();
        result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");

        Explanation idfExpl = new Explanation(idf, "idf("+getQuery()+")");

        // explain query weight
        Explanation queryExpl = new Explanation();
        queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");

        // the boost is only listed as a detail when it is not the default
        Explanation boostExpl = new Explanation(getBoost(), "boost");
        if (getBoost() != 1.0f)
          queryExpl.addDetail(boostExpl);

        queryExpl.addDetail(idfExpl);

        Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
        queryExpl.addDetail(queryNormExpl);

        queryExpl.setValue(boostExpl.getValue() *
                           idfExpl.getValue() *
                           queryNormExpl.getValue());

        result.addDetail(queryExpl);

        // explain field weight
        Explanation fieldExpl = new Explanation();
        fieldExpl.setDescription("fieldWeight("+getQuery()+" in "+doc+
                                 "), product of:");

        // NOTE(review): scorer(reader) returns null for a query without terms
        // (or when a TermPositions is unavailable); that case is not handled
        // here.
        Explanation tfExpl = scorer(reader).explain(doc);
        fieldExpl.addDetail(tfExpl);
        fieldExpl.addDetail(idfExpl);

        // a field without norms contributes a field norm of 0
        Explanation fieldNormExpl = new Explanation();
        byte[] fieldNorms = reader.norms(field);
        float fieldNorm =
          fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
        fieldNormExpl.setValue(fieldNorm);
        fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
        fieldExpl.addDetail(fieldNormExpl);

        fieldExpl.setValue(tfExpl.getValue() *
                           idfExpl.getValue() *
                           fieldNormExpl.getValue());

        result.addDetail(fieldExpl);

        // combine them
        result.setValue(queryExpl.getValue() * fieldExpl.getValue());

        if (queryExpl.getValue() == 1.0f)
          return fieldExpl;

        return result;
      }
    }

    /**
     * Creates the weight for this query.  A query with a single phrase
     * position is executed as a BooleanQuery of optional TermQuery clauses
     * (carrying this query's boost); everything else uses MultiPhraseWeight.
     */
    protected Weight createWeight(Searcher searcher) {
      if (termArrays.size() == 1) {                 // optimize one-term case
        Term[] terms = (Term[])termArrays.get(0);
        BooleanQuery boq = new BooleanQuery();
        for (int i=0; i<terms.length; i++) {
          boq.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
        }
        boq.setBoost(getBoost());
        return boq.createWeight(searcher);
      }
      return new MultiPhraseWeight(searcher);
    }

    /**
     * Prints a user-readable version of this query.  Positions with several
     * alternative terms are rendered in parentheses; the field prefix is
     * omitted when it equals <code>f</code> or when no terms have been added.
     */
    public final String toString(String f) {
      StringBuffer buffer = new StringBuffer();
      // field is still null for a query without terms
      if (field != null && !field.equals(f)) {
        buffer.append(field);
        buffer.append(":");
      }

      buffer.append("\"");
      Iterator i = termArrays.iterator();
      while (i.hasNext()) {
        Term[] terms = (Term[])i.next();
        if (terms.length > 1) {
          buffer.append("(");
          for (int j = 0; j < terms.length; j++) {
            buffer.append(terms[j].text());
            if (j < terms.length-1)
              buffer.append(" ");
          }
          buffer.append(")");
        } else {
          buffer.append(terms[0].text());
        }
        if (i.hasNext())
          buffer.append(" ");
      }
      buffer.append("\"");

      if (slop != 0) {
        buffer.append("~");
        buffer.append(slop);
      }

      if (getBoost() != 1.0f) {
        buffer.append("^");
        buffer.append(Float.toString(getBoost()));
      }

      return buffer.toString();
    }
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org