You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/02/19 16:27:29 UTC

svn commit: r1660910 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/search/ core/src/test/org/apache/lucene/search/

Author: jpountz
Date: Thu Feb 19 15:27:29 2015
New Revision: 1660910

URL: http://svn.apache.org/r1660910
Log:
LUCENE-6255: Fix PhraseQuery inconsistencies.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSimpleSearchEquivalence.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1660910&r1=1660909&r2=1660910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Feb 19 15:27:29 2015
@@ -138,6 +138,11 @@ Other
 * LUCENE-6239: Removed RAMUsageEstimator's sun.misc.Unsafe calls.
   (Robert Muir, Dawid Weiss, Uwe Schindler)
 
+Changes in Runtime Behavior
+
+* LUCENE-6255: PhraseQuery now ignores leading holes and requires that
+  positions are positive and added in order. (Adrien Grand)
+
 ======================= Lucene 5.0.0 =======================
 
 New Features

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1660910&r1=1660909&r2=1660910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java Thu Feb 19 15:27:29 2015
@@ -42,12 +42,25 @@ import org.apache.lucene.util.ToStringUt
  * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
  * 
  * <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
+ *
+ * <b>NOTE</b>: Leading holes don't have any particular meaning for this query
+ * and will be ignored. For instance this query:
+ * <pre class="prettyprint">
+ * PhraseQuery pq = new PhraseQuery();
+ * pq.add(new Term("body", "one"), 4);
+ * pq.add(new Term("body", "two"), 5);
+ * </pre>
+ * is equivalent to the below query:
+ * <pre class="prettyprint">
+ * PhraseQuery pq = new PhraseQuery();
+ * pq.add(new Term("body", "one"), 0);
+ * pq.add(new Term("body", "two"), 1);
+ * </pre>
  */
 public class PhraseQuery extends Query {
   private String field;
   private ArrayList<Term> terms = new ArrayList<>(4);
   private ArrayList<Integer> positions = new ArrayList<>(4);
-  private int maxPosition = 0;
   private int slop = 0;
 
   /** Constructs an empty phrase query. */
@@ -82,8 +95,9 @@ public class PhraseQuery extends Query {
    */
   public void add(Term term) {
     int position = 0;
-    if(positions.size() > 0)
-        position = positions.get(positions.size()-1).intValue() + 1;
+    if (positions.size() > 0) {
+      position = positions.get(positions.size()-1) + 1;
+    }
 
     add(term, position);
   }
@@ -96,6 +110,16 @@ public class PhraseQuery extends Query {
    * 
    */
   public void add(Term term, int position) {
+    if (positions.size() > 0) {
+      final int previousPosition = positions.get(positions.size()-1);
+      if (position < previousPosition) {
+        throw new IllegalArgumentException("Positions must be added in order. Got position="
+            + position + " while previous position was " + previousPosition);
+      }
+    } else if (position < 0) {
+      throw new IllegalArgumentException("Positions must be positive, got " + position);
+    }
+
     if (terms.size() == 0) {
       field = term.field();
     } else if (!term.field().equals(field)) {
@@ -104,7 +128,6 @@ public class PhraseQuery extends Query {
 
     terms.add(term);
     positions.add(Integer.valueOf(position));
-    if (position > maxPosition) maxPosition = position;
   }
 
   /** Returns the set of terms in this phrase. */
@@ -132,8 +155,21 @@ public class PhraseQuery extends Query {
       TermQuery tq = new TermQuery(terms.get(0));
       tq.setBoost(getBoost());
       return tq;
-    } else
+    } else if (positions.get(0).intValue() != 0) {
+      // PhraseWeight requires that positions start at 0 so we need to rebase
+      // positions
+      final Term[] terms = getTerms();
+      final int[] positions = getPositions();
+      PhraseQuery rewritten = new PhraseQuery();
+      for (int i = 0; i < terms.length; ++i) {
+        rewritten.add(terms[i], positions[i] - positions[0]);
+      }
+      rewritten.setBoost(getBoost());
+      rewritten.setSlop(getSlop());
+      return rewritten;
+    } else {
       return super.rewrite(reader);
+    }
   }
 
   static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
@@ -217,6 +253,12 @@ public class PhraseQuery extends Query {
     public PhraseWeight(IndexSearcher searcher, boolean needsScores)
       throws IOException {
       super(PhraseQuery.this);
+      final int[] positions = PhraseQuery.this.getPositions();
+      if (positions.length < 2) {
+        throw new IllegalStateException("PhraseWeight does not support less than 2 terms, call rewrite first");
+      } else if (positions[0] != 0) {
+        throw new IllegalStateException("PhraseWeight requires that the first position is 0, call rewrite first");
+      }
       this.needsScores = needsScores;
       this.similarity = searcher.getSimilarity();
       final IndexReaderContext context = searcher.getTopReaderContext();
@@ -340,6 +382,12 @@ public class PhraseQuery extends Query {
     }
 
     buffer.append("\"");
+    final int maxPosition;
+    if (positions.isEmpty()) {
+      maxPosition = -1;
+    } else {
+      maxPosition = positions.get(positions.size() - 1);
+    }
     String[] pieces = new String[maxPosition + 1];
     for (int i = 0; i < terms.size(); i++) {
       int pos = positions.get(i).intValue();

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java?rev=1660910&r1=1660909&r2=1660910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java Thu Feb 19 15:27:29 2015
@@ -375,13 +375,29 @@ public class TestPhraseQuery extends Luc
   }
   
   public void testToString() throws Exception {
-    PhraseQuery q = new PhraseQuery(); // Query "this hi this is a test is"
+    PhraseQuery q = new PhraseQuery();
+    assertEquals("\"\"", q.toString());
+
+    q.add(new Term("field", "hi"), 1);
+    assertEquals("field:\"? hi\"", q.toString());
+
+    q = new PhraseQuery(); // Query "this hi this is a test is"
     q.add(new Term("field", "hi"), 1);
     q.add(new Term("field", "test"), 5);
     
     assertEquals("field:\"? hi ? ? ? test\"", q.toString());
+
+    q = new PhraseQuery();
+    q.add(new Term("field", "hi"), 1);
     q.add(new Term("field", "hello"), 1);
+    q.add(new Term("field", "test"), 5);
     assertEquals("field:\"? hi|hello ? ? ? test\"", q.toString());
+
+    q.setSlop(5);
+    assertEquals("field:\"? hi|hello ? ? ? test\"~5", q.toString());
+
+    q.setBoost(2);
+    assertEquals("field:\"? hi|hello ? ? ? test\"~5^2.0", q.toString());
   }
 
   public void testWrappedPhrase() throws IOException {
@@ -696,6 +712,28 @@ public class TestPhraseQuery extends Luc
       fail("didn't get expected exception");
     } catch (IllegalArgumentException expected) {
       // expected exception
+    }
+  }
+
+  public void testNegativePosition() throws Exception {
+    PhraseQuery query = new PhraseQuery();
+    try {
+      query.add(new Term("field", "two"), -42);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {
+      // expected exception
+    }
+  }
+
+  public void testBackwardPositions() throws Exception {
+    PhraseQuery query = new PhraseQuery();
+    query.add(new Term("field", "one"), 1);
+    query.add(new Term("field", "two"), 5);
+    try {
+      query.add(new Term("field", "three"), 4);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {
+      // expected exception
     }
   }
 }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSimpleSearchEquivalence.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSimpleSearchEquivalence.java?rev=1660910&r1=1660909&r2=1660910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSimpleSearchEquivalence.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSimpleSearchEquivalence.java Thu Feb 19 15:27:29 2015
@@ -188,4 +188,32 @@ public class TestSimpleSearchEquivalence
     q2.add(new TermQuery(t2), Occur.MUST);
     assertSameSet(q1, q2);
   }
+
+  /** Phrase positions are relative. */
+  public void testPhraseRelativePositions() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    PhraseQuery q1 = new PhraseQuery();
+    q1.add(t1);
+    q1.add(t2);
+    PhraseQuery q2 = new PhraseQuery();
+    q2.add(t1, 10000);
+    q2.add(t2, 10001);
+    assertSameScores(q1, q2);
+  }
+
+  /** Sloppy-phrase positions are relative. */
+  public void testSloppyPhraseRelativePositions() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    PhraseQuery q1 = new PhraseQuery();
+    q1.add(t1);
+    q1.add(t2);
+    q1.setSlop(2);
+    PhraseQuery q2 = new PhraseQuery();
+    q2.add(t1, 10000);
+    q2.add(t2, 10001);
+    q2.setSlop(2);
+    assertSameScores(q1, q2);
+  }
 }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java?rev=1660910&r1=1660909&r2=1660910&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestSloppyPhraseQuery.java Thu Feb 19 15:27:29 2015
@@ -248,7 +248,7 @@ public class TestSloppyPhraseQuery exten
     pq.add(new Term("lyrics", "drug"), 1);
     pq.add(new Term("lyrics", "drug"), 4);
     pq.setSlop(0);
-    assertEquals(0, is.search(pq, 4).totalHits);
+    assertEquals(1, is.search(pq, 4).totalHits);
     pq.setSlop(1);
     assertEquals(3, is.search(pq, 4).totalHits);
     pq.setSlop(2);