You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2013/07/26 20:47:38 UTC

svn commit: r1507396 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/search/spans/ core/src/test/org/apache/lucene/search/spans/

Author: dsmiley
Date: Fri Jul 26 18:47:37 2013
New Revision: 1507396

URL: http://svn.apache.org/r1507396
Log:
LUCENE-5091: add not-near capability to SpanNotQuery

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Jul 26 18:47:37 2013
@@ -67,6 +67,9 @@ New features
 * LUCENE-5118: SpatialStrategy.makeDistanceValueSource() now has an optional
   multiplier for scaling degrees to another unit. (David Smiley)
 
+* LUCENE-5091: SpanNotQuery can now be configured with pre and post slop to act
+  as a hypothetical SpanNotNearQuery. (Tim Allison via David Smiley)
+
 Bug Fixes
 
 * LUCENE-5116: IndexWriter.addIndexes(IndexReader...) should drop empty (or all

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java Fri Jul 26 18:47:37 2013
@@ -31,16 +31,36 @@ import java.util.Collection;
 import java.util.Map;
 import java.util.Set;
 
-/** Removes matches which overlap with another SpanQuery. */
+/** Removes matches which overlap with another SpanQuery or which are
+ * within x tokens before or y tokens after another SpanQuery. */
 public class SpanNotQuery extends SpanQuery implements Cloneable {
   private SpanQuery include;
   private SpanQuery exclude;
+  private final int pre;
+  private final int post;
 
   /** Construct a SpanNotQuery matching spans from <code>include</code> which
    * have no overlap with spans from <code>exclude</code>.*/
   public SpanNotQuery(SpanQuery include, SpanQuery exclude) {
+     this(include, exclude, 0, 0);
+  }
+
+  
+  /** Construct a SpanNotQuery matching spans from <code>include</code> which
+   * have no overlap with spans from <code>exclude</code> within 
+   * <code>dist</code> tokens of <code>include</code>. */
+  public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
+     this(include, exclude, dist, dist);
+  }
+  
+  /** Construct a SpanNotQuery matching spans from <code>include</code> which
+   * have no overlap with spans from <code>exclude</code> within 
+   * <code>pre</code> tokens before or <code>post</code> tokens after <code>include</code>. */
+  public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
     this.include = include;
     this.exclude = exclude;
+    this.pre = (pre >=0) ? pre : 0;
+    this.post = (post >= 0) ? post : 0;
 
     if (!include.getField().equals(exclude.getField()))
       throw new IllegalArgumentException("Clauses must have same field.");
@@ -65,6 +85,10 @@ public class SpanNotQuery extends SpanQu
     buffer.append(include.toString(field));
     buffer.append(", ");
     buffer.append(exclude.toString(field));
+    buffer.append(", ");
+    buffer.append(Integer.toString(pre));
+    buffer.append(", ");
+    buffer.append(Integer.toString(post));
     buffer.append(")");
     buffer.append(ToStringUtils.boost(getBoost()));
     return buffer.toString();
@@ -72,7 +96,8 @@ public class SpanNotQuery extends SpanQu
 
   @Override
   public SpanNotQuery clone() {
-    SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),(SpanQuery) exclude.clone());
+    SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),
+          (SpanQuery) exclude.clone(), pre, post);
     spanNotQuery.setBoost(getBoost());
     return  spanNotQuery;
   }
@@ -98,13 +123,13 @@ public class SpanNotQuery extends SpanQu
 
             while (moreExclude                    // while exclude is before
                    && includeSpans.doc() == excludeSpans.doc()
-                   && excludeSpans.end() <= includeSpans.start()) {
+                   && excludeSpans.end() <= includeSpans.start() - pre) {
               moreExclude = excludeSpans.next();  // increment exclude
             }
 
             if (!moreExclude                      // if no intersection
                 || includeSpans.doc() != excludeSpans.doc()
-                || includeSpans.end() <= excludeSpans.start())
+                || includeSpans.end()+post <= excludeSpans.start())
               break;                              // we found a match
 
             moreInclude = includeSpans.next();    // intersected: keep scanning
@@ -126,13 +151,13 @@ public class SpanNotQuery extends SpanQu
 
           while (moreExclude                      // while exclude is before
                  && includeSpans.doc() == excludeSpans.doc()
-                 && excludeSpans.end() <= includeSpans.start()) {
+                 && excludeSpans.end() <= includeSpans.start()-pre) {
             moreExclude = excludeSpans.next();    // increment exclude
           }
 
           if (!moreExclude                      // if no intersection
                 || includeSpans.doc() != excludeSpans.doc()
-                || includeSpans.end() <= excludeSpans.start())
+                || includeSpans.end()+post <= excludeSpans.start())
             return true;                          // we found a match
 
           return next();                          // scan to next match
@@ -199,23 +224,28 @@ public class SpanNotQuery extends SpanQu
     /** Returns true iff <code>o</code> is equal to this. */
   @Override
   public boolean equals(Object o) {
-    if (this == o) return true;
-    if (!(o instanceof SpanNotQuery)) return false;
+    if (!super.equals(o))
+      return false;
 
     SpanNotQuery other = (SpanNotQuery)o;
     return this.include.equals(other.include)
             && this.exclude.equals(other.exclude)
-            && this.getBoost() == other.getBoost();
+            && this.pre == other.pre 
+            && this.post == other.post;
   }
 
   @Override
   public int hashCode() {
-    int h = include.hashCode();
-    h = (h<<1) | (h >>> 31);  // rotate left
+    int h = super.hashCode();
+    h = Integer.rotateLeft(h, 1);
+    h ^= include.hashCode();
+    h = Integer.rotateLeft(h, 1);
     h ^= exclude.hashCode();
-    h = (h<<1) | (h >>> 31);  // rotate left
-    h ^= Float.floatToRawIntBits(getBoost());
+    h = Integer.rotateLeft(h, 1);
+    h ^= pre;
+    h = Integer.rotateLeft(h, 1);
+    h ^= post;
     return h;
   }
 
-}
+}
\ No newline at end of file

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html Fri Jul 26 18:47:37 2013
@@ -38,8 +38,8 @@ and inter-phrase proximity (when constru
 number of other {@link org.apache.lucene.search.spans.SpanQuery}s.</li>
 
 <li>A {@link org.apache.lucene.search.spans.SpanNotQuery SpanNotQuery} removes spans
-matching one {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} which overlap
-another.  This can be used, e.g., to implement within-paragraph
+matching one {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} which overlap (or come
+near) another.  This can be used, e.g., to implement within-paragraph
 search.</li>
 
 <li>A {@link org.apache.lucene.search.spans.SpanFirstQuery SpanFirstQuery} matches spans

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java Fri Jul 26 18:47:37 2013
@@ -364,6 +364,77 @@ public class TestBasics extends LuceneTe
   }
   
   @Test
+  public void testSpanNotWindowOne() throws Exception {
+    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
+    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "forty"));
+    SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
+                                           4, true);
+    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "one"));
+    SpanNotQuery query = new SpanNotQuery(near, term3, 1, 1);
+
+    checkHits(query, new int[]
+      {840, 842, 843, 844, 845, 846, 847, 848, 849,
+          1840, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849});
+
+    assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
+    assertTrue(searcher.explain(query, 1842).getValue() > 0.0f);
+  }
+  
+  @Test
+  public void testSpanNotWindowTwoBefore() throws Exception {
+    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
+    SpanTermQuery term2 = new SpanTermQuery(new Term("field", "forty"));
+    SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
+                                           4, true);
+    SpanTermQuery term3 = new SpanTermQuery(new Term("field", "one"));
+    SpanNotQuery query = new SpanNotQuery(near, term3, 2, 0);
+
+    checkHits(query, new int[]
+      {840, 841, 842, 843, 844, 845, 846, 847, 848, 849});
+
+    assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
+    assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
+  }
+
+  @Test
+  public void testSpanNotWindowNeg() throws Exception {
+     //test handling of invalid window < 0
+     SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
+     SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one"));
+     SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
+                                            4, true);
+     SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
+
+     SpanOrQuery or = new SpanOrQuery(term3);
+
+     SpanNotQuery query = new SpanNotQuery(near, or);
+
+     checkHits(query, new int[]
+       {801, 821, 831, 851, 861, 871, 881, 891,
+               1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
+
+     assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
+     assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
+  }
+  
+  @Test
+  public void testSpanNotWindowDoubleExcludesBefore() throws Exception {
+     //test hitting two excludes before an include
+     SpanTermQuery term1 = new SpanTermQuery(new Term("field", "forty"));
+     SpanTermQuery term2 = new SpanTermQuery(new Term("field", "two"));
+     SpanNearQuery near = new SpanNearQuery(new SpanTermQuery[]{term1, term2}, 2, true);
+     SpanTermQuery exclude = new SpanTermQuery(new Term("field", "one"));
+
+     SpanNotQuery query = new SpanNotQuery(near, exclude, 4, 1);
+
+     checkHits(query, new int[]
+       {42, 242, 342, 442, 542, 642, 742, 842, 942});
+
+     assertTrue(searcher.explain(query, 242).getValue() > 0.0f);
+     assertTrue(searcher.explain(query, 942).getValue() > 0.0f);
+  }
+  
+  @Test
   public void testSpanFirst() throws Exception {
     SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
     SpanFirstQuery query = new SpanFirstQuery(term1, 1);

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java Fri Jul 26 18:47:37 2013
@@ -84,7 +84,8 @@ public class TestSpans extends LuceneTes
     "u2 xx u1 u2",
     "u2 u1 xx u2",
     "u1 u2 xx u2",
-    "t1 t2 t1 t3 t2 t3"
+    "t1 t2 t1 t3 t2 t3",
+    "s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx"
   };
 
   public SpanTermQuery makeSpanTermQuery(String text) {
@@ -502,4 +503,52 @@ public class TestSpans extends LuceneTes
     reader.close();
     dir.close();
   }
+  
+  
+  public void testSpanNots() throws Throwable{
+     assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0), 0);
+     assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10), 0);
+     
+     //focus on behind
+     assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
+     assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
+     assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
+     assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
+     assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));
+     
+     //focus on both
+     assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
+     assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
+     assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
+     assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));
+     
+     //focus on ahead
+     assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));  
+     assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));  
+     assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));  
+     assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));  
+     assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
+     assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));
+     
+     //exclude doesn't exist
+     assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));
+
+     //include doesn't exist
+     assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
+
+  }
+  
+  private int spanCount(String include, String exclude, int pre, int post) throws IOException{
+     SpanTermQuery iq = new SpanTermQuery(new Term(field, include));
+     SpanTermQuery eq = new SpanTermQuery(new Term(field, exclude));
+     SpanNotQuery snq = new SpanNotQuery(iq, eq, pre, post);
+     Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
+
+     int i = 0;
+     while (spans.next()){
+        i++;
+     }
+     return i;
+  }
+  
 }