You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2013/07/26 20:47:38 UTC
svn commit: r1507396 - in /lucene/dev/trunk/lucene: ./
core/src/java/org/apache/lucene/search/spans/
core/src/test/org/apache/lucene/search/spans/
Author: dsmiley
Date: Fri Jul 26 18:47:37 2013
New Revision: 1507396
URL: http://svn.apache.org/r1507396
Log:
LUCENE-5091: add not-near capability to SpanNotQuery
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Jul 26 18:47:37 2013
@@ -67,6 +67,9 @@ New features
* LUCENE-5118: SpatialStrategy.makeDistanceValueSource() now has an optional
multiplier for scaling degrees to another unit. (David Smiley)
+* LUCENE-5091: SpanNotQuery can now be configured with pre and post slop to act
+ as a hypothetical SpanNotNearQuery. (Tim Allison via David Smiley)
+
Bug Fixes
* LUCENE-5116: IndexWriter.addIndexes(IndexReader...) should drop empty (or all
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java Fri Jul 26 18:47:37 2013
@@ -31,16 +31,36 @@ import java.util.Collection;
import java.util.Map;
import java.util.Set;
-/** Removes matches which overlap with another SpanQuery. */
+/** Removes matches which overlap with another SpanQuery or which lie
+ * within x tokens before or y tokens after another SpanQuery. */
public class SpanNotQuery extends SpanQuery implements Cloneable {
private SpanQuery include;
private SpanQuery exclude;
+ private final int pre;
+ private final int post;
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code>.*/
public SpanNotQuery(SpanQuery include, SpanQuery exclude) {
+ this(include, exclude, 0, 0);
+ }
+
+
+ /** Construct a SpanNotQuery matching spans from <code>include</code> which
+ * have no overlap with spans from <code>exclude</code> within
+ * <code>dist</code> tokens of <code>include</code>. */
+ public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
+ this(include, exclude, dist, dist);
+ }
+
+ /** Construct a SpanNotQuery matching spans from <code>include</code> which
+ * have no overlap with spans from <code>exclude</code> within
+ * <code>pre</code> tokens before or <code>post</code> tokens after <code>include</code>. */
+ public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
this.include = include;
this.exclude = exclude;
+ this.pre = (pre >=0) ? pre : 0;
+ this.post = (post >= 0) ? post : 0;
if (!include.getField().equals(exclude.getField()))
throw new IllegalArgumentException("Clauses must have same field.");
@@ -65,6 +85,10 @@ public class SpanNotQuery extends SpanQu
buffer.append(include.toString(field));
buffer.append(", ");
buffer.append(exclude.toString(field));
+ buffer.append(", ");
+ buffer.append(Integer.toString(pre));
+ buffer.append(", ");
+ buffer.append(Integer.toString(post));
buffer.append(")");
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
@@ -72,7 +96,8 @@ public class SpanNotQuery extends SpanQu
@Override
public SpanNotQuery clone() {
- SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),(SpanQuery) exclude.clone());
+ SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),
+ (SpanQuery) exclude.clone(), pre, post);
spanNotQuery.setBoost(getBoost());
return spanNotQuery;
}
@@ -98,13 +123,13 @@ public class SpanNotQuery extends SpanQu
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
- && excludeSpans.end() <= includeSpans.start()) {
+ && excludeSpans.end() <= includeSpans.start() - pre) {
moreExclude = excludeSpans.next(); // increment exclude
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
- || includeSpans.end() <= excludeSpans.start())
+ || includeSpans.end()+post <= excludeSpans.start())
break; // we found a match
moreInclude = includeSpans.next(); // intersected: keep scanning
@@ -126,13 +151,13 @@ public class SpanNotQuery extends SpanQu
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
- && excludeSpans.end() <= includeSpans.start()) {
+ && excludeSpans.end() <= includeSpans.start()-pre) {
moreExclude = excludeSpans.next(); // increment exclude
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
- || includeSpans.end() <= excludeSpans.start())
+ || includeSpans.end()+post <= excludeSpans.start())
return true; // we found a match
return next(); // scan to next match
@@ -199,23 +224,28 @@ public class SpanNotQuery extends SpanQu
/** Returns true iff <code>o</code> is equal to this. */
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof SpanNotQuery)) return false;
+ if (!super.equals(o))
+ return false;
SpanNotQuery other = (SpanNotQuery)o;
return this.include.equals(other.include)
&& this.exclude.equals(other.exclude)
- && this.getBoost() == other.getBoost();
+ && this.pre == other.pre
+ && this.post == other.post;
}
@Override
public int hashCode() {
- int h = include.hashCode();
- h = (h<<1) | (h >>> 31); // rotate left
+ int h = super.hashCode();
+ h = Integer.rotateLeft(h, 1);
+ h ^= include.hashCode();
+ h = Integer.rotateLeft(h, 1);
h ^= exclude.hashCode();
- h = (h<<1) | (h >>> 31); // rotate left
- h ^= Float.floatToRawIntBits(getBoost());
+ h = Integer.rotateLeft(h, 1);
+ h ^= pre;
+ h = Integer.rotateLeft(h, 1);
+ h ^= post;
return h;
}
-}
+}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/package.html Fri Jul 26 18:47:37 2013
@@ -38,8 +38,8 @@ and inter-phrase proximity (when constru
number of other {@link org.apache.lucene.search.spans.SpanQuery}s.</li>
<li>A {@link org.apache.lucene.search.spans.SpanNotQuery SpanNotQuery} removes spans
-matching one {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} which overlap
-another. This can be used, e.g., to implement within-paragraph
+matching one {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} which overlap (or come
+near) another. This can be used, e.g., to implement within-paragraph
search.</li>
<li>A {@link org.apache.lucene.search.spans.SpanFirstQuery SpanFirstQuery} matches spans
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java Fri Jul 26 18:47:37 2013
@@ -364,6 +364,77 @@ public class TestBasics extends LuceneTe
}
@Test
+ public void testSpanNotWindowOne() throws Exception {
+ SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
+ SpanTermQuery term2 = new SpanTermQuery(new Term("field", "forty"));
+ SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
+ 4, true);
+ SpanTermQuery term3 = new SpanTermQuery(new Term("field", "one"));
+ SpanNotQuery query = new SpanNotQuery(near, term3, 1, 1);
+
+ checkHits(query, new int[]
+ {840, 842, 843, 844, 845, 846, 847, 848, 849,
+ 1840, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849});
+
+ assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
+ assertTrue(searcher.explain(query, 1842).getValue() > 0.0f);
+ }
+
+ @Test
+ public void testSpanNotWindowTwoBefore() throws Exception {
+ SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
+ SpanTermQuery term2 = new SpanTermQuery(new Term("field", "forty"));
+ SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
+ 4, true);
+ SpanTermQuery term3 = new SpanTermQuery(new Term("field", "one"));
+ SpanNotQuery query = new SpanNotQuery(near, term3, 2, 0);
+
+ checkHits(query, new int[]
+ {840, 841, 842, 843, 844, 845, 846, 847, 848, 849});
+
+ assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
+ assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
+ }
+
+ @Test
+ public void testSpanNotWindowNeg() throws Exception {
+ //test handling of invalid window < 0
+ SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
+ SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one"));
+ SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
+ 4, true);
+ SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
+
+ SpanOrQuery or = new SpanOrQuery(term3);
+
+ SpanNotQuery query = new SpanNotQuery(near, or);
+
+ checkHits(query, new int[]
+ {801, 821, 831, 851, 861, 871, 881, 891,
+ 1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
+
+ assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
+ assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
+ }
+
+ @Test
+ public void testSpanNotWindowDoubleExcludesBefore() throws Exception {
+ //test hitting two excludes before an include
+ SpanTermQuery term1 = new SpanTermQuery(new Term("field", "forty"));
+ SpanTermQuery term2 = new SpanTermQuery(new Term("field", "two"));
+ SpanNearQuery near = new SpanNearQuery(new SpanTermQuery[]{term1, term2}, 2, true);
+ SpanTermQuery exclude = new SpanTermQuery(new Term("field", "one"));
+
+ SpanNotQuery query = new SpanNotQuery(near, exclude, 4, 1);
+
+ checkHits(query, new int[]
+ {42, 242, 342, 442, 542, 642, 742, 842, 942});
+
+ assertTrue(searcher.explain(query, 242).getValue() > 0.0f);
+ assertTrue(searcher.explain(query, 942).getValue() > 0.0f);
+ }
+
+ @Test
public void testSpanFirst() throws Exception {
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
SpanFirstQuery query = new SpanFirstQuery(term1, 1);
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java?rev=1507396&r1=1507395&r2=1507396&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java Fri Jul 26 18:47:37 2013
@@ -84,7 +84,8 @@ public class TestSpans extends LuceneTes
"u2 xx u1 u2",
"u2 u1 xx u2",
"u1 u2 xx u2",
- "t1 t2 t1 t3 t2 t3"
+ "t1 t2 t1 t3 t2 t3",
+ "s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx"
};
public SpanTermQuery makeSpanTermQuery(String text) {
@@ -502,4 +503,52 @@ public class TestSpans extends LuceneTes
reader.close();
dir.close();
}
+
+
+ public void testSpanNots() throws Throwable{
+ assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0), 0);
+ assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10), 0);
+
+ //focus on behind
+ assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
+ assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
+ assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
+ assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
+ assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));
+
+ //focus on both
+ assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
+ assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
+ assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
+ assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));
+
+ //focus on ahead
+ assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));
+ assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));
+ assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));
+ assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));
+ assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
+ assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));
+
+ //exclude doesn't exist
+ assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));
+
+ //include doesn't exist
+ assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
+
+ }
+
+ private int spanCount(String include, String exclude, int pre, int post) throws IOException{
+ SpanTermQuery iq = new SpanTermQuery(new Term(field, include));
+ SpanTermQuery eq = new SpanTermQuery(new Term(field, exclude));
+ SpanNotQuery snq = new SpanNotQuery(iq, eq, pre, post);
+ Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
+
+ int i = 0;
+ while (spans.next()){
+ i++;
+ }
+ return i;
+ }
+
}