You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2013/01/28 16:36:32 UTC
svn commit: r1439450 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/queries/
lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java
Author: simonw
Date: Mon Jan 28 15:36:31 2013
New Revision: 1439450
URL: http://svn.apache.org/viewvc?rev=1439450&view=rev
Log:
LUCENE-4727: use float as minShouldMatch on CommonTermsQuery
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/queries/ (props changed)
lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java
Modified: lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java?rev=1439450&r1=1439449&r2=1439450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CommonTermsQuery.java Mon Jan 28 15:36:31 2013
@@ -74,7 +74,7 @@ public class CommonTermsQuery extends Qu
protected final Occur highFreqOccur;
protected float lowFreqBoost = 1.0f;
protected float highFreqBoost = 1.0f;
- protected int minNrShouldMatch = 0;
+ protected float minNrShouldMatch = 0;
/**
* Creates a new {@link CommonTermsQuery}
@@ -84,7 +84,7 @@ public class CommonTermsQuery extends Qu
* @param lowFreqOccur
* {@link Occur} used for low frequency terms
* @param maxTermFrequency
- * a value in [0..1] (or absolute number >=1) representing the
+ * a value in [0..1) (or absolute number >=1) representing the
* maximum threshold of a terms document frequency to be considered a
* low frequency term.
* @throws IllegalArgumentException
@@ -104,7 +104,7 @@ public class CommonTermsQuery extends Qu
* @param lowFreqOccur
* {@link Occur} used for low frequency terms
* @param maxTermFrequency
- * a value in [0..1] (or absolute number >=1) representing the
+ * a value in [0..1) (or absolute number >=1) representing the
* maximum threshold of a terms document frequency to be considered a
* low frequency term.
* @param disableCoord
@@ -160,15 +160,19 @@ public class CommonTermsQuery extends Qu
return buildQuery(maxDoc, contextArray, queryTerms);
}
+ protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) {
+ if (minNrShouldMatch >= 1.0f || minNrShouldMatch == 0.0f) {
+ return (int) minNrShouldMatch;
+ }
+ return (int) (Math.round(minNrShouldMatch * numOptional));
+ }
+
protected Query buildQuery(final int maxDoc,
final TermContext[] contextArray, final Term[] queryTerms) {
BooleanQuery lowFreq = new BooleanQuery(disableCoord);
BooleanQuery highFreq = new BooleanQuery(disableCoord);
highFreq.setBoost(highFreqBoost);
lowFreq.setBoost(lowFreqBoost);
- if (lowFreqOccur == Occur.SHOULD) {
- lowFreq.setMinimumNumberShouldMatch(minNrShouldMatch);
- }
BooleanQuery query = new BooleanQuery(true);
for (int i = 0; i < queryTerms.length; i++) {
TermContext termContext = contextArray[i];
@@ -186,6 +190,11 @@ public class CommonTermsQuery extends Qu
}
}
+ final int numLowFreqClauses = lowFreq.clauses().size();
+ if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
+ int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
+ lowFreq.setMinimumNumberShouldMatch(minMustMatch);
+ }
if (lowFreq.clauses().isEmpty()) {
/*
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
@@ -265,7 +274,9 @@ public class CommonTermsQuery extends Qu
/**
* Specifies a minimum number of the optional BooleanClauses which must be
* satisfied in order to produce a match on the low frequency terms query
- * part.
+ * part. This method accepts a float value in the range [0..1) as a fraction
+ * of the actual query terms in the low-frequency clause or a number
+ * <tt>>=1</tt> as an absolute number of clauses that need to match.
*
* <p>
* By default no optional clauses are necessary for a match (unless there are
@@ -276,7 +287,7 @@ public class CommonTermsQuery extends Qu
* @param min
* the number of optional clauses that must match
*/
- public void setMinimumNumberShouldMatch(int min) {
+ public void setMinimumNumberShouldMatch(float min) {
this.minNrShouldMatch = min;
}
@@ -284,7 +295,7 @@ public class CommonTermsQuery extends Qu
* Gets the minimum number of the optional BooleanClauses which must be
* satisfied.
*/
- public int getMinimumNumberShouldMatch() {
+ public float getMinimumNumberShouldMatch() {
return minNrShouldMatch;
}
@@ -332,7 +343,7 @@ public class CommonTermsQuery extends Qu
result = prime * result
+ ((lowFreqOccur == null) ? 0 : lowFreqOccur.hashCode());
result = prime * result + Float.floatToIntBits(maxTermFrequency);
- result = prime * result + minNrShouldMatch;
+ result = prime * result + Float.floatToIntBits(minNrShouldMatch);
result = prime * result + ((terms == null) ? 0 : terms.hashCode());
return result;
}
Modified: lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java?rev=1439450&r1=1439449&r2=1439450&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/CommonTermsQueryTest.java Mon Jan 28 15:36:31 2013
@@ -175,6 +175,90 @@ public class CommonTermsQueryTest extend
}
}
+ public void testMinShouldMatch() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ String[] docs = new String[] {"this is the end of the world right",
+ "is this it or maybe not",
+ "this is the end of the universe as we know it",
+ "there is the famous restaurant at the end of the universe",};
+ for (int i = 0; i < docs.length; i++) {
+ Document doc = new Document();
+ doc.add(newStringField("id", "" + i, Field.Store.YES));
+ doc.add(newTextField("field", docs[i], Field.Store.NO));
+ w.addDocument(doc);
+ }
+
+ IndexReader r = w.getReader();
+ IndexSearcher s = newSearcher(r);
+ {
+ CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
+ random().nextBoolean() ? 2.0f : 0.5f);
+ query.add(new Term("field", "is"));
+ query.add(new Term("field", "this"));
+ query.add(new Term("field", "end"));
+ query.add(new Term("field", "world"));
+ query.add(new Term("field", "universe"));
+ query.add(new Term("field", "right"));
+ query.setMinimumNumberShouldMatch(0.5f);
+ TopDocs search = s.search(query, 10);
+ assertEquals(search.totalHits, 1);
+ assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
+ }
+ {
+ CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
+ random().nextBoolean() ? 2.0f : 0.5f);
+ query.add(new Term("field", "is"));
+ query.add(new Term("field", "this"));
+ query.add(new Term("field", "end"));
+ query.add(new Term("field", "world"));
+ query.add(new Term("field", "universe"));
+ query.add(new Term("field", "right"));
+ query.setMinimumNumberShouldMatch(2.0f);
+ TopDocs search = s.search(query, 10);
+ assertEquals(search.totalHits, 1);
+ assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
+ }
+
+ {
+ CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
+ random().nextBoolean() ? 2.0f : 0.5f);
+ query.add(new Term("field", "is"));
+ query.add(new Term("field", "this"));
+ query.add(new Term("field", "end"));
+ query.add(new Term("field", "world"));
+ query.add(new Term("field", "universe"));
+ query.add(new Term("field", "right"));
+ query.setMinimumNumberShouldMatch(0.49f);
+ TopDocs search = s.search(query, 10);
+ assertEquals(search.totalHits, 3);
+ assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
+ assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
+ assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
+ }
+
+ {
+ CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
+ random().nextBoolean() ? 2.0f : 0.5f);
+ query.add(new Term("field", "is"));
+ query.add(new Term("field", "this"));
+ query.add(new Term("field", "end"));
+ query.add(new Term("field", "world"));
+ query.add(new Term("field", "universe"));
+ query.add(new Term("field", "right"));
+ query.setMinimumNumberShouldMatch(1.0f);
+ TopDocs search = s.search(query, 10);
+ assertEquals(search.totalHits, 3);
+ assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
+ assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
+ assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
+ }
+
+ r.close();
+ w.close();
+ dir.close();
+ }
+
public void testIllegalOccur() {
Random random = random();