You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2015/11/23 19:34:29 UTC
svn commit: r1715911 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/test/org/apache/lucene/search/
Author: dsmiley
Date: Mon Nov 23 18:34:29 2015
New Revision: 1715911
URL: http://svn.apache.org/viewvc?rev=1715911&view=rev
Log:
LUCENE-6801: Clarify javadocs of PhraseQuery, MultiPhraseQuery RE terms at same position. Add test.
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1715911&r1=1715910&r2=1715911&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Mon Nov 23 18:34:29 2015
@@ -284,6 +284,10 @@ Other
* LUCENE-6902: Don't retry to fsync files / directories; fail
immediately. (Daniel Mitterdorfer, Uwe Schindler)
+* LUCENE-6801: Clarify JavaDocs of PhraseQuery that it in fact supports terms
+ at the same position (as does MultiPhraseQuery), treated like a conjunction.
+ Added test. (David Smiley, Adrien Grand)
+
Build
* LUCENE-6732: Improve checker for invalid source patterns to also
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1715911&r1=1715910&r2=1715911&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java Mon Nov 23 18:34:29 2015
@@ -38,18 +38,19 @@ import org.apache.lucene.util.PriorityQu
import org.apache.lucene.util.ToStringUtils;
/**
- * MultiPhraseQuery is a generalized version of PhraseQuery, with an added
- * method {@link #add(Term[])}.
- * To use this class, to search for the phrase "Microsoft app*" first use
- * add(Term) on the term "Microsoft", then find all terms that have "app" as
- * prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[]
- * terms) to add them to the query.
- *
+ * A generalized version of {@link PhraseQuery}, with an added
+ * method {@link #add(Term[])} for adding more than one term at the same position
+ * that are treated as a disjunction (OR).
+ * To use this class to search for the phrase "Microsoft app*" first use
+ * {@link #add(Term)} on the term "microsoft" (assuming lowercase analysis), then
+ * find all terms that have "app" as prefix using {@link LeafReader#terms(String)},
+ * seeking to "app" then iterating and collecting terms until there is no longer
+ * that prefix, and finally use {@link #add(Term[])} to add them to the query.
*/
public class MultiPhraseQuery extends Query {
- private String field;
- private ArrayList<Term[]> termArrays = new ArrayList<>();
- private ArrayList<Integer> positions = new ArrayList<>();
+ private String field;// becomes non-null on first add() then is unmodified
+ private final ArrayList<Term[]> termArrays = new ArrayList<>();
+ private final ArrayList<Integer> positions = new ArrayList<>();
private int slop = 0;
@@ -73,38 +74,41 @@ public class MultiPhraseQuery extends Qu
public void add(Term term) { add(new Term[]{term}); }
/** Add multiple terms at the next position in the phrase. Any of the terms
- * may match.
+ * may match (a disjunction).
+ * The array is not copied or mutated, the caller should consider it
+ * immutable subsequent to calling this method.
*/
public void add(Term[] terms) {
int position = 0;
if (positions.size() > 0)
- position = positions.get(positions.size()-1).intValue() + 1;
+ position = positions.get(positions.size() - 1) + 1;
add(terms, position);
}
/**
* Allows to specify the relative position of terms within the phrase.
+ * The array is not copied or mutated, the caller should consider it
+ * immutable subsequent to calling this method.
*/
public void add(Term[] terms, int position) {
Objects.requireNonNull(terms, "Term array must not be null");
if (termArrays.size() == 0)
field = terms[0].field();
- for (int i = 0; i < terms.length; i++) {
- if (!terms[i].field().equals(field)) {
+ for (Term term : terms) {
+ if (!term.field().equals(field)) {
throw new IllegalArgumentException(
- "All phrase terms must be in the same field (" + field + "): "
- + terms[i]);
+ "All phrase terms must be in the same field (" + field + "): " + term);
}
}
termArrays.add(terms);
- positions.add(Integer.valueOf(position));
+ positions.add(position);
}
/**
- * Returns a List of the terms in the multiphrase.
+ * Returns a List of the terms in the multi-phrase.
* Do not modify the List or its contents.
*/
public List<Term[]> getTermArrays() {
@@ -117,7 +121,7 @@ public class MultiPhraseQuery extends Qu
public int[] getPositions() {
int[] result = new int[positions.size()];
for (int i = 0; i < positions.size(); i++)
- result[i] = positions.get(i).intValue();
+ result[i] = positions.get(i);
return result;
}
@@ -155,9 +159,7 @@ public class MultiPhraseQuery extends Qu
@Override
public void extractTerms(Set<Term> terms) {
for (final Term[] arr : termArrays) {
- for (final Term term: arr) {
- terms.add(term);
- }
+ Collections.addAll(terms, arr);
}
}
@@ -185,7 +187,8 @@ public class MultiPhraseQuery extends Qu
// TODO: move this check to createWeight to happen earlier to the user?
if (fieldTerms.hasPositions() == false) {
- throw new IllegalStateException("field \"" + field + "\" was indexed without position data; cannot run MultiPhraseQuery (phrase=" + getQuery() + ")");
+ throw new IllegalStateException("field \"" + field + "\" was indexed without position data;" +
+ " cannot run MultiPhraseQuery (phrase=" + getQuery() + ")");
}
// Reuse single TermsEnum below:
@@ -267,8 +270,8 @@ public class MultiPhraseQuery extends Qu
Term[] terms = termArrays.get(0);
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.setDisableCoord(true);
- for (int i=0; i<terms.length; i++) {
- builder.add(new TermQuery(terms[i]), BooleanClause.Occur.SHOULD);
+ for (Term term : terms) {
+ builder.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
}
return builder.build();
} else {
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1715911&r1=1715910&r2=1715911&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java Mon Nov 23 18:34:29 2015
@@ -48,7 +48,11 @@ import org.apache.lucene.util.ToStringUt
*
* <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
*
- * <b>NOTE</b>: Leading holes don't have any particular meaning for this query
+ * <p><b>NOTE</b>:
+ * All terms in the phrase must match, even those at the same position. If you
+ * have terms at the same position, perhaps synonyms, you probably want {@link MultiPhraseQuery}
+ * instead which only requires one term at a position to match.
+ * <br >Also, Leading holes don't have any particular meaning for this query
* and will be ignored. For instance this query:
* <pre class="prettyprint">
* PhraseQuery.Builder builder = new PhraseQuery.Builder();
@@ -99,10 +103,12 @@ public class PhraseQuery extends Query {
/**
* Adds a term to the end of the query phrase.
- * The relative position of the term within the phrase is specified explicitly.
- * This allows e.g. phrases with more than one term at the same position
- * or phrases with gaps (e.g. in connection with stopwords).
- *
+ * The relative position of the term within the phrase is specified explicitly, but must be greater than
+ * or equal to that of the previously added term.
+ * A greater position allows phrases with gaps (e.g. in connection with stopwords).
+ * If the position is equal, you most likely should be using
+ * {@link MultiPhraseQuery} instead which only requires one term at each position to match; this class requires
+ * all of them.
*/
public Builder add(Term term, int position) {
if (position < 0) {
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java?rev=1715911&r1=1715910&r2=1715911&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java Mon Nov 23 18:34:29 2015
@@ -23,19 +23,23 @@ import java.util.List;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
@@ -549,6 +553,52 @@ public class TestPhraseQuery extends Luc
assertTrue(rewritten instanceof TermQuery);
}
+ /** Tests PhraseQuery with terms at the same position in the query. */
+ public void testZeroPosIncr() throws IOException {
+ Directory dir = newDirectory();
+ final Token[] tokens = new Token[3];
+ tokens[0] = new Token();
+ tokens[0].append("a");
+ tokens[0].setPositionIncrement(1);
+ tokens[1] = new Token();
+ tokens[1].append("aa");
+ tokens[1].setPositionIncrement(0);
+ tokens[2] = new Token();
+ tokens[2].append("b");
+ tokens[2].setPositionIncrement(1);
+
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ doc.add(new TextField("field", new CannedTokenStream(tokens)));
+ writer.addDocument(doc);
+ IndexReader r = writer.getReader();
+ writer.close();
+ IndexSearcher searcher = newSearcher(r);
+
+ // Sanity check; simple "a b" phrase:
+ PhraseQuery.Builder pqBuilder = new PhraseQuery.Builder();
+ pqBuilder.add(new Term("field", "a"), 0);
+ pqBuilder.add(new Term("field", "b"), 1);
+ assertEquals(1, searcher.search(pqBuilder.build(), 1).totalHits);
+
+ // Now with "a|aa b"
+ pqBuilder = new PhraseQuery.Builder();
+ pqBuilder.add(new Term("field", "a"), 0);
+ pqBuilder.add(new Term("field", "aa"), 0);
+ pqBuilder.add(new Term("field", "b"), 1);
+ assertEquals(1, searcher.search(pqBuilder.build(), 1).totalHits);
+
+ // Now with "a|z b" which should not match; this isn't a MultiPhraseQuery
+ pqBuilder = new PhraseQuery.Builder();
+ pqBuilder.add(new Term("field", "a"), 0);
+ pqBuilder.add(new Term("field", "z"), 0);
+ pqBuilder.add(new Term("field", "b"), 1);
+ assertEquals(0, searcher.search(pqBuilder.build(), 1).totalHits);
+
+ r.close();
+ dir.close();
+ }
+
public void testRandomPhrases() throws Exception {
Directory dir = newDirectory();
Analyzer analyzer = new MockAnalyzer(random());