You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2017/03/09 09:23:14 UTC
[12/19] lucene-solr:jira/solr-9835: LUCENE-7695: support synonyms in
ComplexPhraseQueryParser
LUCENE-7695: support synonyms in ComplexPhraseQueryParser
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8a549293
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8a549293
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8a549293
Branch: refs/heads/jira/solr-9835
Commit: 8a5492930eff393de824450f77f27d98a204df3d
Parents: d844207
Author: Mikhail Khludnev <mk...@apache.org>
Authored: Sun Mar 5 12:24:47 2017 +0300
Committer: Mikhail Khludnev <mk...@apache.org>
Committed: Wed Mar 8 11:20:35 2017 +0300
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 ++
.../complexPhrase/ComplexPhraseQueryParser.java | 21 +++++++++---
.../complexPhrase/TestComplexPhraseQuery.java | 36 +++++++++++++++++---
3 files changed, 52 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a549293/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index b067fde..a8f7ee4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -204,6 +204,9 @@ Improvements
IndexInput description instead of plain IOException (Mike Drob via
Mike McCandless)
+* LUCENE-7695: ComplexPhraseQueryParser to support query time synonyms (Markus Jelsma
+ via Mikhail Khludnev)
+
Optimizations
* LUCENE-7641: Optimized point range queries to compute documents that do not
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a549293/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
index 6e18960..32f4fb3 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -35,6 +36,7 @@ import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
@@ -257,6 +259,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
// ArrayList spanClauses = new ArrayList();
if (contents instanceof TermQuery
|| contents instanceof MultiTermQuery
+ || contents instanceof SynonymQuery
) {
return contents;
}
@@ -287,9 +290,11 @@ public class ComplexPhraseQueryParser extends QueryParser {
qc = ((BoostQuery) qc).getQuery();
}
- if (qc instanceof BooleanQuery) {
+ if (qc instanceof BooleanQuery || qc instanceof SynonymQuery) {
ArrayList<SpanQuery> sc = new ArrayList<>();
- addComplexPhraseClause(sc, (BooleanQuery) qc);
+ BooleanQuery booleanCaluse = qc instanceof BooleanQuery ?
+ (BooleanQuery) qc : convert((SynonymQuery) qc);
+ addComplexPhraseClause(sc, booleanCaluse);
if (sc.size() > 0) {
allSpanClauses[i] = sc.get(0);
} else {
@@ -309,14 +314,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
if (qc instanceof TermQuery) {
TermQuery tq = (TermQuery) qc;
allSpanClauses[i] = new SpanTermQuery(tq.getTerm());
- } else {
+ } else {
throw new IllegalArgumentException("Unknown query type \""
+ qc.getClass().getName()
+ "\" found in phrase query string \""
+ phrasedQueryStringContents + "\"");
}
-
}
+
i += 1;
}
if (numNegatives == 0) {
@@ -354,6 +359,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
return snot;
}
+ private BooleanQuery convert(SynonymQuery qc) { // Re-expresses a SynonymQuery as a BooleanQuery so it can be handed to addComplexPhraseClause, which accepts only BooleanQuery.
+ BooleanQuery.Builder bqb = new BooleanQuery.Builder();
+ for (Term t : qc.getTerms()){ // visit every term carried by the synonym query
+ bqb.add(new BooleanClause(new TermQuery(t), Occur.SHOULD)); // each term becomes its own SHOULD clause wrapping a TermQuery
+ }
+ return bqb.build();
+ }
+
private void addComplexPhraseClause(List<SpanQuery> spanClauses, BooleanQuery qc) {
ArrayList<SpanQuery> ors = new ArrayList<>();
ArrayList<SpanQuery> nots = new ArrayList<>();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a549293/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
index 28b600b..5c45e28 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
@@ -20,6 +20,7 @@ import java.util.HashSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
@@ -39,7 +40,11 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
new DocData("john smith", "1", "developer"),
new DocData("johathon smith", "2", "developer"),
new DocData("john percival smith", "3", "designer"),
- new DocData("jackson waits tom", "4", "project manager")
+ new DocData("jackson waits tom", "4", "project manager"),
+ new DocData("johny perkins", "5", "orders pizza"),
+ new DocData("hapax neverson", "6", "never matches"),
+ new DocData("dog cigar", "7", "just for synonyms"),
+ new DocData("dogs don't smoke cigarettes", "8", "just for synonyms"),
};
private IndexSearcher searcher;
@@ -73,12 +78,30 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
}
public void testSingleTermPhrase() throws Exception {
- checkMatches("\"joh*\" \"tom\"", "1,2,3,4");
+ checkMatches("\"joh*\"","1,2,3,5");
+ checkMatches("\"joh~\"","1,3,5");
+ checkMatches("\"joh*\" \"tom\"", "1,2,3,4,5");
checkMatches("+\"j*\" +\"tom\"", "4");
- checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3");
+ checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3,5,8");
checkMatches("+\"j*hn\" +\"sm*h\"", "1,3");
}
+ public void testSynonyms() throws Exception { // Checks phrase matching with and without a synonym-producing analyzer passed to the parser.
+ checkMatches("\"dogs\"","8"); // two-argument overload uses the test's own analyzer field; only doc 8 is expected
+ MockSynonymAnalyzer synonym = new MockSynonymAnalyzer();
+ checkMatches("\"dogs\"","7,8",synonym); // with the synonym analyzer doc 7 ("dog cigar") is expected as well
+ // synonym is unidirectional
+ checkMatches("\"dog\"","7",synonym); // "dog" is not expected to reach doc 8 ("dogs ...")
+ checkMatches("\"dogs cigar*\"","");
+ checkMatches("\"dog cigar*\"","7");
+ checkMatches("\"dogs cigar*\"","7", synonym); // synonym term combined with a wildcard term inside one phrase
+ checkMatches("\"dog cigar*\"","7", synonym);
+ checkMatches("\"dogs cigar*\"~2","7,8", synonym); // with ~2 on the phrase, doc 8 is expected to match too
+ // synonym is unidirectional
+ checkMatches("\"dog cigar*\"~2","7", synonym);
+
+ }
+
public void testUnOrderedProximitySearches() throws Exception {
inOrder = true;
@@ -98,8 +121,13 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
}
private void checkMatches(String qString, String expectedVals)
+ throws Exception {
+ checkMatches(qString, expectedVals, analyzer);
+ }
+
+ private void checkMatches(String qString, String expectedVals, Analyzer anAnalyzer)
throws Exception {
- ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+ ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, anAnalyzer);
qp.setInOrder(inOrder);
qp.setFuzzyPrefixLength(1); // usually a good idea