You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2017/03/09 09:23:14 UTC

[12/19] lucene-solr:jira/solr-9835: LUCENE-7695: support synonyms in ComplexPhraseQueryParser

LUCENE-7695: support synonyms in ComplexPhraseQueryParser


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8a549293
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8a549293
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8a549293

Branch: refs/heads/jira/solr-9835
Commit: 8a5492930eff393de824450f77f27d98a204df3d
Parents: d844207
Author: Mikhail Khludnev <mk...@apache.org>
Authored: Sun Mar 5 12:24:47 2017 +0300
Committer: Mikhail Khludnev <mk...@apache.org>
Committed: Wed Mar 8 11:20:35 2017 +0300

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 ++
 .../complexPhrase/ComplexPhraseQueryParser.java | 21 +++++++++---
 .../complexPhrase/TestComplexPhraseQuery.java   | 36 +++++++++++++++++---
 3 files changed, 52 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a549293/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index b067fde..a8f7ee4 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -204,6 +204,9 @@ Improvements
   IndexInput description instead of plain IOException (Mike Drob via
   Mike McCandless)
 
+* LUCENE-7695: ComplexPhraseQueryParser to support query time synonyms (Markus Jelsma
+  via Mikhail Khludnev) 
+
 Optimizations
 
 * LUCENE-7641: Optimized point range queries to compute documents that do not

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a549293/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
index 6e18960..32f4fb3 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.IndexSearcher;
@@ -35,6 +36,7 @@ import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.spans.SpanBoostQuery;
 import org.apache.lucene.search.spans.SpanNearQuery;
@@ -257,6 +259,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
       // ArrayList spanClauses = new ArrayList();
       if (contents instanceof TermQuery 
           || contents instanceof MultiTermQuery
+          || contents instanceof SynonymQuery
           ) {
         return contents;
       }
@@ -287,9 +290,11 @@ public class ComplexPhraseQueryParser extends QueryParser {
           qc = ((BoostQuery) qc).getQuery();
         }
 
-        if (qc instanceof BooleanQuery) {
+        if (qc instanceof BooleanQuery || qc instanceof SynonymQuery) {
           ArrayList<SpanQuery> sc = new ArrayList<>();
-          addComplexPhraseClause(sc, (BooleanQuery) qc);
+          BooleanQuery booleanCaluse = qc instanceof BooleanQuery ?
+              (BooleanQuery) qc : convert((SynonymQuery) qc);
+          addComplexPhraseClause(sc, booleanCaluse);
           if (sc.size() > 0) {
             allSpanClauses[i] = sc.get(0);
           } else {
@@ -309,14 +314,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
           if (qc instanceof TermQuery) {
             TermQuery tq = (TermQuery) qc;
             allSpanClauses[i] = new SpanTermQuery(tq.getTerm());
-          } else {
+            } else { 
             throw new IllegalArgumentException("Unknown query type \""
                 + qc.getClass().getName()
                 + "\" found in phrase query string \""
                 + phrasedQueryStringContents + "\"");
           }
-
         }
+
         i += 1;
       }
       if (numNegatives == 0) {
@@ -354,6 +359,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
       return snot;
     }
 
+    private BooleanQuery convert(SynonymQuery qc) {
+      BooleanQuery.Builder bqb = new BooleanQuery.Builder();
+      for (Term t : qc.getTerms()){
+        bqb.add(new BooleanClause(new TermQuery(t), Occur.SHOULD));
+      }
+      return bqb.build();
+    }
+
     private void addComplexPhraseClause(List<SpanQuery> spanClauses, BooleanQuery qc) {
       ArrayList<SpanQuery> ors = new ArrayList<>();
       ArrayList<SpanQuery> nots = new ArrayList<>();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a549293/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
index 28b600b..5c45e28 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/complexPhrase/TestComplexPhraseQuery.java
@@ -20,6 +20,7 @@ import java.util.HashSet;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockSynonymAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.DirectoryReader;
@@ -39,7 +40,11 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
       new DocData("john smith", "1", "developer"),
       new DocData("johathon smith", "2", "developer"),
       new DocData("john percival smith", "3", "designer"),
-      new DocData("jackson waits tom", "4", "project manager")
+      new DocData("jackson waits tom", "4", "project manager"),
+      new DocData("johny perkins", "5", "orders pizza"),
+      new DocData("hapax neverson", "6", "never matches"),
+      new DocData("dog cigar", "7", "just for synonyms"),
+      new DocData("dogs don't smoke cigarettes", "8", "just for synonyms"),
   };
 
   private IndexSearcher searcher;
@@ -73,12 +78,30 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
   }
 
   public void testSingleTermPhrase() throws Exception {
-    checkMatches("\"joh*\" \"tom\"", "1,2,3,4"); 
+    checkMatches("\"joh*\"","1,2,3,5");
+    checkMatches("\"joh~\"","1,3,5");
+    checkMatches("\"joh*\" \"tom\"", "1,2,3,4,5"); 
     checkMatches("+\"j*\" +\"tom\"", "4"); 
-    checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3");
+    checkMatches("\"jo*\" \"[sma TO smZ]\" ", "1,2,3,5,8");
     checkMatches("+\"j*hn\" +\"sm*h\"", "1,3"); 
   }
 
+  public void testSynonyms() throws Exception {
+    checkMatches("\"dogs\"","8");
+    MockSynonymAnalyzer synonym = new MockSynonymAnalyzer();
+    checkMatches("\"dogs\"","7,8",synonym);
+    // synonym is unidirectional 
+    checkMatches("\"dog\"","7",synonym);
+    checkMatches("\"dogs cigar*\"","");
+    checkMatches("\"dog cigar*\"","7");
+    checkMatches("\"dogs cigar*\"","7", synonym);
+    checkMatches("\"dog cigar*\"","7", synonym);
+    checkMatches("\"dogs cigar*\"~2","7,8", synonym);
+    // synonym is unidirectional
+    checkMatches("\"dog cigar*\"~2","7", synonym);
+    
+  }
+  
   public void testUnOrderedProximitySearches() throws Exception {
 
     inOrder = true;
@@ -98,8 +121,13 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
   }
 
   private void checkMatches(String qString, String expectedVals)
+  throws Exception {
+    checkMatches(qString, expectedVals, analyzer);
+  }
+
+  private void checkMatches(String qString, String expectedVals, Analyzer anAnalyzer)
       throws Exception {
-    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, anAnalyzer);
     qp.setInOrder(inOrder);
     qp.setFuzzyPrefixLength(1); // usually a good idea