You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2017/01/06 21:47:30 UTC

[10/25] lucene-solr:jira/solr-8593: SOLR-7466: reverse-aware leading wildcards in complexphrase query parser

SOLR-7466: reverse-aware leading wildcards in complexphrase query parser


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d3f83bb9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d3f83bb9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d3f83bb9

Branch: refs/heads/jira/solr-8593
Commit: d3f83bb948fd44e66099ef9537363ecef5bdb0f3
Parents: f87efac
Author: Mikhail Khludnev <mk...@apache.org>
Authored: Fri Dec 30 00:01:20 2016 +0300
Committer: Mikhail Khludnev <mk...@apache.org>
Committed: Wed Jan 4 18:11:02 2017 +0300

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   5 +
 .../apache/solr/parser/SolrQueryParserBase.java |  26 ++---
 .../solr/search/ComplexPhraseQParserPlugin.java |  70 +++++++++++-
 .../TestComplexPhraseLeadingWildcard.java       | 113 +++++++++++++++++++
 4 files changed, 196 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3f83bb9/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a499cc8..ff1fdc7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -119,6 +119,8 @@ Upgrade Notes
   risk in overlapping commits.  Nonetheless users should continue to avoid excessive committing. Users are
   advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files.
 
+* SOLR-7466: complexphrase query parser now supports leading wildcards; beware that such queries can be expensive.
+  Users are encouraged to use ReversedWildcardFilter in index-time analysis.
 
 New Features
 ----------------------
@@ -220,6 +222,9 @@ New Features
 
 * SOLR-8530: Add HavingStream to Streaming API and StreamingExpressions (Joel Bernstein)
 
+* SOLR-7466: Enable leading wildcards in complexphrase query parser; optimize them with ReversedWildcardFilterFactory
+  when it is configured for the field type (Mikhail Khludnev)
+
 Optimizations
 ----------------------
 * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3f83bb9/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
index f54e9e9..84ffcb9 100644
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@@ -63,6 +63,8 @@ import org.apache.solr.search.SyntaxError;
  */
 public abstract class SolrQueryParserBase extends QueryBuilder {
 
+  protected static final String REVERSE_WILDCARD_LOWER_BOUND = new String(new char[]{ReverseStringFilter.START_OF_HEADING_MARKER + 1});
+
   public static final int TERMS_QUERY_THRESHOLD = 16;   // @lucene.internal Set to a low value temporarily for better test coverage
 
   static final int CONJ_NONE   = 0;
@@ -889,28 +891,24 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
     return newFieldQuery(getAnalyzer(), field, queryText, quoted);
   }
 
+ protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){
+   checkNullField(field);
+   SchemaField sf = schema.getField(field);
 
+   return part1 == null && getReversedWildcardFilterFactory(sf.getType())!=null;
+ }
 
   // called from parser
   protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
+    boolean reverse = isRangeShouldBeProtectedFromReverse(field, part1);
+    return getRangeQueryImpl(field, reverse ? REVERSE_WILDCARD_LOWER_BOUND : part1, part2, startInclusive || reverse, endInclusive);
+  }
+
+  protected Query getRangeQueryImpl(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
     checkNullField(field);
     SchemaField sf = schema.getField(field);
-
-    if (part1 == null) {
-      ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(sf.getType());
-      if (factory != null) {
-        // There will be reversed tokens starting with u0001 that we want to exclude, so
-        // lets start at u0002 inclusive instead.
-        char[] buf = new char[1];
-        buf[0] = ReverseStringFilter.START_OF_HEADING_MARKER + 1;
-        part1 = new String(buf);
-        startInclusive = true;
-      }
-    }
-
     return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
   }
-
   // called from parser
   protected Query getPrefixQuery(String field, String termStr) throws SyntaxError {
     checkNullField(field);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3f83bb9/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
index 2904de9..22702dc 100644
--- a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
@@ -18,6 +18,7 @@ package org.apache.solr.search;
 
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
+import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
@@ -60,6 +61,32 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
    */
   class ComplexPhraseQParser extends QParser {
 
+    final class SolrQueryParserDelegate extends SolrQueryParser {
+      private SolrQueryParserDelegate(QParser parser, String defaultField) {
+        super(parser, defaultField);
+      }
+
+      @Override
+      protected org.apache.lucene.search.Query getWildcardQuery(String field, String termStr) throws SyntaxError {
+        return super.getWildcardQuery(field, termStr);
+      }
+      
+      @Override
+      protected org.apache.lucene.search.Query getRangeQuery(String field, String part1, String part2,
+          boolean startInclusive, boolean endInclusive) throws SyntaxError {
+        return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
+      }
+      
+      @Override
+      protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1) {
+        return super.isRangeShouldBeProtectedFromReverse(field, part1);
+      }
+
+      public String getLowerBoundForReverse() {
+        return REVERSE_WILDCARD_LOWER_BOUND;
+      }
+    }
+
     ComplexPhraseQueryParser lparser;
 
     boolean inOrder = true;
@@ -87,11 +114,46 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
         defaultField = getReq().getSchema().getDefaultSearchFieldName();
       }
 
-      lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer());
-
-      if (localParams != null)
+      SolrQueryParserDelegate reverseAwareParser = new SolrQueryParserDelegate(this, defaultField);
+      
+      lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer())
+          {
+              protected Query newWildcardQuery(org.apache.lucene.index.Term t) {
+                try {
+                  org.apache.lucene.search.Query wildcardQuery = reverseAwareParser.getWildcardQuery(t.field(), t.text());
+                  setRewriteMethod(wildcardQuery);
+                  return wildcardQuery;
+                } catch (SyntaxError e) {
+                  throw new RuntimeException(e);
+                }
+              }
+
+              private Query setRewriteMethod(org.apache.lucene.search.Query query) {
+                if (query instanceof MultiTermQuery) {
+                  ((MultiTermQuery) query).setRewriteMethod(
+                      org.apache.lucene.search.MultiTermQuery.SCORING_BOOLEAN_REWRITE);
+                }
+                return query;
+              }
+              
+              protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive,
+                  boolean endInclusive) {
+                boolean reverse = reverseAwareParser.isRangeShouldBeProtectedFromReverse(field, part1);
+                return super.newRangeQuery(field, 
+                                            reverse ? reverseAwareParser.getLowerBoundForReverse() : part1, 
+                                            part2,
+                                            startInclusive || reverse, 
+                                            endInclusive);
+              }
+          }
+          ;
+
+      lparser.setAllowLeadingWildcard(true);
+          
+      if (localParams != null) {
         inOrder = localParams.getBool("inOrder", inOrder);
-
+      }
+      
       lparser.setInOrder(inOrder);
 
       QueryParser.Operator defaultOperator = QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(), getParam(QueryParsing.OP));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3f83bb9/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java
new file mode 100644
index 0000000..6c48cc3
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestComplexPhraseLeadingWildcard extends SolrTestCaseJ4 { 
+
+  private static final String noReverseText = "three";
+  private static final String withOriginal = "one";
+  private static final String withoutOriginal = "two";
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig.xml","schema-reversed.xml");
+    assertU(doc123(1, "one ever"));
+    assertU(doc123(2, "once forever"));
+                      
+    assertU(doc123(7, "once slope forever"));
+    assertU(doc123(8, "once again slope forever"));
+    assertU(doc123(9, "forever once"));
+    assertU(commit());
+  }
+  
+  @Test
+  public void testReverseWithOriginal() throws Exception {
+    checkField(withOriginal);
+    
+  }
+
+  // prefix query won't match without original tokens
+  @Test
+  public void testReverseWithoutOriginal() throws Exception {
+    assertQ( "prefix query doesn't work without original term",
+        req("q","{!complexphrase inOrder=true}\"on* for*\"",
+            "df",withoutOriginal),
+        expect());
+    
+    assertQ("postfix query works fine even without original",
+        req("q","{!complexphrase inOrder=true}\"*nce *ver\"",
+            "df",withoutOriginal),
+        expect("2"));
+  }
+  
+  @Test
+  public void testWithoutReverse() throws Exception {
+    checkField(noReverseText);
+  }
+
+  private void checkField(String field) {
+    assertQ(
+        req("q","{!complexphrase inOrder=true}\"on* *ver\"",
+            "df",field,
+            "indent","on",
+            "debugQuery", "true"),
+        expect("1","2"));
+    
+    assertQ(
+        req("q","{!complexphrase inOrder=true}\"ON* *VER\"",
+            "df",field), 
+        expect("1","2"));
+    
+    assertQ(
+        req("q","{!complexphrase inOrder=true}\"ON* *ver\"",
+            "df",field), 
+        expect("1","2"));
+    
+    assertQ(
+        req("q","{!complexphrase inOrder=true}\"on* *ver\"~1",
+            "df",field),
+        expect("1","2","7"));
+    
+    assertQ("range works if reverse doesn't mess",
+        req("q","{!complexphrase inOrder=true}\"on* [* TO a]\"",
+            "df",field),
+        expect());
+
+    assertQ("range works if reverse doesn't mess",
+        req("q","{!complexphrase inOrder=true}\"[on TO onZ] for*\"",
+            "df",field),
+        expect("2"));
+  } 
+  
+  private static String doc123(int id, String text){
+    return adoc("id",""+id, withOriginal, text, withoutOriginal, text, noReverseText, text);
+  }
+  
+  private static String [] expect(String ...ids) {
+    String[] xpathes = new String[ids.length+1];
+    xpathes[0]= "//result[@numFound=" +ids.length+ "]";
+    int i=1;
+    for(String id : ids) {
+      xpathes[i++] = "//doc/int[@name='id' and text()='"+id+"']";
+    }
+    return xpathes;
+  }
+}