You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mk...@apache.org on 2017/01/04 15:28:18 UTC
lucene-solr:master: SOLR-7466: reverse-aware leading wildcards in
complexphrase query parser
Repository: lucene-solr
Updated Branches:
refs/heads/master f87efac52 -> d3f83bb94
SOLR-7466: reverse-aware leading wildcards in complexphrase query parser
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d3f83bb9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d3f83bb9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d3f83bb9
Branch: refs/heads/master
Commit: d3f83bb948fd44e66099ef9537363ecef5bdb0f3
Parents: f87efac
Author: Mikhail Khludnev <mk...@apache.org>
Authored: Fri Dec 30 00:01:20 2016 +0300
Committer: Mikhail Khludnev <mk...@apache.org>
Committed: Wed Jan 4 18:11:02 2017 +0300
----------------------------------------------------------------------
solr/CHANGES.txt | 5 +
.../apache/solr/parser/SolrQueryParserBase.java | 26 ++---
.../solr/search/ComplexPhraseQParserPlugin.java | 70 +++++++++++-
.../TestComplexPhraseLeadingWildcard.java | 113 +++++++++++++++++++
4 files changed, 196 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3f83bb9/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a499cc8..ff1fdc7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -119,6 +119,8 @@ Upgrade Notes
risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are
advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files.
+* SOLR-7466: complexphrase query parser now supports leading wildcards; beware of its possible heaviness.
+ Users are encouraged to use ReversedWildcardFilter in index time analysis.
New Features
----------------------
@@ -220,6 +222,9 @@ New Features
* SOLR-8530: Add HavingStream to Streaming API and StreamingExpressions (Joel Bernstein)
+* SOLR-7466: Enable leading wildcard in complexphrase query parser, optimize it with ReversedWildcardFilterFactory
+ when it's provided (Mikhail Khludnev)
+
Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3f83bb9/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
index f54e9e9..84ffcb9 100644
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@@ -63,6 +63,8 @@ import org.apache.solr.search.SyntaxError;
*/
public abstract class SolrQueryParserBase extends QueryBuilder {
+ protected static final String REVERSE_WILDCARD_LOWER_BOUND = new String(new char[]{ReverseStringFilter.START_OF_HEADING_MARKER + 1});
+
public static final int TERMS_QUERY_THRESHOLD = 16; // @lucene.internal Set to a low value temporarily for better test coverage
static final int CONJ_NONE = 0;
@@ -889,28 +891,24 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
return newFieldQuery(getAnalyzer(), field, queryText, quoted);
}
+ protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){
+ checkNullField(field);
+ SchemaField sf = schema.getField(field);
+ return part1 == null && getReversedWildcardFilterFactory(sf.getType())!=null;
+ }
// called from parser
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
+ boolean reverse = isRangeShouldBeProtectedFromReverse(field, part1);
+ return getRangeQueryImpl(field, reverse ? REVERSE_WILDCARD_LOWER_BOUND : part1, part2, startInclusive || reverse, endInclusive);
+ }
+
+ protected Query getRangeQueryImpl(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws SyntaxError {
checkNullField(field);
SchemaField sf = schema.getField(field);
-
- if (part1 == null) {
- ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(sf.getType());
- if (factory != null) {
- // There will be reversed tokens starting with u0001 that we want to exclude, so
- // lets start at u0002 inclusive instead.
- char[] buf = new char[1];
- buf[0] = ReverseStringFilter.START_OF_HEADING_MARKER + 1;
- part1 = new String(buf);
- startInclusive = true;
- }
- }
-
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
}
-
// called from parser
protected Query getPrefixQuery(String field, String termStr) throws SyntaxError {
checkNullField(field);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3f83bb9/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
index 2904de9..22702dc 100644
--- a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
@@ -18,6 +18,7 @@ package org.apache.solr.search;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
+import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
@@ -60,6 +61,32 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
*/
class ComplexPhraseQParser extends QParser {
+ final class SolrQueryParserDelegate extends SolrQueryParser {
+ private SolrQueryParserDelegate(QParser parser, String defaultField) {
+ super(parser, defaultField);
+ }
+
+ @Override
+ protected org.apache.lucene.search.Query getWildcardQuery(String field, String termStr) throws SyntaxError {
+ return super.getWildcardQuery(field, termStr);
+ }
+
+ @Override
+ protected org.apache.lucene.search.Query getRangeQuery(String field, String part1, String part2,
+ boolean startInclusive, boolean endInclusive) throws SyntaxError {
+ return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
+ }
+
+ @Override
+ protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1) {
+ return super.isRangeShouldBeProtectedFromReverse(field, part1);
+ }
+
+ public String getLowerBoundForReverse() {
+ return REVERSE_WILDCARD_LOWER_BOUND;
+ }
+ }
+
ComplexPhraseQueryParser lparser;
boolean inOrder = true;
@@ -87,11 +114,46 @@ public class ComplexPhraseQParserPlugin extends QParserPlugin {
defaultField = getReq().getSchema().getDefaultSearchFieldName();
}
- lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer());
-
- if (localParams != null)
+ SolrQueryParserDelegate reverseAwareParser = new SolrQueryParserDelegate(this, defaultField);
+
+ lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer())
+ {
+ protected Query newWildcardQuery(org.apache.lucene.index.Term t) {
+ try {
+ org.apache.lucene.search.Query wildcardQuery = reverseAwareParser.getWildcardQuery(t.field(), t.text());
+ setRewriteMethod(wildcardQuery);
+ return wildcardQuery;
+ } catch (SyntaxError e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private Query setRewriteMethod(org.apache.lucene.search.Query query) {
+ if (query instanceof MultiTermQuery) {
+ ((MultiTermQuery) query).setRewriteMethod(
+ org.apache.lucene.search.MultiTermQuery.SCORING_BOOLEAN_REWRITE);
+ }
+ return query;
+ }
+
+ protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive,
+ boolean endInclusive) {
+ boolean reverse = reverseAwareParser.isRangeShouldBeProtectedFromReverse(field, part1);
+ return super.newRangeQuery(field,
+ reverse ? reverseAwareParser.getLowerBoundForReverse() : part1,
+ part2,
+ startInclusive || reverse,
+ endInclusive);
+ }
+ }
+ ;
+
+ lparser.setAllowLeadingWildcard(true);
+
+ if (localParams != null) {
inOrder = localParams.getBool("inOrder", inOrder);
-
+ }
+
lparser.setInOrder(inOrder);
QueryParser.Operator defaultOperator = QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(), getParam(QueryParsing.OP));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d3f83bb9/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java
new file mode 100644
index 0000000..6c48cc3
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseLeadingWildcard.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestComplexPhraseLeadingWildcard extends SolrTestCaseJ4 {
+
+ private static final String noReverseText = "three";
+ private static final String withOriginal = "one";
+ private static final String withoutOriginal = "two";
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ initCore("solrconfig.xml","schema-reversed.xml");
+ assertU(doc123(1, "one ever"));
+ assertU(doc123(2, "once forever"));
+
+ assertU(doc123(7, "once slope forever"));
+ assertU(doc123(8, "once again slope forever"));
+ assertU(doc123(9, "forever once"));
+ assertU(commit());
+ }
+
+ @Test
+ public void testReverseWithOriginal() throws Exception {
+ checkField(withOriginal);
+
+ }
+
+ // prefix query won't match without original tokens
+ @Test
+ public void testReverseWithoutOriginal() throws Exception {
+ assertQ( "prefix query doesn't work without original term",
+ req("q","{!complexphrase inOrder=true}\"on* for*\"",
+ "df",withoutOriginal),
+ expect());
+
+ assertQ("postfix query works fine even without original",
+ req("q","{!complexphrase inOrder=true}\"*nce *ver\"",
+ "df",withoutOriginal),
+ expect("2"));
+ }
+
+ @Test
+ public void testWithoutReverse() throws Exception {
+ checkField(noReverseText);
+ }
+
+ private void checkField(String field) {
+ assertQ(
+ req("q","{!complexphrase inOrder=true}\"on* *ver\"",
+ "df",field,
+ "indent","on",
+ "debugQuery", "true"),
+ expect("1","2"));
+
+ assertQ(
+ req("q","{!complexphrase inOrder=true}\"ON* *VER\"",
+ "df",field),
+ expect("1","2"));
+
+ assertQ(
+ req("q","{!complexphrase inOrder=true}\"ON* *ver\"",
+ "df",field),
+ expect("1","2"));
+
+ assertQ(
+ req("q","{!complexphrase inOrder=true}\"on* *ver\"~1",
+ "df",field),
+ expect("1","2","7"));
+
+ assertQ("range works if reverse doesn't mess",
+ req("q","{!complexphrase inOrder=true}\"on* [* TO a]\"",
+ "df",field),
+ expect());
+
+ assertQ("range works if reverse doesn't mess",
+ req("q","{!complexphrase inOrder=true}\"[on TO onZ] for*\"",
+ "df",field),
+ expect("2"));
+ }
+
+ private static String doc123(int id, String text){
+ return adoc("id",""+id, withOriginal, text, withoutOriginal, text, noReverseText, text);
+ }
+
+ private static String [] expect(String ...ids) {
+ String[] xpathes = new String[ids.length+1];
+ xpathes[0]= "//result[@numFound=" +ids.length+ "]";
+ int i=1;
+ for(String id : ids) {
+ xpathes[i++] = "//doc/int[@name='id' and text()='"+id+"']";
+ }
+ return xpathes;
+ }
+}