You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2014/08/07 20:00:12 UTC
svn commit: r1616558 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/search/QParserPlugin.java
core/src/java/org/apache/solr/search/TermsQParserPlugin.java
core/src/test/org/apache/solr/search/TestQueryTypes.java
Author: dsmiley
Date: Thu Aug 7 18:00:11 2014
New Revision: 1616558
URL: http://svn.apache.org/r1616558
Log:
SOLR-6318: New terms QParser
Added:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java (with props)
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1616558&r1=1616557&r2=1616558&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Aug 7 18:00:11 2014
@@ -170,6 +170,9 @@ New Features
* SOLR-6302: UpdateRequestHandlers are registered implicitly /update ,
/update/json, /update/csv , /update/json/docs (Noble Paul)
+* SOLR-6318: New "terms" QParser for efficiently filtering documents by a list of values. For
+ many values, it's more appropriate than a boolean query. (David Smiley)
+
Bug Fixes
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/QParserPlugin.java?rev=1616558&r1=1616557&r2=1616558&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/QParserPlugin.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/QParserPlugin.java Thu Aug 7 18:00:11 2014
@@ -48,6 +48,7 @@ public abstract class QParserPlugin impl
FieldQParserPlugin.NAME, FieldQParserPlugin.class,
RawQParserPlugin.NAME, RawQParserPlugin.class,
TermQParserPlugin.NAME, TermQParserPlugin.class,
+ TermsQParserPlugin.NAME, TermsQParserPlugin.class,
NestedQParserPlugin.NAME, NestedQParserPlugin.class,
FunctionRangeQParserPlugin.NAME, FunctionRangeQParserPlugin.class,
SpatialFilterQParserPlugin.NAME, SpatialFilterQParserPlugin.class,
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java?rev=1616558&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/TermsQParserPlugin.java Thu Aug 7 18:00:11 2014
@@ -0,0 +1,140 @@
+package org.apache.solr.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.TermsFilter;
+import org.apache.lucene.search.AutomatonQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DocValuesTermsFilter;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.MultiTermQueryWrapperFilter;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
+
+import java.util.Arrays;
+import java.util.regex.Pattern;
+
+/**
+ * Finds documents whose specified field has any of the specified values. It's like
+ * {@link TermQParserPlugin} but multi-valued, and supports a variety of internal algorithms.
+ * <br>Parameters:
+ * <br><code>f</code>: The field name (mandatory)
+ * <br><code>separator</code>: the separator delimiting the values in the query string. By
+ * default it's a " " which is special in that it splits on any consecutive whitespace.
+ * <br><code>method</code>: Any of termsFilter (default), booleanQuery, automaton, docValuesTermsFilter.
+ * <p>
+ * Note that if no values are specified then the query matches no documents.
+ */
+public class TermsQParserPlugin extends QParserPlugin {
+ public static final String NAME = "terms";
+
+ /** The separator to use in the underlying suggester */
+ public static final String SEPARATOR = "separator";
+
+ /** Choose the internal algorithm */
+ private static final String METHOD = "method";
+
+ @Override
+ public void init(NamedList args) {
+ }
+
+ private static enum Method {
+ termsFilter {
+ @Override
+ Filter makeFilter(String fname, BytesRef[] bytesRefs) {
+ return new TermsFilter(fname, bytesRefs);
+ }
+ },
+ booleanQuery {
+ @Override
+ Filter makeFilter(String fname, BytesRef[] byteRefs) {
+ BooleanQuery bq = new BooleanQuery(true);
+ for (BytesRef byteRef : byteRefs) {
+ bq.add(new TermQuery(new Term(fname, byteRef)), BooleanClause.Occur.SHOULD);
+ }
+ return new QueryWrapperFilter(bq);
+ }
+ },
+ automaton {
+ @Override
+ Filter makeFilter(String fname, BytesRef[] byteRefs) {
+ Automaton union = Automata.makeStringUnion(Arrays.asList(byteRefs));
+ return new MultiTermQueryWrapperFilter<AutomatonQuery>(new AutomatonQuery(new Term(fname), union)) {
+ };
+ }
+ },
+ docValuesTermsFilter {//on 4x this is FieldCacheTermsFilter but we use the 5x name any way
+ //note: limited to one val per doc
+ @Override
+ Filter makeFilter(String fname, BytesRef[] byteRefs) {
+ return new DocValuesTermsFilter(fname, byteRefs);
+ }
+ };
+
+ abstract Filter makeFilter(String fname, BytesRef[] byteRefs);
+ }
+
+ @Override
+ public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+ return new QParser(qstr, localParams, params, req) {
+ @Override
+ public Query parse() throws SyntaxError {
+ String fname = localParams.get(QueryParsing.F);
+ FieldType ft = req.getSchema().getFieldTypeNoEx(fname);
+ String separator = localParams.get(SEPARATOR, " ");
+ String qstr = localParams.get(QueryParsing.V);//never null
+ Method method = Method.valueOf(localParams.get(METHOD, Method.termsFilter.name()));
+ //TODO pick the default method based on various heuristics from benchmarks
+
+ //if space then split on all whitespace & trim, otherwise strictly interpret
+ final boolean sepIsSpace = separator.equals(" ");
+ if (sepIsSpace)
+ qstr = qstr.trim();
+ if (qstr.length() == 0)
+ return new BooleanQuery();//Matches nothing.
+ final String[] splitVals = sepIsSpace ? qstr.split("\\s+") : qstr.split(Pattern.quote(separator), -1);
+ assert splitVals.length > 0;
+
+ BytesRef[] bytesRefs = new BytesRef[splitVals.length];
+ for (int i = 0; i < splitVals.length; i++) {
+ String stringVal = splitVals[i];
+ //logic same as TermQParserPlugin
+ BytesRef term = new BytesRef();
+ if (ft != null) {
+ ft.readableToIndexed(stringVal, term);
+ } else {
+ term.copyChars(stringVal);
+ }
+ bytesRefs[i] = term;
+ }
+
+ return new SolrConstantScoreQuery(method.makeFilter(fname, bytesRefs));
+ }
+ };
+ }
+}
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java?rev=1616558&r1=1616557&r2=1616558&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestQueryTypes.java Thu Aug 7 18:00:11 2014
@@ -30,21 +30,6 @@ public class TestQueryTypes extends Abst
public String getCoreName() { return "basic"; }
-
- @Override
- public void setUp() throws Exception {
- // if you override setUp or tearDown, you better call
- // the super classes version
- super.setUp();
- }
- @Override
- public void tearDown() throws Exception {
- // if you override setUp or tearDown, you better call
- // the super classes version
- super.tearDown();
- }
-
-
public void testQueryTypes() {
assertU(adoc("id","0"));
assertU(adoc("id","1", "v_t","Hello Dude"));
@@ -98,12 +83,36 @@ public class TestQueryTypes extends Abst
,"//result[@numFound='1']"
);
+ // term qparser
+ assertQ(req( "q", "{!term f="+f+"}"+v)
+ ,"//result[@numFound='1']"
+ );
+
+ // terms qparser
+ //wrap in spaces if space separated
+ final String separator = f == "v_s" ? "separator='|'" : "";//defaults to space separated
+ String vMod = separator == "" && random().nextBoolean() ? " " + v + " " : v;
+ assertQ(req( "q", "{!terms " + separator + " f=" +f+"}"+vMod)
+ ,"//result[@numFound='1']"
+ );
+
// lucene range
assertQ(req( "q", f + ":[\"" + v + "\" TO \"" + v + "\"]" )
,"//result[@numFound='1']"
);
}
+ // terms qparser, no values matches nothing
+ assertQ(req( "q", "*:*", "fq", "{!terms f=v_s}")
+ ,"//result[@numFound='0']"
+ );
+
+ String termsMethod = new String[]{"termsFilter", "booleanQuery", "automaton", "docValuesTermsFilter"}[random().nextInt(4)];
+ assertQ(req( "q", "{!terms f=v_s method=" + termsMethod + " separator=|}other stuff|wow dude")
+ ,"//result[@numFound='2']"
+ );
+
+
// frange and function query only work on single valued field types
Object[] fc_vals = new Object[] {
"id",999.0