You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ho...@apache.org on 2006/05/21 00:17:22 UTC
svn commit: r408103 - in /incubator/solr/trunk: example/solr/conf/
src/java/org/apache/solr/request/ src/java/org/apache/solr/util/
src/java/org/apache/solr/util/doc-files/ src/test/org/apache/solr/
src/test/org/apache/solr/util/ src/test/test-files/so...
Author: hossman
Date: Sat May 20 15:17:21 2006
New Revision: 408103
URL: http://svn.apache.org/viewvc?rev=408103&view=rev
Log:
new DisMaxRequestHandler as well as some generic SolrPluginUtils
Added:
incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java
incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java
incubator/solr/trunk/src/java/org/apache/solr/util/doc-files/
incubator/solr/trunk/src/java/org/apache/solr/util/doc-files/min-should-match.html
incubator/solr/trunk/src/test/org/apache/solr/DisMaxRequestHandlerTest.java
incubator/solr/trunk/src/test/org/apache/solr/util/
incubator/solr/trunk/src/test/org/apache/solr/util/SolrPluginUtilsTest.java
Modified:
incubator/solr/trunk/example/solr/conf/solrconfig.xml
incubator/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
Modified: incubator/solr/trunk/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/example/solr/conf/solrconfig.xml?rev=408103&r1=408102&r2=408103&view=diff
==============================================================================
--- incubator/solr/trunk/example/solr/conf/solrconfig.xml (original)
+++ incubator/solr/trunk/example/solr/conf/solrconfig.xml Sat May 20 15:17:21 2006
@@ -180,15 +180,44 @@
-->
<requestHandler name="standard" class="solr.StandardRequestHandler" />
- <!-- example of a request handler with custom parameters passed to it's init()
- <requestHandler name="example" class="myorg.mypkg.MyRequestHandler" >
- <int name="myparam">1000</int>
- <float name="ratio">1.4142135</float>
- <arr name="myarr"><int>1</int><int>2</int></arr>
- <str>foo</str>
+ <!-- DisMaxRequestHandler is an example of a request handler that
+ supports optional parameters which are passed to
+ its init() method.
+ -->
+ <requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
+ <float name="tie">0.01</float>
+ <str name="qf">
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ </str>
+ <str name="pf">
+ text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9
+ </str>
+ <str name="bf">
+ ord(poplarity)^0.5 recip(rord(price),1,1000,1000)^0.3
+ </str>
+ <str name="fl">
+ id,name,price,score
+ </str>
+ <str name="mm">
+ 2<-1 5<-2 6<90%
+ </str>
+ <int name="ps">100</int>
</requestHandler>
- -->
-
+ <!-- Note how you can register the same handler multiple times with
+ different names (and different init parameters)
+ -->
+ <requestHandler name="instock" class="solr.DisMaxRequestHandler" >
+ <str name="fq">
+ inStock:true
+ </str>
+ <str name="qf">
+ text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
+ </str>
+ <str name="mm">
+ 2<-1 5<-2 6<90%
+ </str>
+ </requestHandler>
+
<!-- config for the admin interface -->
<admin>
<defaultQuery>solr</defaultQuery>
Added: incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java?rev=408103&view=auto
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java (added)
+++ incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java Sat May 20 15:17:21 2006
@@ -0,0 +1,374 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.request;
+
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrInfoMBean;
+import org.apache.solr.core.SolrException;
+
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocListAndSet;
+import org.apache.solr.search.SolrCache;
+import org.apache.solr.search.SolrQueryParser;
+import org.apache.solr.search.QueryParsing;
+import org.apache.solr.search.CacheRegenerator;
+
+import org.apache.solr.request.StandardRequestHandler;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.request.SolrRequestHandler;
+
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.FieldType;
+
+import org.apache.solr.util.StrUtils;
+import org.apache.solr.util.NamedList;
+import org.apache.solr.util.SolrPluginUtils;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.analysis.Analyzer;
+
+import org.xmlpull.v1.XmlPullParserException;
+
+/* this is the standard logging framework for Solr */
+import java.util.logging.Logger;
+import java.util.logging.Level;
+import java.util.logging.Handler;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Collection;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.regex.Pattern;
+import java.io.IOException;
+import java.net.URL;
+
+/**
+ * <p>
+ * A Generic query plugin designed to be given a simple query expression
+ * from a user, which it will then query against a variety of
+ * pre-configured fields, in a variety of ways, using BooleanQueries,
+ * DisjunctionMaxQueries, and PhraseQueries.
+ * </p>
+ *
+ * <p>
+ * All of the following options may be configured for this plugin
+ * in the solrconfig as defaults, and may be overridden as request parameters
+ * </p>
+ *
+ * <ul>
+ * <li>tie - (Tie breaker) float value to use as tiebreaker in
+ * DisjunctionMaxQueries (should be something much less than 1)
+ * </li>
+ * <li> qf - (Query Fields) fields and boosts to use when building
+ * DisjunctionMaxQueries from the users query. Format is:
+ * "<code>fieldA^1.0 fieldB^2.2</code>".
+ * </li>
+ * <li> mm - (Minimum Match) this supports a wide variety of
+ * complex expressions.
+ * read {@link SolrPluginUtils#setMinShouldMatch SolrPluginUtils.setMinShouldMatch} for full details.
+ * </li>
+ * <li> pf - (Phrase Fields) fields/boosts to make phrase queries out
+ * of to boost
+ * the users query for exact matches on the specified fields.
+ * Format is: "<code>fieldA^1.0 fieldB^2.2</code>".
+ * </li>
+ * <li> ps - (Phrase Slop) amount of slop on phrase queries built for pf
+ * fields.
+ * </li>
+ * <li> bq - (Boost Query) a raw lucene query that will be included in the
+ * users query to influence the score. If this is a BooleanQuery
+ * with a default boost (1.0f) then the individual clauses will be
+ * added directly to the main query. Otherwise the query will be
+ * included as is.
+ * </li>
+ * <li> bf - (Boost Functions) functions (with optional boosts) that will be
+ * included in the users query to influence the score.
+ * Format is: "<code>funcA(arg1,arg2)^1.2
+ * funcB(arg3,arg4)^2.2</code>". NOTE: Whitespace is not allowed
+ * in the function arguments.
+ * </li>
+ * <li> fq - (Filter Query) a raw lucene query that can be used
+ * to restrict the super set of products we are interested in - more
+ * efficient than using bq, but doesn't influence score.
+ * </li>
+ * </ul>
+ *
+ * <p>
+ * The following options are only available as request params...
+ * </p>
+ *
+ * <ul>
+ * <li> q - (Query) the raw unparsed, unescaped, query from the user.
+ * </li>
+ * <li>sort - (Order By) list of fields and direction to sort on.
+ * </li>
+ * </ul>
+ */
+public class DisMaxRequestHandler
+  implements SolrRequestHandler, SolrInfoMBean {
+
+
+  /**
+   * A field we can't ever find in any schema, so we can safely tell
+   * DisjunctionMaxQueryParser to use it as our defaultField, and
+   * map aliases from it to any field in our schema.
+   */
+  private static final String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";
+
+  // statistics
+  // TODO: should we bother synchronizing these, or is an off-by-one error
+  // acceptable every million requests or so?
+  /** number of times handleRequest has been invoked */
+  long numRequests;
+  /** number of invocations of handleRequest that ended in an exception */
+  long numErrors;
+
+  /** shorten the class references for utilities */
+  private static class U extends SolrPluginUtils {
+    /* :NOOP */
+  }
+
+  /**
+   * Configured defaults for this handler instance; populated by
+   * {@link #init} and consulted whenever a request omits a param.
+   */
+  protected final U.CommonParams params = new U.CommonParams();
+
+  public DisMaxRequestHandler() {
+    super();
+  }
+
+  /* returns URLs to the Wiki pages */
+  public URL[] getDocs() {
+    /* :TODO: need docs */
+    return new URL[0];
+  }
+
+  /** Returns the fully qualified class name of this handler. */
+  public String getName() {
+    return this.getClass().getName();
+  }
+
+  /** Returns the request and error counters as a NamedList. */
+  public NamedList getStatistics() {
+    NamedList lst = new NamedList();
+    lst.add("requests", numRequests);
+    lst.add("errors", numErrors);
+    return lst;
+  }
+
+  public String getVersion() {
+    return "$Revision:$";
+  }
+
+  public String getDescription() {
+    return "DisjunctionMax Request Handler: Does relevancy based queries "
+      + "accross a variety of fields using configured boosts";
+  }
+
+  public Category getCategory() {
+    return Category.QUERYHANDLER;
+  }
+
+  public String getSourceId() {
+    return "$Id:$";
+  }
+
+  public String getSource() {
+    return "$URL:$";
+  }
+
+  /**
+   * Sets the default values for any useful info found in the config by
+   * delegating to the CommonParams instance held in {@link #params}.
+   * Per the documentation of CommonParams.setValues, an option that is
+   * not of the expected type is logged and skipped, and an option that
+   * is absent leaves the previous default untouched.
+   *
+   * @param args the init params declared for this handler in solrconfig
+   */
+  public void init(NamedList args) {
+
+    params.setValues(args);
+
+  }
+
+  /**
+   * Builds and executes the DisjunctionMax query for a request.
+   *
+   * <p>
+   * The main query is a BooleanQuery made of: the users query (required),
+   * a sloppy phrase version of the users query (optional), the clauses of
+   * the boost query (bq) and the boost functions (bf).  The filter query
+   * (fq), if any, restricts the results without influencing the score.
+   * Request params override the configured defaults.
+   * </p>
+   *
+   * <p>
+   * Any exception is logged, recorded on the response and counted in
+   * numErrors; it is never propagated to the caller.
+   * </p>
+   *
+   * @param req the request being processed
+   * @param rsp the response that receives "search-results", the field
+   *        list, and (optionally) "debug" info
+   */
+  public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
+    numRequests++;
+
+    try {
+
+      SolrIndexSearcher s = req.getSearcher();
+      IndexSchema schema = req.getSchema();
+
+      Map<String,Float> queryFields =
+        U.parseFieldBoosts(U.getParam(req, params.QF, params.qf));
+      Map<String,Float> phraseFields =
+        U.parseFieldBoosts(U.getParam(req, params.PF, params.pf));
+
+      float tiebreaker = U.getNumberParam
+        (req, params.TIE, params.tiebreaker).floatValue();
+
+      int pslop = U.getNumberParam(req, params.PS, params.pslop).intValue();
+
+      /* a generic parser for parsing regular lucene queries */
+      QueryParser p = new SolrQueryParser(schema, null);
+
+      /* a parser for dealing with user input, which will convert
+       * things to DisjunctionMaxQueries
+       */
+      U.DisjunctionMaxQueryParser up =
+        new U.DisjunctionMaxQueryParser(schema, IMPOSSIBLE_FIELD_NAME);
+      up.addAlias(IMPOSSIBLE_FIELD_NAME,
+                  tiebreaker, queryFields);
+
+      /* for parsing sloppy phrases using DisjunctionMaxQueries */
+      U.DisjunctionMaxQueryParser pp =
+        new U.DisjunctionMaxQueryParser(schema, IMPOSSIBLE_FIELD_NAME);
+      pp.addAlias(IMPOSSIBLE_FIELD_NAME,
+                  tiebreaker, phraseFields);
+      pp.setPhraseSlop(pslop);
+
+
+      /* * * Main User Query * * */
+
+      /* drop quotes if they are unbalanced, then escape every special
+       * character except '"', '-' and '+' so the user can still ask for
+       * phrases, required and prohibited terms
+       */
+      String userQuery = U.partialEscape
+        (U.stripUnbalancedQuotes(req.getQueryString())).toString();
+
+      /* the main query we will execute.  we disable the coord because
+       * this query is an artificial construct
+       */
+      BooleanQuery query = new BooleanQuery(true);
+
+      String minShouldMatch = U.getParam(req, params.MM, params.mm);
+
+      Query dis = up.parse(userQuery);
+
+      if (dis instanceof BooleanQuery) {
+        /* flatten nested optional clauses so that the min-should-match
+         * calculation sees all of them at one level
+         */
+        BooleanQuery t = new BooleanQuery();
+        U.flatenBooleanQuery(t, (BooleanQuery)dis);
+
+        U.setMinShouldMatch(t, minShouldMatch);
+
+        query.add(t, Occur.MUST);
+      } else {
+        query.add(dis, Occur.MUST);
+      }
+
+      /* * * Add on Phrases for the Query * * */
+
+      /* build up phrase boosting queries */
+
+      /* if the userQuery already has some quotes, strip them out.
+       * we've already done the phrases they asked for in the main
+       * part of the query, this is to boost docs that may not have
+       * matched those phrases but do match looser phrases.
+       */
+      String userPhraseQuery = userQuery.replace("\"","");
+      Query phrase = pp.parse("\"" + userPhraseQuery + "\"");
+      if (null != phrase) {
+        query.add(phrase, Occur.SHOULD);
+      }
+
+      /* * * Boosting Query * * */
+
+      String boostQuery = U.getParam(req, params.BQ, params.bq);
+      if (null != boostQuery && !boostQuery.equals("")) {
+        Query tmp = p.parse(boostQuery);
+        /* if the default boost was used, and we've got a BooleanQuery
+         * extract the subqueries out and use them directly
+         */
+        if (1.0f == tmp.getBoost() && tmp instanceof BooleanQuery) {
+          for (BooleanClause c : ((BooleanQuery)tmp).getClauses()) {
+            query.add(c);
+          }
+        } else {
+          query.add(tmp, BooleanClause.Occur.SHOULD);
+        }
+      }
+
+      /* * * Boosting Functions * * */
+
+      String boostFunc = U.getParam(req, params.BF, params.bf);
+      if (null != boostFunc && !boostFunc.equals("")) {
+        List<Query> funcs = U.parseFuncs(schema, boostFunc);
+        for (Query f : funcs) {
+          query.add(f, Occur.SHOULD);
+        }
+      }
+
+      /* * * Restrict Results * * */
+
+      List<Query> restrictions = new ArrayList<Query>(1);
+
+      /* User Restriction */
+      String filterQueryString = U.getParam(req, params.FQ, params.fq);
+      Query filterQuery = null;
+      if (null != filterQueryString && !filterQueryString.equals("")) {
+        filterQuery = p.parse(filterQueryString);
+        restrictions.add(filterQuery);
+      }
+
+      /* * * Generate Main Results * * */
+
+      DocList results = s.getDocList(query, restrictions,
+                                     SolrPluginUtils.getSort(req),
+                                     req.getStart(), req.getLimit(),
+                                     SolrIndexSearcher.GET_SCORES);
+      rsp.add("search-results",results);
+
+      U.setReturnFields(U.getParam(req, params.FL, params.fl), rsp);
+
+
+      /* * * Debugging Info * * */
+
+      /* a failure while gathering debug info must not discard the
+       * results already added to the response, so it gets its own
+       * try/catch and is reported as part of the response instead
+       */
+      try {
+        NamedList debug = U.doStandardDebug(req, userQuery, query, results);
+        if (null != debug) {
+          debug.add("boostquery", boostQuery);
+          debug.add("boostfunc", boostFunc);
+
+          debug.add("filterquery", filterQueryString);
+          if (null != filterQuery) {
+            debug.add("parsedfilterquery",
+                      QueryParsing.toString(filterQuery, schema));
+          }
+
+          rsp.add("debug", debug);
+        }
+
+      } catch (Exception e) {
+        SolrException.logOnce(SolrCore.log,
+                              "Exception durring debug", e);
+        rsp.add("exception_during_debug", SolrException.toStr(e));
+      }
+
+    } catch (Exception e) {
+      SolrException.log(SolrCore.log,e);
+      rsp.setException(e);
+      numErrors++;
+    }
+  }
+
+}
Added: incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java?rev=408103&view=auto
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java (added)
+++ incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java Sat May 20 15:17:21 2006
@@ -0,0 +1,823 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrInfoMBean;
+import org.apache.solr.core.SolrException;
+
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocListAndSet;
+import org.apache.solr.search.SolrCache;
+import org.apache.solr.search.SolrQueryParser;
+import org.apache.solr.search.QueryParsing;
+import org.apache.solr.search.CacheRegenerator;
+
+import org.apache.solr.request.StandardRequestHandler;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.request.SolrRequestHandler;
+
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.schema.FieldType;
+
+import org.apache.solr.util.StrUtils;
+import org.apache.solr.util.NamedList;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.ConstantScoreRangeQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.analysis.Analyzer;
+
+import org.xmlpull.v1.XmlPullParserException;
+
+import java.util.logging.Logger;
+import java.util.logging.Level;
+import java.util.logging.Handler;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Collection;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.regex.Pattern;
+import java.io.IOException;
+import java.net.URL;
+
+/**
+ * <p>Utilities that may be of use to RequestHandlers.</p>
+ *
+ * <p>
+ * Many of these functions have code that was stolen/mutated from
+ * StandardRequestHandler.
+ * </p>
+ *
+ * <p>:TODO: refactor StandardRequestHandler to use these utilities</p>
+ */
+public class SolrPluginUtils {
+
+ /** standard param for field list */
+ public static String FL = CommonParams.FL;
+
+ /**
+  * Counts the documents matching a query, optionally restricted by a
+  * filtering query.
+  *
+  * <p>
+  * SolrIndexSearcher.numDocs(Query,Query) freaks out if the filtering
+  * query is null, so in that case the size of the DocSet for the main
+  * query is used instead.
+  * </p>
+  *
+  * @param s the searcher to count against
+  * @param q the main query
+  * @param f the filtering query, may be null
+  */
+ public static int numDocs(SolrIndexSearcher s, Query q, Query f)
+   throws IOException {
+
+   if (null != f) {
+     return s.numDocs(q, f);
+   }
+   /* no filter: fall back to the plain DocSet for the query */
+   return s.getDocSet(q).size();
+ }
+
+ /**
+  * Looks up a request param as a String.
+  *
+  * @param req the request to read from
+  * @param param the name of the param
+  * @param def the value to return if the param is missing, empty, or
+  *        contains nothing but whitespace
+  * @return the param value exactly as supplied (not trimmed), or def
+  */
+ public static String getParam(SolrQueryRequest req,
+                               String param, String def) {
+
+   final String value = req.getParam(param);
+   final boolean blank = (null == value) || (0 == value.trim().length());
+   return blank ? def : value;
+ }
+
+ /**
+  * Looks up a request param and treats its value as a Number.
+  *
+  * <p>
+  * A value that parses is always returned as a Float, even if the
+  * caller only wants an int out of it.
+  * </p>
+  *
+  * @param req the request to read from
+  * @param param the name of the param
+  * @param def the value to return if the param is missing, blank, or
+  *        is not a number
+  * @return the parsed value, or def
+  */
+ public static Number getNumberParam(SolrQueryRequest req,
+                                     String param, Number def) {
+
+   final String raw = req.getParam(param);
+   if (null == raw || 0 == raw.trim().length()) {
+     return def;
+   }
+   try {
+     return Float.valueOf(raw);
+   } catch (NumberFormatException e) {
+     /* not a number: deliberately fall back to the default */
+     return def;
+   }
+ }
+
+
+
+ private final static Pattern splitList=Pattern.compile(",| ");
+
+ /**
+  * Sets the field list on the response using the value of the standard
+  * "fl" query param of the request.
+  *
+  * @param req the request whose "fl" param is read
+  * @param res the response to set the field list on
+  * @see #setReturnFields(String,SolrQueryResponse)
+  */
+ public static void setReturnFields(SolrQueryRequest req,
+                                    SolrQueryResponse res) {
+
+   final String fieldList = req.getParam(FL);
+   setReturnFields(fieldList, res);
+ }
+
+ /**
+  * Given a list of field names separated by commas and/or spaces, sets
+  * the field list on the SolrQueryResponse.
+  *
+  * <p>
+  * Empty names (which result from consecutive separators, as in
+  * "id, name") are ignored.  If fl is null, or contains no field names
+  * at all, the response is left untouched.
+  * </p>
+  *
+  * @param fl the field list, may be null
+  * @param res the response to set the field list on
+  */
+ public static void setReturnFields(String fl,
+                                    SolrQueryResponse res) {
+
+   if (null == fl) {
+     return;
+   }
+
+   // TODO - this could become more efficient if widely used.
+   // TODO - should field order be maintained?
+   Set<String> set = new HashSet<String>();
+   for (String fname : splitList.split(fl.trim(), 0)) {
+     /* a separator followed by another separator yields an empty
+      * token, which is not a field name
+      */
+     if (0 < fname.length()) {
+       set.add(fname);
+     }
+   }
+   if (!set.isEmpty()) {
+     res.setReturnFields(set);
+   }
+ }
+
+ /**
+  * <p>
+  * Returns a NamedList containing many "standard" pieces of debugging
+  * information, or null if the request has no 'debugQuery' param.
+  * </p>
+  *
+  * <ul>
+  * <li>rawquerystring - the 'q' param exactly as specified by the client
+  * </li>
+  * <li>querystring - the 'q' param after any preprocessing done by the plugin
+  * </li>
+  * <li>parsedquery - the main query executed, formatted by the Solr
+  *     QueryParsing utils class (which knows about field types)
+  * </li>
+  * <li>parsedquery_toString - the main query executed, formatted by its
+  *     own toString method (in case it has internal state Solr
+  *     doesn't know about)
+  * </li>
+  * <li>explain - the list of score explanations for each document in
+  *     results against query.
+  * </li>
+  * <li>otherQuery - the query string specified in 'explainOther' query param.
+  * </li>
+  * <li>explainOther - the list of score explanations, against query, for
+  *     each document matched by 'otherQuery'
+  * </li>
+  * </ul>
+  *
+  * @param req the request we are dealing with
+  * @param userQuery the users query as a string, after any basic
+  *                  preprocessing has been done
+  * @param query the query built from the userQuery
+  *              (and perhaps other clauses) that identifies the main
+  *              result set of the response.
+  * @param results the main result set of the response
+  * @return the debugging info, or null if debugging was not requested
+  */
+ public static NamedList doStandardDebug(SolrQueryRequest req,
+                                         String userQuery,
+                                         Query query,
+                                         DocList results)
+   throws IOException {
+
+   if (null == req.getParam("debugQuery")) {
+     /* debugging was not asked for */
+     return null;
+   }
+
+   final IndexSchema schema = req.getSchema();
+   final SolrIndexSearcher searcher = req.getSearcher();
+   final NamedList dbg = new NamedList();
+
+   /* userQuery may have been pre-processed .. expose both forms */
+   dbg.add("rawquerystring", req.getQueryString());
+   dbg.add("querystring", userQuery);
+
+   /* QueryParsing.toString isn't perfect, use it to see converted
+    * values, use regular toString to see any attributes of the
+    * underlying Query it may have missed.
+    */
+   dbg.add("parsedquery", QueryParsing.toString(query, schema));
+   dbg.add("parsedquery_toString", query.toString());
+
+   dbg.add("explain", getExplainList(query, results, searcher, schema));
+
+   final String otherQueryS = req.getParam("explainOther");
+   if (null != otherQueryS && 0 < otherQueryS.length()) {
+     /* the docs come from otherQuery, but they are explained
+      * relative to the main query
+      */
+     DocList otherResults =
+       doSimpleQuery(otherQueryS, searcher, schema, 0, 10);
+     dbg.add("otherQuery", otherQueryS);
+     dbg.add("explainOther",
+             getExplainList(query, otherResults, searcher, schema));
+   }
+
+   return dbg;
+ }
+
+
+ /**
+  * Generates a list of Explanations, one for each item in a list of docs.
+  *
+  * <p>
+  * Each entry is named "id=KEY,internal_docid=N" (the "id=KEY," part is
+  * left out if the schema can't produce a printable unique key for the
+  * doc), and its value is the Explanation as a String preceded by a
+  * newline.
+  * </p>
+  *
+  * @param query The Query you want explanations in the context of
+  * @param docs The Documents you want explained relative to that query
+  * @param searcher used to compute the explanations and load the docs
+  * @param schema used to print the unique key of each doc
+  */
+ public static NamedList getExplainList(Query query, DocList docs,
+                                        SolrIndexSearcher searcher,
+                                        IndexSchema schema)
+   throws IOException {
+
+   final NamedList explainList = new NamedList();
+   final DocIterator iterator = docs.iterator();
+   final int total = docs.size();
+
+   for (int seen = 0; seen < total; seen++) {
+     final int id = iterator.nextDoc();
+
+     final Explanation explain = searcher.explain(query, id);
+     final Document doc = searcher.doc(id);
+
+     String docname = "internal_docid=" + id;
+     final String strid = schema.printableUniqueKey(doc);
+     if (null != strid) {
+       docname = "id=" + strid + "," + docname;
+     }
+
+     explainList.add(docname, "\n" + explain.toString());
+   }
+   return explainList;
+ }
+
+ /**
+  * Executes a basic query in lucene syntax.
+  *
+  * <p>
+  * The request string is split on ';'.  The first piece is the query,
+  * the optional second piece is a sort specification.  If the sort
+  * specification carries a non-negative count, that count replaces limit.
+  * </p>
+  *
+  * @param sreq the request string: "query" or "query;sort"
+  * @param searcher the searcher to execute the query against
+  * @param schema used to parse the query and the sort
+  * @param start offset of the first doc to return
+  * @param limit max number of docs to return
+  */
+ public static DocList doSimpleQuery(String sreq,
+                                     SolrIndexSearcher searcher,
+                                     IndexSchema schema,
+                                     int start, int limit) throws IOException {
+   final List<String> commands = StrUtils.splitSmart(sreq, ';');
+
+   final String qs = commands.isEmpty() ? "" : commands.get(0);
+   final Query query = QueryParsing.parseQuery(qs, schema);
+
+   // If the first non-query, non-filter command is a simple sort on an
+   // indexed field, then we can use the Lucene sort ability.
+   Sort sort = null;
+   if (1 < commands.size()) {
+     final QueryParsing.SortSpec sortSpec =
+       QueryParsing.parseSort(commands.get(1), schema);
+     if (null != sortSpec) {
+       sort = sortSpec.getSort();
+       final int count = sortSpec.getCount();
+       if (0 <= count) {
+         limit = count;
+       }
+     }
+   }
+
+   return searcher.getDocList(query, (DocSet)null, sort, start, limit);
+ }
+
+ /**
+  * Converts a whitespace separated list of "fieldName^boost" entries
+  * into a Map from field name to boost.
+  *
+  * <p>
+  * Negative boosts are passed through untouched, and an entry with no
+  * boost is mapped to null: in both cases you're on your own.
+  * </p>
+  *
+  * @param in a String like "fieldOne^2.3 fieldTwo fieldThree^-0.4",
+  *        may be null or blank
+  * @return Map of fieldOne => 2.3, fieldTwo => null, fieldThree => -0.4;
+  *         an empty Map if in is null or blank
+  */
+ public static Map<String,Float> parseFieldBoosts(String in) {
+
+   if (null == in) {
+     return new HashMap<String,Float>();
+   }
+   final String trimmed = in.trim();
+   if (0 == trimmed.length()) {
+     return new HashMap<String,Float>();
+   }
+
+   final Map<String,Float> boosts = new HashMap<String,Float>(7);
+   for (String entry : trimmed.split("\\s+")) {
+     final String[] pieces = entry.split("\\^");
+     /* anything after a second '^' is ignored */
+     final Float boost = (1 < pieces.length) ? Float.valueOf(pieces[1]) : null;
+     boosts.put(pieces[0], boost);
+   }
+   return boosts;
+ }
+
+ /**
+  * Given a string containing functions with optional boosts, returns
+  * a List of Queries representing those functions with the specified
+  * boosts.  A function with no boost keeps whatever boost the parsed
+  * Query came with.
+  * <p>
+  * NOTE: intra-function whitespace is not allowed.
+  * </p>
+  *
+  * @param s the schema used to parse the functions
+  * @param in a String like "funcA(arg1,arg2)^1.2 funcB(arg3,arg4)^2.2"
+  * @see #parseFieldBoosts
+  */
+ public static List<Query> parseFuncs(IndexSchema s, String in)
+   throws ParseException {
+
+   final Map<String,Float> boostsByFunc = parseFieldBoosts(in);
+   final List<Query> funcs = new ArrayList<Query>(boostsByFunc.size());
+
+   for (Map.Entry<String,Float> entry : boostsByFunc.entrySet()) {
+     final Query fq = QueryParsing.parseFunction(entry.getKey(), s);
+     final Float boost = entry.getValue();
+     if (null != boost) {
+       fq.setBoost(boost);
+     }
+     funcs.add(fq);
+   }
+   return funcs;
+ }
+
+
+ /**
+  * Checks the number of optional clauses in the query, and compares it
+  * with the specification string to determine the proper value to use.
+  *
+  * <p>
+  * Details about the specification format can be found
+  * <a href="doc-files/min-should-match.html">here</a>
+  * </p>
+  *
+  * <p>A few important notes...</p>
+  * <ul>
+  * <li>
+  * If the calculations based on the specification determine that no
+  * optional clauses are needed, BooleanQuery.setMinimumNumberShouldMatch
+  * will never be called, but the usual rules about BooleanQueries
+  * still apply at search time (a BooleanQuery containing no required
+  * clauses must still match at least one optional clause)
+  * </li>
+  * <li>
+  * No matter what number the calculation arrives at,
+  * BooleanQuery.setMinimumNumberShouldMatch will never be called with a
+  * value greater than the number of optional clauses (or less than 1)
+  * </li>
+  * </ul>
+  *
+  * <p>:TODO: should optimize the case where number is same
+  * as clauses to just make them all "required"
+  * </p>
+  *
+  * @param q the query whose optional clauses are counted, and on which
+  *        the minimum is set
+  * @param spec the min-should-match specification
+  */
+ public static void setMinShouldMatch(BooleanQuery q, String spec) {
+
+   final BooleanClause[] clauses = q.getClauses();
+   int optionalClauses = 0;
+   for (int i = 0; i < clauses.length; i++) {
+     if (Occur.SHOULD == clauses[i].getOccur()) {
+       optionalClauses++;
+     }
+   }
+
+   final int msm = calculateMinShouldMatch(optionalClauses, spec);
+   if (msm > 0) {
+     q.setMinimumNumberShouldMatch(msm);
+   }
+ }
+
+ /**
+ * helper exposed for UnitTests
+ * @see #setMinShouldMatch
+ */
+ static int calculateMinShouldMatch(int optionalClauseCount, String spec) {
+
+ int result = optionalClauseCount;
+
+
+ if (-1 < spec.indexOf("<")) {
+ /* we have conditional spec(s) */
+
+ for (String s : spec.trim().split(" ")) {
+ String[] parts = s.split("<");
+ int upperBound = (new Integer(parts[0])).intValue();
+ if (optionalClauseCount <= upperBound) {
+ return result;
+ } else {
+ result = calculateMinShouldMatch
+ (optionalClauseCount, parts[1]);
+ }
+ }
+ return result;
+ }
+
+ /* otherwise, simple expresion */
+
+ if (-1 < spec.indexOf("%")) {
+ /* percentage */
+ int percent = new Integer(spec.replace("%","")).intValue();
+ float calc = (result * percent) / 100f;
+ result = calc < 0 ? result + (int)calc : (int)calc;
+ } else {
+ int calc = (new Integer(spec)).intValue();
+ result = calc < 0 ? result + calc : calc;
+ }
+
+ return (optionalClauseCount < result ?
+ optionalClauseCount : (result < 0 ? 0 : result));
+
+ }
+
+
+ /**
+  * Recursively walks the "from" query pulling out sub-queries and
+  * adding them to the "to" query.
+  *
+  * <p>
+  * Boosts are multiplied as needed: the boost of every sub-query of
+  * "from" is multiplied by the boost of "from" itself.
+  * Sub-BooleanQueries which are not optional will not be flattened.
+  * "from" will be mangled during the walk, so do not attempt to reuse it.
+  * </p>
+  *
+  * @param to the query that receives the flattened clauses
+  * @param from the query to walk, its sub-queries are modified
+  */
+ public static void flatenBooleanQuery(BooleanQuery to, BooleanQuery from) {
+
+   for (BooleanClause clause : from.getClauses()) {
+
+     final Query sub = clause.getQuery();
+     sub.setBoost(sub.getBoost() * from.getBoost());
+
+     final boolean optional =
+       !(clause.isRequired() || clause.isProhibited());
+
+     if (optional && sub instanceof BooleanQuery) {
+       /* we can recurse */
+       flatenBooleanQuery(to, (BooleanQuery)sub);
+     } else {
+       to.add(clause);
+     }
+   }
+ }
+
+ /**
+  * Puts a backslash in front of every query parser special character
+  * except '"', '-', and '+', which are left alone.
+  *
+  * @param s the text to escape
+  * @return a new buffer holding the escaped text
+  * @see QueryParser#escape
+  */
+ public static CharSequence partialEscape(CharSequence s) {
+   /* the characters that get escaped */
+   final String special = "\\!():^[]{}~*?";
+
+   final StringBuffer escaped = new StringBuffer();
+   final int len = s.length();
+   for (int pos = 0; pos < len; pos++) {
+     final char c = s.charAt(pos);
+     if (0 <= special.indexOf(c)) {
+       escaped.append('\\');
+     }
+     escaped.append(c);
+   }
+   return escaped;
+ }
+
+ /**
+  * Returns its input if there is an even (ie: balanced) number of
+  * '"' characters -- otherwise returns a String in which all '"'
+  * characters are stripped out.
+  *
+  * @param s the text to check
+  * @return s itself if the quotes are balanced, otherwise a copy with
+  *         no quotes at all
+  */
+ public static CharSequence stripUnbalancedQuotes(CharSequence s) {
+   /* flips on every quote seen, so it ends up true for an even count */
+   boolean balanced = true;
+   for (int pos = s.length() - 1; 0 <= pos; pos--) {
+     if ('"' == s.charAt(pos)) {
+       balanced = !balanced;
+     }
+   }
+   if (balanced) {
+     return s;
+   }
+   return s.toString().replace("\"","");
+ }
+
+
+
+ /**
+ * A collection of common params, both for Plugin initialization and
+ * for Requests.
+ *
+ * <p>
+ * The static constants are the names of the params, the instance
+ * fields hold the default value to be used for each of them.
+ * </p>
+ */
+ public static class CommonParams {
+
+ /** query and init param for tiebreaker value */
+ public static final String TIE = "tie";
+ /** query and init param for query fields */
+ public static final String QF = "qf";
+ /** query and init param for phrase boost fields */
+ public static final String PF = "pf";
+ /** query and init param for MinShouldMatch specification */
+ public static final String MM = "mm";
+ /** query and init param for Phrase Slop value */
+ public static final String PS = "ps";
+ /** query and init param for boosting query */
+ public static final String BQ = "bq";
+ /** query and init param for boosting functions */
+ public static final String BF = "bf";
+ /** query and init param for filtering query */
+ public static final String FQ = "fq";
+ /** query and init param for field list */
+ public static final String FL = "fl";
+ /**
+ * query and init param "gen"
+ * (NOTE(review): not read by setValues, and the previous description
+ * was a copy of the one for FL -- confirm the intended meaning)
+ */
+ public static final String GEN = "gen";
+
+ /** the default tie breaker to use in DisjunctionMaxQueries */
+ public float tiebreaker = 0.0f;
+ /** the default query fields to be used */
+ public String qf = null;
+ /** the default phrase boosting fields to be used */
+ public String pf = null;
+ /** the default min should match to be used */
+ public String mm = "100%";
+ /** the default phrase slop to be used */
+ public int pslop = 0;
+ /** the default boosting query to be used */
+ public String bq = null;
+ /** the default boosting functions to be used */
+ public String bf = null;
+ /** the default filtering query to be used */
+ public String fq = null;
+ /** the default field list to be used */
+ public String fl = null;
+
+ public CommonParams() {
+ /* :NOOP: */
+ }
+
+ /** @see #setValues */
+ public CommonParams(NamedList args) {
+ this();
+ setValues(args);
+ }
+
+ /**
+ * Sets the params using values from a NamedList, useful in the
+ * init method for your handler.
+ *
+ * <p>
+ * If any param is not of the expected type, a severe error is
+ * logged, and the param is skipped.
+ * </p>
+ *
+ * <p>
+ * If any param is not in the NamedList, it is skipped and the
+ * old value is left alone.
+ * </p>
+ *
+ * @param args the init args to read the param values from
+ */
+ public void setValues(NamedList args) {
+
+ Object tmp;
+
+ tmp = args.get(TIE);
+ if (null != tmp) {
+ if (tmp instanceof Float) {
+ tiebreaker = ((Float)tmp).floatValue();
+ } else {
+ SolrCore.log.severe("init param is not a float: " + TIE);
+ }
+ }
+
+ qf = strParam(args, QF, qf);
+ pf = strParam(args, PF, pf);
+ mm = strParam(args, MM, mm);
+
+ tmp = args.get(PS);
+ if (null != tmp) {
+ if (tmp instanceof Integer) {
+ pslop = ((Integer)tmp).intValue();
+ } else {
+ SolrCore.log.severe("init param is not an int: " + PS);
+ }
+ }
+
+ bq = strParam(args, BQ, bq);
+ bf = strParam(args, BF, bf);
+ fq = strParam(args, FQ, fq);
+ fl = strParam(args, FL, fl);
+
+ }
+
+ /**
+ * Reads one str param out of the NamedList.
+ *
+ * @param args the NamedList to look in
+ * @param name the name of the param
+ * @param oldVal the value currently in effect
+ * @return the value from args if it is there and is a String,
+ * otherwise oldVal (a severe error is logged when a value
+ * is there but is not a String)
+ */
+ private static String strParam(NamedList args, String name, String oldVal) {
+ Object tmp = args.get(name);
+ if (null == tmp) {
+ return oldVal;
+ }
+ if (tmp instanceof String) {
+ return tmp.toString();
+ }
+ SolrCore.log.severe("init param is not a str: " + name);
+ return oldVal;
+ }
+
+ }
+
+ /**
+ * A subclass of SolrQueryParser that supports aliasing fields for
+ * constructing DisjunctionMaxQueries.
+ */
+ public static class DisjunctionMaxQueryParser extends SolrQueryParser {
+
+ /** A simple container for storing alias info
+ * @see #aliases
+ */
+ protected static class Alias {
+ /** the tiebreaker handed to the DisjunctionMaxQuery built for the alias */
+ public float tie;
+ /** maps field name to boost, a null boost leaves the sub query's boost alone */
+ public Map<String,Float> fields;
+ }
+
+ /**
+ * Where we store a map from field name we expect to see in our query
+ * string, to Alias object containing the fields to use in our
+ * DisjunctionMaxQuery and the tiebreaker to use.
+ */
+ protected Map<String,Alias> aliases = new HashMap<String,Alias>(3);
+
+ public DisjunctionMaxQueryParser(IndexSchema s, String defaultField) {
+ super(s,defaultField);
+ }
+ public DisjunctionMaxQueryParser(IndexSchema s) {
+ this(s,null);
+ }
+
+ /**
+ * Add an alias to this query parser, replacing any alias already
+ * registered for the same field name.
+ *
+ * @param field the field name that should trigger alias mapping
+ * @param tiebreaker the tiebreaker to be used in the
+ * DisjunctionMaxQuery
+ * @param fieldBoosts the mapping from fieldname to boost value that
+ * should be used to build up the clauses of the
+ * DisjunctionMaxQuery.
+ * @see SolrPluginUtils#parseFieldBoosts
+ */
+ public void addAlias(String field, float tiebreaker,
+ Map<String,Float> fieldBoosts) {
+
+ Alias a = new Alias();
+ a.tie = tiebreaker;
+ a.fields = fieldBoosts;
+ aliases.put(field, a);
+ }
+
+ /**
+ * Delegates to the super class unless the field has been specified
+ * as an alias -- in which case we recurse on each of
+ * the aliased fields, and the results are composed into a
+ * DisjunctionMaxQuery. (so yes: aliases which point at other
+ * aliases should work)
+ *
+ * <p>
+ * Nothing here detects cycles: an alias that (directly or through
+ * other aliases) points back at itself recurses without bound.
+ * </p>
+ *
+ * @return null if the field is an alias, but none of the aliased
+ * fields produced a query
+ */
+ protected Query getFieldQuery(String field, String queryText)
+ throws ParseException {
+
+ /* one lookup tells us both if it's an alias, and what it maps to */
+ Alias a = aliases.get(field);
+ if (null == a) {
+ return super.getFieldQuery(field, queryText);
+ }
+
+ DisjunctionMaxQuery q = new DisjunctionMaxQuery(a.tie);
+
+ /* we might not get any valid queries from delegation,
+ * in which case we should return null
+ */
+ boolean ok = false;
+
+ for (Map.Entry<String,Float> e : a.fields.entrySet()) {
+
+ Query sub = getFieldQuery(e.getKey(), queryText);
+ if (null != sub) {
+ Float boost = e.getValue();
+ if (null != boost) {
+ sub.setBoost(boost.floatValue());
+ }
+ q.add(sub);
+ ok = true;
+ }
+ }
+ return ok ? q : null;
+ }
+
+ }
+
+
+
+ /**
+ * Builds the Sort described by the request parameter "sort".
+ *
+ * <p>
+ * A sort string that can not be turned into a SortSpec is ignored,
+ * after a warning has been logged.
+ * </p>
+ *
+ * @return null if no sort is specified, or if the one specified
+ * had to be ignored.
+ */
+ public static Sort getSort(SolrQueryRequest req) {
+
+ String sortParam = req.getParam("sort");
+ if (null == sortParam || 0 == sortParam.length()) {
+ return null;
+ }
+
+ QueryParsing.SortSpec spec = null;
+ SolrException failure = null;
+ try {
+ spec = QueryParsing.parseSort(sortParam, req.getSchema());
+ } catch (SolrException e) {
+ failure = e;
+ }
+
+ if ((null != spec) && (null == failure)) {
+ return spec.getSort();
+ }
+
+ /* the user definitely gave us some sort of sort string,
+ * but we could not get a SortSpec out of it
+ */
+ SolrCore.log.log(Level.WARNING,"Invalid sort \""+sortParam+"\" was specified, ignoring", failure);
+ return null;
+ }
+
+
+ /**
+ * A CacheRegenerator for caches whose items do not depend on the
+ * current searcher.
+ *
+ * <p>
+ * Each oldKey/oldVal pair is put into the newCache exactly as it is;
+ * the searcher and the oldCache are never looked at.
+ * </p>
+ */
+ public static class IdentityRegenerator implements CacheRegenerator {
+
+ /** Copies the pair into newCache, and always returns true. */
+ public boolean regenerateItem(SolrIndexSearcher newSearcher,
+ SolrCache newCache, SolrCache oldCache,
+ Object oldKey, Object oldVal) throws IOException {
+
+ newCache.put(oldKey,oldVal);
+ return true;
+ }
+
+ }
+
+
+}
Added: incubator/solr/trunk/src/java/org/apache/solr/util/doc-files/min-should-match.html
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/util/doc-files/min-should-match.html?rev=408103&view=auto
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/util/doc-files/min-should-match.html (added)
+++ incubator/solr/trunk/src/java/org/apache/solr/util/doc-files/min-should-match.html Sat May 20 15:17:21 2006
@@ -0,0 +1,110 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<title>Min Number Should Match Specification Format</title>
+</head>
+
+<body>
+<h1>Min Number Should Match Specification Format</h1>
+
+<blockquote>
+This document explains the format used for specifying the
+"Min Number Should Match" criteria of the BooleanQuery objects built by the
+DisMaxRequestHandler.
+</blockquote>
+
+
+<h2>Explanation of Concept: "Min Number Should Match"</h2>
+<div>
+:TODO:
+</div>
+
+
+<h2>Specification Format</h2>
+<div>
+
+<p>Specification strings may have the following formats...</p>
+
+<dl>
+
+ <dt><code>3</code></dt>
+ <dd>A positive integer, indicating a fixed value regardless of the
+ number of optional clauses.
+ </dd>
+
+ <dt><code>-2</code></dt>
+ <dd>A negative integer, indicating that the total number of optional clauses,
+ minus this number should be mandatory.
+ </dd>
+
+ <dt><code>75%</code></dt>
+ <dd>A percentage, indicating that this percent of the total number of
+ optional clauses are necessary. The number computed from the
+ percentage is rounded down and used as the minimum.
+ </dd>
+
+ <dt><code>-25%</code></dt>
+ <dd>A negative percentage, indicating that this percent of the total
+ number of optional clauses can be missing. The number computed from the
+ percentage is rounded down, before being subtracted from the total
+ to determine the minimum.
+ </dd>
+
+ <dt><code>3&lt;90%</code></dt>
+ <dd>A positive integer, followed by the less-than symbol, followed
+ by any of the previously mentioned specifiers is a conditional
+ specification. It indicates that if the number of optional clauses is
+ equal to (or less than) the integer, they are all required, but
+ if it's greater than the integer, the specification applies.
+ In this example: if there are 1 to 3 clauses they are all required,
+ but for 4 or more clauses only 90% are required.
+ </dd>
+
+ <dt><code>2&lt;-25% 9&lt;-3</code></dt>
+ <dd>Multiple conditional specifications can be separated by spaces,
+ each one only being valid for numbers greater than the one before it.
+ In this example: if there are 1 or 2 clauses both are required,
+ if there are 3-9 clauses all but 25% are required, and if there
+ are more than 9 clauses, all but three are required.
+ </dd>
+</dl>
+
+<p>
+ A few important notes...
+</p>
+
+<ul>
+ <li>
+ When dealing with percentages, negative values can be used to get
+ different behavior in edge cases. 75% and -25% mean the same thing
+ when dealing with 4 clauses, but when dealing with 5 clauses 75% means
+ 3 are required, but -25% means 4 are required.
+ </li>
+ <li>
+ If the calculations based on the specification determine that no
+ optional clauses are needed, the usual rules about BooleanQueries
+ still apply at search time (a BooleanQuery containing no required
+ clauses must still match at least one optional clause)
+ </li>
+ <li>
+ No matter what number the calculation arrives at,
+ a value greater than the number of optional clauses, or a value less than
+ 1 will never be used. (ie: no matter how low or how high the result of the
+ calculation is, the minimum number of required matches will never
+ be lower than 1 or greater than the number of clauses.)
+ </li>
+</ul>
+
+
+</div>
+
+
+
+
+<hr>
+<pre>
+$Id:$
+$Source:$
+</pre>
+
+</body> </html>
Added: incubator/solr/trunk/src/test/org/apache/solr/DisMaxRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/org/apache/solr/DisMaxRequestHandlerTest.java?rev=408103&view=auto
==============================================================================
--- incubator/solr/trunk/src/test/org/apache/solr/DisMaxRequestHandlerTest.java (added)
+++ incubator/solr/trunk/src/test/org/apache/solr/DisMaxRequestHandlerTest.java Sat May 20 15:17:21 2006
@@ -0,0 +1,103 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr;
+
+import org.apache.solr.request.*;
+import org.apache.solr.util.*;
+import org.w3c.dom.Document;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.DocumentBuilder;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.ByteArrayInputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Map;
+import java.util.HashMap;
+
+/**
+ * Tests some basic functionality of the DisMaxRequestHandler: simple matching, cross field matching with ordering, and "mm" handling
+ */
+public class DisMaxRequestHandlerTest extends AbstractSolrTestCase {
+
+ public String getSchemaFile() { return "schema.xml"; }
+ public String getSolrConfigFile() { return "solrconfig.xml"; }
+ public void setUp() throws Exception {
+ super.setUp();
+ lrf = h.getRequestFactory
+ ("dismax",0,20,"version","2.0"); // presumably makes req(...) below go to the "dismax" handler -- confirm against AbstractSolrTestCase
+ }
+ public void testSomeStuff() throws Exception {
+
+ assertU(adoc("id", "666",
+ "features_t", "cool and scary stuff",
+ "subject", "traveling in hell",
+ "title", "The Omen",
+ "weight", "87.9",
+ "iind", "666"));
+ assertU(adoc("id", "42",
+ "features_t", "cool stuff",
+ "subject", "traveling the galaxy",
+ "title", "Hitch Hiker's Guide to the Galaxy",
+ "weight", "99.45",
+ "iind", "42"));
+ assertU(adoc("id", "1",
+ "features_t", "nothing",
+ "subject", "garbage",
+ "title", "Most Boring Guide Ever",
+ "weight", "77",
+ "iind", "4"));
+ assertU(adoc("id", "8675309",
+ "features_t", "Wikedly memorable chorus and stuff",
+ "subject", "One Cool Hot Chick",
+ "title", "Jenny",
+ "weight", "97.3",
+ "iind", "8675309"));
+ assertU(commit()); // the four docs above are everything the queries below can match
+
+ assertQ("basic match",
+ req("guide")
+ ,"//*[@numFound='2']"
+ );
+
+ assertQ("basic cross field matching, boost on same field matching",
+ req("cool stuff")
+ ,"//*[@numFound='3']"
+ ,"//result/doc[1]/int[@name='id'][.='42']"
+ ,"//result/doc[2]/int[@name='id'][.='666']"
+ ,"//result/doc[3]/int[@name='id'][.='8675309']"
+ );
+
+ assertQ("minimum mm is three",
+ req("cool stuff traveling")
+ ,"//*[@numFound='2']"
+ ,"//result/doc[1]/int[@name='id'][. ='42']"
+ ,"//result/doc[2]/int[@name='id'][. ='666']"
+ );
+
+ assertQ("at 4 mm allows one missing ",
+ req("cool stuff traveling jenny")
+ ,"//*[@numFound='3']"
+ );
+
+ }
+
+
+
+
+
+}
Added: incubator/solr/trunk/src/test/org/apache/solr/util/SolrPluginUtilsTest.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/org/apache/solr/util/SolrPluginUtilsTest.java?rev=408103&view=auto
==============================================================================
--- incubator/solr/trunk/src/test/org/apache/solr/util/SolrPluginUtilsTest.java (added)
+++ incubator/solr/trunk/src/test/org/apache/solr/util/SolrPluginUtilsTest.java Sat May 20 15:17:21 2006
@@ -0,0 +1,329 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import org.apache.solr.search.SolrQueryParser;
+import org.apache.solr.util.NamedList;
+import org.apache.solr.util.SolrPluginUtils;
+import org.apache.solr.util.SolrPluginUtils.DisjunctionMaxQueryParser;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
+
+import org.xmlpull.v1.XmlPullParserFactory;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+import java.io.File;
+import java.math.BigDecimal;
+import java.util.Random;
+import java.util.Date;
+import java.util.List;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Iterator;
+
+/**
+ * Tests that the functions in SolrPluginUtils (partialEscape, stripUnbalancedQuotes, parseFieldBoosts, DisjunctionMaxQueryParser and the MinShouldMatch helpers) work as advertised.
+ */
+public class SolrPluginUtilsTest extends AbstractSolrTestCase {
+
+ public String getSchemaFile() { return "schema.xml"; }
+ public String getSolrConfigFile() { return "solrconfig.xml"; }
+
+ public void testPartialEscape() {
+
+ assertEquals("",pe(""));
+ assertEquals("foo",pe("foo"));
+ assertEquals("foo\\:bar",pe("foo:bar"));
+ assertEquals("+foo\\:bar",pe("+foo:bar"));
+ assertEquals("foo \\! bar",pe("foo ! bar"));
+ assertEquals("foo\\?",pe("foo?"));
+ assertEquals("foo \"bar\"",pe("foo \"bar\""));
+ assertEquals("foo\\! \"bar\"",pe("foo! \"bar\""));
+
+ }
+
+ public void testStripUnbalancedQuotes() {
+
+ assertEquals("",strip(""));
+ assertEquals("foo",strip("foo"));
+ assertEquals("foo \"bar\"",strip("foo \"bar\""));
+ assertEquals("42",strip("42\""));
+ assertEquals("\"how now brown cow?\"",strip("\"how now brown cow?\""));
+ assertEquals("\"you go\" \"now!\"",strip("\"you go\" \"now!\""));
+
+ }
+
+ public void testParseFieldBoosts() throws Exception {
+
+ Map<String,Float> e1 = new HashMap<String,Float>();
+ e1.put("fieldOne",2.3f);
+ e1.put("fieldTwo",null);
+ e1.put("fieldThree",-0.4f);
+
+ assertEquals("basic e1", e1, SolrPluginUtils.parseFieldBoosts
+ ("fieldOne^2.3 fieldTwo fieldThree^-0.4"));
+ assertEquals("spacey e1", e1, SolrPluginUtils.parseFieldBoosts
+ (" fieldOne^2.3 fieldTwo fieldThree^-0.4 "));
+ assertEquals("really spacey e1", e1, SolrPluginUtils.parseFieldBoosts
+ (" \t fieldOne^2.3 \n fieldTwo fieldThree^-0.4 "));
+
+ Map<String,Float> e2 = new HashMap<String,Float>();
+ assertEquals("empty e2", e2, SolrPluginUtils.parseFieldBoosts
+ (""));
+ assertEquals("spacey e2", e2, SolrPluginUtils.parseFieldBoosts
+ (" \t "));
+ }
+
+
+ public void testDisjunctionMaxQueryParser() throws Exception {
+
+ Query out;
+ String t;
+
+ DisjunctionMaxQueryParser qp =
+ new SolrPluginUtils.DisjunctionMaxQueryParser(h.getCore().getSchema());
+
+ qp.addAlias("hoss", 0.01f, SolrPluginUtils.parseFieldBoosts
+ ("title^2.0 title_stemmed name^1.2 subject^0.5"));
+ qp.addAlias("test", 0.01f, SolrPluginUtils.parseFieldBoosts("text^2.0"));
+ qp.addAlias("unused", 1.0f, SolrPluginUtils.parseFieldBoosts
+ ("subject^0.5 sind^1.5"));
+
+
+ /* first some sanity tests that don't use aliasing at all */
+
+ t = "XXXXXXXX";
+ out = qp.parse(t);
+ assertNotNull(t+" sanity test gave back null", out);
+ assertTrue(t+" sanity test isn't TermQuery: " + out.getClass(),
+ out instanceof TermQuery);
+ assertEquals(t+" sanity test is wrong field",
+ h.getCore().getSchema().getDefaultSearchFieldName(),
+ ((TermQuery)out).getTerm().field());
+
+ t = "subject:XXXXXXXX";
+ out = qp.parse(t);
+ assertNotNull(t+" sanity test gave back null", out);
+ assertTrue(t+" sanity test isn't TermQuery: " + out.getClass(),
+ out instanceof TermQuery);
+ assertEquals(t+" sanity test is wrong field", "subject",
+ ((TermQuery)out).getTerm().field());
+
+ /* field has untokenized type, so this should be a term anyway */
+ t = "sind:\"simple phrase\"";
+ out = qp.parse(t);
+ assertNotNull(t+" sanity test gave back null", out);
+ assertTrue(t+" sanity test isn't TermQuery: " + out.getClass(),
+ out instanceof TermQuery);
+ assertEquals(t+" sanity test is wrong field", "sind",
+ ((TermQuery)out).getTerm().field());
+
+ t = "subject:\"simple phrase\"";
+ out = qp.parse(t);
+ assertNotNull(t+" sanity test gave back null", out);
+ assertTrue(t+" sanity test isn't PhraseQuery: " + out.getClass(),
+ out instanceof PhraseQuery);
+ assertEquals(t+" sanity test is wrong field", "subject",
+ ((PhraseQuery)out).getTerms()[0].field());
+
+
+ /* now some tests that use aliasing */
+
+ /* basic usage of single "term": one clause per field of the "hoss" alias */
+ t = "hoss:XXXXXXXX";
+ out = qp.parse(t);
+ assertNotNull(t+" was null", out);
+ assertTrue(t+" wasn't a DMQ:" + out.getClass(),
+ out instanceof DisjunctionMaxQuery);
+ assertEquals(t+" wrong number of clauses", 4,
+ countItems(((DisjunctionMaxQuery)out).iterator()));
+
+
+ /* odd case, but should still work, DMQ of one clause */
+ t = "test:YYYYY";
+ out = qp.parse(t);
+ assertNotNull(t+" was null", out);
+ assertTrue(t+" wasn't a DMQ:" + out.getClass(),
+ out instanceof DisjunctionMaxQuery);
+ assertEquals(t+" wrong number of clauses", 1,
+ countItems(((DisjunctionMaxQuery)out).iterator()));
+
+ /* basic usage of multiple "terms" */
+ t = "hoss:XXXXXXXX test:YYYYY";
+ out = qp.parse(t);
+ assertNotNull(t+" was null", out);
+ assertTrue(t+" wasn't a boolean:" + out.getClass(),
+ out instanceof BooleanQuery);
+ {
+ BooleanQuery bq = (BooleanQuery)out;
+ assertEquals(t+" wrong number of clauses", 2,
+ bq.getClauses().length);
+ Query sub = bq.getClauses()[0].getQuery();
+ assertTrue(t+" first wasn't a DMQ:" + sub.getClass(),
+ sub instanceof DisjunctionMaxQuery);
+ assertEquals(t+" first had wrong number of clauses", 4,
+ countItems(((DisjunctionMaxQuery)sub).iterator()));
+ sub = bq.getClauses()[1].getQuery();
+ assertTrue(t+" second wasn't a DMQ:" + sub.getClass(),
+ sub instanceof DisjunctionMaxQuery);
+ assertEquals(t+" second had wrong number of clauses", 1,
+ countItems(((DisjunctionMaxQuery)sub).iterator()));
+ }
+
+ /* a phrase, and a term that is a stop word for some fields */
+ t = "hoss:\"XXXXXX YYYYY\" hoss:the";
+ out = qp.parse(t);
+ assertNotNull(t+" was null", out);
+ assertTrue(t+" wasn't a boolean:" + out.getClass(),
+ out instanceof BooleanQuery);
+ {
+ BooleanQuery bq = (BooleanQuery)out;
+ assertEquals(t+" wrong number of clauses", 2,
+ bq.getClauses().length);
+ Query sub = bq.getClauses()[0].getQuery();
+ assertTrue(t+" first wasn't a DMQ:" + sub.getClass(),
+ sub instanceof DisjunctionMaxQuery);
+ assertEquals(t+" first had wrong number of clauses", 4,
+ countItems(((DisjunctionMaxQuery)sub).iterator()));
+ sub = bq.getClauses()[1].getQuery();
+ assertTrue(t+" second wasn't a DMQ:" + sub.getClass(),
+ sub instanceof DisjunctionMaxQuery);
+ assertEquals(t+" second had wrong number of clauses (stop words)", 2,
+ countItems(((DisjunctionMaxQuery)sub).iterator()));
+ }
+
+
+
+ }
+
+ private static int countItems(Iterator i) { // drains the iterator, returns how many items it had left
+ int count = 0;
+ while (i.hasNext()) {
+ count++;
+ i.next();
+ }
+ return count;
+ }
+
+ public void testMinShouldMatchCalculator() {
+
+ /* zero is zero is zero */
+ assertEquals(0, calcMSM(5, "0"));
+ assertEquals(0, calcMSM(5, "0%"));
+ assertEquals(0, calcMSM(5, "-5"));
+ assertEquals(0, calcMSM(5, "-100%"));
+
+ /* basic integers */
+ assertEquals(3, calcMSM(5, "3"));
+ assertEquals(2, calcMSM(5, "-3"));
+ assertEquals(3, calcMSM(3, "3"));
+ assertEquals(0, calcMSM(3, "-3"));
+ assertEquals(3, calcMSM(3, "5"));
+ assertEquals(0, calcMSM(3, "-5"));
+
+ /* positive percentages with rounding */
+ assertEquals(0, calcMSM(3, "25%"));
+ assertEquals(1, calcMSM(4, "25%"));
+ assertEquals(1, calcMSM(5, "25%"));
+ assertEquals(2, calcMSM(10, "25%"));
+
+ /* negative percentages with rounding */
+ assertEquals(3, calcMSM(3, "-25%"));
+ assertEquals(3, calcMSM(4, "-25%"));
+ assertEquals(4, calcMSM(5, "-25%"));
+ assertEquals(8, calcMSM(10, "-25%"));
+
+ /* conditional */
+ assertEquals(1, calcMSM(1, "3<0"));
+ assertEquals(2, calcMSM(2, "3<0"));
+ assertEquals(3, calcMSM(3, "3<0"));
+ assertEquals(0, calcMSM(4, "3<0"));
+ assertEquals(0, calcMSM(5, "3<0"));
+ assertEquals(1, calcMSM(1, "3<25%"));
+ assertEquals(2, calcMSM(2, "3<25%"));
+ assertEquals(3, calcMSM(3, "3<25%"));
+ assertEquals(1, calcMSM(4, "3<25%"));
+ assertEquals(1, calcMSM(5, "3<25%"));
+
+ /* multiple conditionals */
+ assertEquals(1, calcMSM(1, "3<-25% 10<-3"));
+ assertEquals(2, calcMSM(2, "3<-25% 10<-3"));
+ assertEquals(3, calcMSM(3, "3<-25% 10<-3"));
+ assertEquals(3, calcMSM(4, "3<-25% 10<-3"));
+ assertEquals(4, calcMSM(5, "3<-25% 10<-3"));
+ assertEquals(5, calcMSM(6, "3<-25% 10<-3"));
+ assertEquals(6, calcMSM(7, "3<-25% 10<-3"));
+ assertEquals(6, calcMSM(8, "3<-25% 10<-3"));
+ assertEquals(7, calcMSM(9, "3<-25% 10<-3"));
+ assertEquals(8, calcMSM(10, "3<-25% 10<-3"));
+ assertEquals(8, calcMSM(11, "3<-25% 10<-3"));
+ assertEquals(9, calcMSM(12, "3<-25% 10<-3"));
+ assertEquals(97, calcMSM(100, "3<-25% 10<-3"));
+
+ BooleanQuery q = new BooleanQuery(); // starts out with four SHOULD clauses
+ q.add(new TermQuery(new Term("a","b")), Occur.SHOULD);
+ q.add(new TermQuery(new Term("a","c")), Occur.SHOULD);
+ q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
+ q.add(new TermQuery(new Term("a","d")), Occur.SHOULD);
+
+ SolrPluginUtils.setMinShouldMatch(q, "0");
+ assertEquals(0, q.getMinimumNumberShouldMatch());
+
+ SolrPluginUtils.setMinShouldMatch(q, "1");
+ assertEquals(1, q.getMinimumNumberShouldMatch());
+
+ SolrPluginUtils.setMinShouldMatch(q, "50%");
+ assertEquals(2, q.getMinimumNumberShouldMatch());
+
+ SolrPluginUtils.setMinShouldMatch(q, "99");
+ assertEquals(4, q.getMinimumNumberShouldMatch());
+
+ q.add(new TermQuery(new Term("a","e")), Occur.MUST); // the expected 50% below stays 2 with these two MUST clauses added
+ q.add(new TermQuery(new Term("a","f")), Occur.MUST);
+
+ SolrPluginUtils.setMinShouldMatch(q, "50%");
+ assertEquals(2, q.getMinimumNumberShouldMatch());
+
+ }
+
+ /** macro: SolrPluginUtils.partialEscape with the result as a String */
+ public String pe(CharSequence s) {
+ return SolrPluginUtils.partialEscape(s).toString();
+ }
+
+ /** macro: SolrPluginUtils.stripUnbalancedQuotes with the result as a String */
+ public String strip(CharSequence s) {
+ return SolrPluginUtils.stripUnbalancedQuotes(s).toString();
+ }
+
+ /** macro: shorthand for SolrPluginUtils.calculateMinShouldMatch */
+ public int calcMSM(int clauses, String spec) {
+ return SolrPluginUtils.calculateMinShouldMatch(clauses, spec);
+ }
+}
+
Modified: incubator/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml?rev=408103&r1=408102&r2=408103&view=diff
==============================================================================
--- incubator/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml (original)
+++ incubator/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml Sat May 20 15:17:21 2006
@@ -166,7 +166,23 @@
The "standard" request handler is the default and will be used if qt
is not specified in the request.
-->
- <requestHandler name="standard" class="solr.StandardRequestHandler" />
+ <requestHandler name="standard" class="solr.StandardRequestHandler"/>
+ <requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
+ <float name="tie">0.01</float>
+ <str name="qf">
+ text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
+ </str>
+ <str name="pf">
+ text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
+ </str>
+ <str name="bf">
+ ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
+ </str>
+ <str name="mm">
+ 3&lt;-1 5&lt;-2 6&lt;90%
+ </str>
+ <int name="ps">100</int>
+ </requestHandler>
<requestHandler name="old" class="solr.tst.OldRequestHandler" >
<int name="myparam">1000</int>
<float name="ratio">1.4142135</float>