You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2009/11/17 22:46:39 UTC
svn commit: r881546 - in /lucene/solr/trunk: ./
src/java/org/apache/solr/search/ src/test/org/apache/solr/search/
src/test/test-files/solr/conf/
Author: yonik
Date: Tue Nov 17 21:46:38 2009
New Revision: 881546
URL: http://svn.apache.org/viewvc?rev=881546&view=rev
Log:
SOLR-1553: extended dismax parser
Added:
lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (with props)
lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java (with props)
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java
lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml
lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml
lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt
lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Tue Nov 17 21:46:38 2009
@@ -34,9 +34,14 @@
New Features
----------------------
-1. SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
+* SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
Also added geohash(), deg() and rad() convenience functions. See http://wiki.apache.org/solr/FunctionQuery. (gsingers)
+* SOLR-1553: New dismax parser implementation (accessible as "edismax")
+ that supports full lucene syntax, improved reserved char escaping,
+ fielded queries, improved proximity boosting, and improved stopword
+ handling. (yonik)
+
Optimizations
----------------------
Added: lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java?rev=881546&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java Tue Nov 17 21:46:38 2009
@@ -0,0 +1,1128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This parser was originally derived from DismaxQParser from Solr.
+ * All changes are Copyright 2008, Lucid Imagination, Inc.
+ */
+
+package org.apache.solr.search;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.DefaultSolrParams;
+import org.apache.solr.common.params.DisMaxParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.search.QueryUtils;
+import org.apache.solr.search.function.BoostedQuery;
+import org.apache.solr.search.function.FunctionQuery;
+import org.apache.solr.search.function.ProductFloatFunction;
+import org.apache.solr.search.function.QueryValueSource;
+import org.apache.solr.search.function.ValueSource;
+import org.apache.solr.util.SolrPluginUtils;
+import org.apache.solr.analysis.*;
+
+import java.util.*;
+import java.io.Reader;
+import java.io.IOException;
+
+/**
+ * An advanced multi-field query parser.
+ */
+public class ExtendedDismaxQParserPlugin extends QParserPlugin {
+ public static final String NAME = "edismax";
+
+ public void init(NamedList args) {
+ }
+
+ public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+ return new ExtendedDismaxQParser(qstr, localParams, params, req);
+ }
+}
+
+
+class ExtendedDismaxQParser extends QParser {
+
+ /**
+ * A field we can't ever find in any schema, so we can safely tell
+ * DisjunctionMaxQueryParser to use it as our defaultField, and
+ * map aliases from it to any field in our schema.
+ */
+ private static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";
+
+ /** shorten the class references for utilities */
+ private static class U extends SolrPluginUtils {
+ /* :NOOP */
+ }
+
+ /** shorten the class references for utilities */
+ private static interface DMP extends DisMaxParams {
+ /* :NOOP */
+ }
+
+
+ public ExtendedDismaxQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+ super(qstr, localParams, params, req);
+ }
+
+ Map<String,Float> queryFields;
+ Query parsedUserQuery;
+
+
+ private String[] boostParams;
+ private String[] multBoosts;
+ private List<Query> boostQueries;
+ private Query altUserQuery;
+ private QParser altQParser;
+
+
+ public Query parse() throws ParseException {
+ SolrParams localParams = getLocalParams();
+ SolrParams params = getParams();
+
+ SolrParams solrParams = localParams == null ? params : new DefaultSolrParams(localParams, params);
+
+ queryFields = U.parseFieldBoosts(solrParams.getParams(DMP.QF));
+ Map<String,Float> phraseFields = U.parseFieldBoosts(solrParams.getParams(DMP.PF));
+ Map<String,Float> phraseFields3 = U.parseFieldBoosts(solrParams.getParams("pf3"));
+
+ float tiebreaker = solrParams.getFloat(DMP.TIE, 0.0f);
+
+ int pslop = solrParams.getInt(DMP.PS, 0);
+ int qslop = solrParams.getInt(DMP.QS, 0);
+
+ // remove stopwords from mandatory "matching" component?
+ boolean stopwords = solrParams.getBool("stopwords", true);
+
+ /* the main query we will execute. we disable the coord because
+ * this query is an artificial construct
+ */
+ BooleanQuery query = new BooleanQuery(true);
+
+ /* * * Main User Query * * */
+ parsedUserQuery = null;
+ String userQuery = getString();
+ altUserQuery = null;
+ if( userQuery == null || userQuery.length() < 1 ) {
+ // If no query is specified, we may have an alternate
+ String altQ = solrParams.get( DMP.ALTQ );
+ if (altQ != null) {
+ altQParser = subQuery(altQ, null);
+ altUserQuery = altQParser.getQuery();
+ query.add( altUserQuery , BooleanClause.Occur.MUST );
+ } else {
+ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "missing query string" );
+ }
+ }
+ else {
+ // There is a valid query string
+ // userQuery = partialEscape(U.stripUnbalancedQuotes(userQuery)).toString();
+
+ boolean lowercaseOperators = solrParams.getBool("lowercaseOperators", true);
+ String mainUserQuery = userQuery;
+
+ ExtendedSolrQueryParser up =
+ new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
+ up.addAlias(IMPOSSIBLE_FIELD_NAME,
+ tiebreaker, queryFields);
+ up.setPhraseSlop(qslop); // slop for explicit user phrase queries
+ up.setAllowLeadingWildcard(true);
+
+ // defer escaping, and only do so if lucene parsing fails or we need phrase
+ // queries. Need sloppy phrase queries anyway though.
+ List<Clause> clauses = null;
+ boolean specialSyntax = false;
+ int numPluses = 0;
+ int numMinuses = 0;
+ int numOptional = 0;
+ int numAND = 0;
+ int numOR = 0;
+ int numNOT = 0;
+ boolean sawLowerAnd=false;
+ boolean sawLowerOr=false;
+
+ clauses = splitIntoClauses(userQuery, false);
+ for (Clause clause : clauses) {
+ if (!clause.isPhrase && clause.hasSpecialSyntax) {
+ specialSyntax = true;
+ }
+ if (clause.must == '+') numPluses++;
+ if (clause.must == '-') numMinuses++;
+ if (clause.isBareWord()) {
+ String s = clause.val;
+ if ("AND".equals(s)) {
+ numAND++;
+ } else if ("OR".equals(s)) {
+ numOR++;
+ } else if ("NOT".equals(s)) {
+ numNOT++;
+ } else if (lowercaseOperators) {
+ if ("and".equals(s)) {
+ numAND++;
+ sawLowerAnd=true;
+ } else if ("or".equals(s)) {
+ numOR++;
+ sawLowerOr=true;
+ }
+ }
+ }
+ }
+ numOptional = clauses.size() - (numPluses + numMinuses);
+
+ // convert lower or mixed case operators to uppercase if we saw them.
+ // only do this for the lucene query part and not for phrase query boosting
+ // since some fields might not be case insensitive.
+ // We don't use a regex for this because it might change and AND or OR in
+ // a phrase query in a case sensitive field.
+ if (sawLowerAnd || sawLowerOr) {
+ StringBuilder sb = new StringBuilder();
+ for (int i=0; i<clauses.size(); i++) {
+ Clause clause = clauses.get(i);
+ String s = clause.raw;
+ // "and" and "or" won't be operators at the start or end
+ if (i>0 && i+1<clauses.size()) {
+ if ("AND".equalsIgnoreCase(s)) {
+ s="AND";
+ } else if ("OR".equalsIgnoreCase(s)) {
+ s="OR";
+ }
+ }
+ sb.append(s);
+ sb.append(' ');
+ }
+
+ mainUserQuery = sb.toString();
+ }
+
+ // For correct lucene queries, turn off mm processing if there
+ // were explicit operators (except for AND).
+ boolean doMinMatched = (numOR + numNOT + numPluses + numMinuses) == 0;
+
+ try {
+ up.setRemoveStopFilter(!stopwords);
+ parsedUserQuery = up.parse(mainUserQuery);
+
+ if (stopwords && isEmpty(parsedUserQuery)) {
+ // if the query was all stop words, remove none of them
+ up.setRemoveStopFilter(true);
+ parsedUserQuery = up.parse(mainUserQuery);
+ }
+ } catch (Exception e) {
+ // ignore failure and reparse later after escaping reserved chars
+ }
+
+ if (parsedUserQuery != null && doMinMatched) {
+ String minShouldMatch = solrParams.get(DMP.MM, "100%");
+ if (parsedUserQuery instanceof BooleanQuery) {
+ U.setMinShouldMatch((BooleanQuery)parsedUserQuery, minShouldMatch);
+ }
+ }
+
+
+ if (parsedUserQuery == null) {
+ StringBuilder sb = new StringBuilder();
+ for (Clause clause : clauses) {
+
+ boolean doQuote = clause.isPhrase;
+
+ String s=clause.val;
+ if (!clause.isPhrase && ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s))) {
+ doQuote=true;
+ }
+
+ if (clause.must != 0) {
+ sb.append(clause.must);
+ }
+ if (clause.field != null) {
+ sb.append(clause.field);
+ sb.append(':');
+ }
+ if (doQuote) {
+ sb.append('"');
+ }
+ sb.append(clause.val);
+ if (doQuote) {
+ sb.append('"');
+ }
+ sb.append(' ');
+ }
+ String escapedUserQuery = sb.toString();
+ parsedUserQuery = up.parse(escapedUserQuery);
+
+ // Only do minimum-match logic
+ String minShouldMatch = solrParams.get(DMP.MM, "100%");
+
+ if (parsedUserQuery instanceof BooleanQuery) {
+ BooleanQuery t = new BooleanQuery();
+ U.flattenBooleanQuery(t, (BooleanQuery)parsedUserQuery);
+ U.setMinShouldMatch(t, minShouldMatch);
+ parsedUserQuery = t;
+ }
+ }
+
+ query.add(parsedUserQuery, BooleanClause.Occur.MUST);
+
+ // sloppy phrase queries for proximity
+ if (phraseFields.size() > 0 || phraseFields3.size() > 0) {
+ // find non-field clauses
+ List<Clause> normalClauses = new ArrayList<Clause>(clauses.size());
+ for (Clause clause : clauses) {
+ if (clause.field != null || clause.isPhrase) continue;
+ // check for keywords "AND,OR,TO"
+ if (clause.isBareWord()) {
+ String s = clause.val.toString();
+ // avoid putting explicit operators in the phrase query
+ if ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s) || "TO".equals(s)) continue;
+ }
+ normalClauses.add(clause);
+ }
+
+ Map<String,Float> pf = phraseFields;
+ if (normalClauses.size() >= 2 && pf.size() > 0) {
+ StringBuilder sb = new StringBuilder();
+ for (int i=0; i<normalClauses.size()-1; i++) {
+ sb.append('"');
+ sb.append(normalClauses.get(i).val);
+ sb.append(' ');
+ sb.append(normalClauses.get(i+1).val);
+ sb.append('"');
+ sb.append(' ');
+ }
+
+ String userPhraseQuery = sb.toString();
+
+ /* for parsing sloppy phrases using DisjunctionMaxQueries */
+ ExtendedSolrQueryParser pp =
+ new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
+ pp.addAlias(IMPOSSIBLE_FIELD_NAME,
+ tiebreaker, pf);
+ pp.setPhraseSlop(pslop);
+ pp.makeDismax = false; // make boolean queries instead
+ pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
+ pp.minClauseSize = 2; // if a stopword is removed, don't add the phrase
+
+ // TODO: perhaps we shouldn't use synonyms either...
+
+ Query phrase = pp.parse(userPhraseQuery);
+ if (phrase != null) {
+ query.add(phrase, BooleanClause.Occur.SHOULD);
+ }
+ }
+
+ pf = phraseFields3;
+ if (normalClauses.size() >= 3 && pf.size() > 0) {
+ StringBuilder sb = new StringBuilder();
+ for (int i=0; i<normalClauses.size()-2; i++) {
+ sb.append('"');
+ sb.append(normalClauses.get(i).val);
+ sb.append(' ');
+ sb.append(normalClauses.get(i+1).val);
+ sb.append(' ');
+ sb.append(normalClauses.get(i+2).val);
+ sb.append('"');
+ sb.append(' ');
+ }
+
+ String userPhraseQuery = sb.toString();
+
+ /* for parsing sloppy phrases using DisjunctionMaxQueries */
+ ExtendedSolrQueryParser pp =
+ new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
+ pp.addAlias(IMPOSSIBLE_FIELD_NAME,
+ tiebreaker, pf);
+ pp.setPhraseSlop(pslop);
+ pp.makeDismax = false; // make boolean queries instead
+ pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
+ pp.minClauseSize = 2; // keep min phrase size at 2 since stopword could have been removed in middle
+
+ Query phrase = pp.parse(userPhraseQuery);
+ if (phrase != null) {
+ query.add(phrase, BooleanClause.Occur.SHOULD);
+ }
+ }
+
+ }
+ }
+
+
+
+ /* * * Boosting Query * * */
+ boostParams = solrParams.getParams(DMP.BQ);
+ //List<Query> boostQueries = U.parseQueryStrings(req, boostParams);
+ boostQueries=null;
+ if (boostParams!=null && boostParams.length>0) {
+ boostQueries = new ArrayList<Query>();
+ for (String qs : boostParams) {
+ if (qs.trim().length()==0) continue;
+ Query q = subQuery(qs, null).getQuery();
+ boostQueries.add(q);
+ }
+ }
+ if (null != boostQueries) {
+ if(1 == boostQueries.size() && 1 == boostParams.length) {
+ /* legacy logic */
+ Query f = boostQueries.get(0);
+ if (1.0f == f.getBoost() && f instanceof BooleanQuery) {
+ /* if the default boost was used, and we've got a BooleanQuery
+ * extract the subqueries out and use them directly
+ */
+ for (Object c : ((BooleanQuery)f).clauses()) {
+ query.add((BooleanClause)c);
+ }
+ } else {
+ query.add(f, BooleanClause.Occur.SHOULD);
+ }
+ } else {
+ for(Query f : boostQueries) {
+ query.add(f, BooleanClause.Occur.SHOULD);
+ }
+ }
+ }
+
+ /* * * Boosting Functions * * */
+
+ String[] boostFuncs = solrParams.getParams(DMP.BF);
+ if (null != boostFuncs && 0 != boostFuncs.length) {
+ for (String boostFunc : boostFuncs) {
+ if(null == boostFunc || "".equals(boostFunc)) continue;
+ Map<String,Float> ff = SolrPluginUtils.parseFieldBoosts(boostFunc);
+ for (String f : ff.keySet()) {
+ Query fq = subQuery(f, FunctionQParserPlugin.NAME).getQuery();
+ Float b = ff.get(f);
+ if (null != b) {
+ fq.setBoost(b);
+ }
+ query.add(fq, BooleanClause.Occur.SHOULD);
+ }
+ }
+ }
+
+
+ //
+ // create a boosted query (scores multiplied by boosts)
+ //
+ Query topQuery = query;
+ multBoosts = solrParams.getParams("boost");
+ if (multBoosts!=null && multBoosts.length>0) {
+
+ List<ValueSource> boosts = new ArrayList<ValueSource>();
+ for (String boostStr : multBoosts) {
+ if (boostStr==null || boostStr.length()==0) continue;
+ Query boost = subQuery(boostStr, FunctionQParserPlugin.NAME).getQuery();
+ ValueSource vs;
+ if (boost instanceof FunctionQuery) {
+ vs = ((FunctionQuery)boost).getValueSource();
+ } else {
+ vs = new QueryValueSource(boost, 1.0f);
+ }
+ boosts.add(vs);
+ }
+
+ if (boosts.size()>1) {
+ ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()]));
+ topQuery = new BoostedQuery(query, prod);
+ } else if (boosts.size() == 1) {
+ topQuery = new BoostedQuery(query, boosts.get(0));
+ }
+ }
+
+ return topQuery;
+ }
+
+ @Override
+ public String[] getDefaultHighlightFields() {
+ String[] highFields = queryFields.keySet().toArray(new String[0]);
+ return highFields;
+ }
+
+ @Override
+ public Query getHighlightQuery() throws ParseException {
+ return parsedUserQuery;
+ }
+
+ public void addDebugInfo(NamedList<Object> debugInfo) {
+ super.addDebugInfo(debugInfo);
+ debugInfo.add("altquerystring", altUserQuery);
+ if (null != boostQueries) {
+ debugInfo.add("boost_queries", boostParams);
+ debugInfo.add("parsed_boost_queries",
+ QueryParsing.toString(boostQueries, getReq().getSchema()));
+ }
+ debugInfo.add("boostfuncs", getReq().getParams().getParams(DisMaxParams.BF));
+ }
+
+
+
+ public static CharSequence partialEscape(CharSequence s) {
+ StringBuilder sb = new StringBuilder();
+
+ int len = s.length();
+ for (int i = 0; i < len; i++) {
+ char c = s.charAt(i);
+ if (c == ':') {
+ // look forward to make sure it's something that won't
+ // cause a parse exception (something that won't be escaped... like
+ // +, -, :, or whitespace)
+ if (i+1<len && i>0) {
+ char ch = s.charAt(i+1);
+ if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
+ // OK, at this point the chars after the ':' will be fine.
+ // now look back and try to determine if this is a fieldname
+ // [+,-]? [letter,_] [letter digit,_,-,.]*
+ // This won't cover *all* possible lucene fieldnames, but we should
+ // only pick nice names to begin with
+ int start, pos;
+ for (start=i-1; start>=0; start--) {
+ ch = s.charAt(start);
+ if (Character.isWhitespace(ch)) break;
+ }
+
+ // skip whitespace
+ pos = start+1;
+
+ // skip leading + or -
+ ch = s.charAt(pos);
+ if (ch=='+' || ch=='-') {
+ pos++;
+ }
+
+ // we don't need to explicitly check for end of string
+ // since ':' will act as our sentinel
+
+ // first char can't be '-' or '.'
+ ch = s.charAt(pos++);
+ if (Character.isJavaIdentifierPart(ch)) {
+
+ for(;;) {
+ ch = s.charAt(pos++);
+ if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
+ break;
+ }
+ }
+
+ if (pos<=i) {
+ // OK, we got to the ':' and everything looked like a valid fieldname, so
+ // don't escape the ':'
+ sb.append(':');
+ continue; // jump back to start of outer-most loop
+ }
+
+ }
+
+
+ }
+ }
+
+ // we fell through to here, so we should escape this like other reserved chars.
+ sb.append('\\');
+ }
+ else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
+ c == '^' || c == '[' || c == ']' ||
+ c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
+ )
+ {
+ sb.append('\\');
+ }
+ sb.append(c);
+ }
+ return sb;
+ }
+
+
+ static class Clause {
+
+ boolean isBareWord() {
+ return must==0 && !isPhrase;
+ }
+
+ String field;
+ boolean isPhrase;
+ boolean hasWhitespace;
+ boolean hasSpecialSyntax;
+ boolean syntaxError;
+ char must; // + or -
+ String val; // the field value (minus the field name, +/-, quotes)
+ String raw; // the raw clause w/o leading/trailing whitespace
+ }
+
+
+ public List<Clause> splitIntoClauses(String s, boolean ignoreQuote) {
+ ArrayList<Clause> lst = new ArrayList<Clause>(4);
+ Clause clause = new Clause();
+
+ int pos=0;
+ int end=s.length();
+ char ch=0;
+ int start;
+ outer: while (pos < end) {
+ ch = s.charAt(pos);
+
+ while (Character.isWhitespace(ch)) {
+ if (++pos >= end) break;
+ ch = s.charAt(pos);
+ }
+
+ start = pos;
+
+ if (ch=='+' || ch=='-') {
+ clause.must = ch;
+ pos++;
+ }
+
+ clause.field = getFieldName(s, pos, end);
+ if (clause.field != null) {
+ pos += clause.field.length(); // skip the field name
+ pos++; // skip the ':'
+ }
+
+ if (pos>=end) break;
+
+
+ char inString=0;
+
+ ch = s.charAt(pos);
+ if (!ignoreQuote && ch=='"') {
+ clause.isPhrase = true;
+ inString = '"';
+ pos++;
+ }
+
+ StringBuilder sb = new StringBuilder();
+ while (pos < end) {
+ ch = s.charAt(pos++);
+ if (ch=='\\') { // skip escaped chars, but leave escaped
+ sb.append(ch);
+ if (pos >= end) {
+ sb.append(ch); // double backslash if we are at the end of the string
+ break;
+ }
+ ch = s.charAt(pos++);
+ sb.append(ch);
+ continue;
+ } else if (inString != 0 && ch == inString) {
+ inString=0;
+ break;
+ } else if (Character.isWhitespace(ch)) {
+ clause.hasWhitespace=true;
+ if (inString == 0) {
+ // end of the token if we aren't in a string, backing
+ // up the position.
+ pos--;
+ break;
+ }
+ }
+
+ if (inString == 0) {
+ switch (ch) {
+ case '!':
+ case '(':
+ case ')':
+ case ':':
+ case '^':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '~':
+ case '*':
+ case '?':
+ case '"':
+ case '+':
+ case '-':
+ clause.hasSpecialSyntax = true;
+ sb.append('\\');
+ }
+ } else if (ch=='"') {
+ // only char we need to escape in a string is double quote
+ sb.append('\\');
+ }
+ sb.append(ch);
+ }
+ clause.val = sb.toString();
+
+ if (clause.isPhrase) {
+ if (inString != 0) {
+ // detected bad quote balancing... retry
+ // parsing with quotes like any other char
+ return splitIntoClauses(s, true);
+ }
+
+ // special syntax in a string isn't special
+ clause.hasSpecialSyntax = false;
+ } else {
+ // an empty clause... must be just a + or - on its own
+ if (clause.val.length() == 0) {
+ clause.syntaxError = true;
+ if (clause.must != 0) {
+ clause.val="\\"+clause.must;
+ clause.must = 0;
+ clause.hasSpecialSyntax = true;
+ } else {
+ // uh.. this shouldn't happen.
+ clause=null;
+ }
+ }
+ }
+
+ if (clause != null) {
+ clause.raw = s.substring(start, pos);
+ lst.add(clause);
+ }
+ clause = new Clause();
+ }
+
+ return lst;
+ }
+
+ public String getFieldName(String s, int pos, int end) {
+ if (pos >= end) return null;
+ int p=pos;
+ int colon = s.indexOf(':',pos);
+ // make sure there is something after the colon, but not whitespace
+ if (colon<=pos || colon+1>=end || Character.isWhitespace(s.charAt(colon+1))) return null;
+ char ch = s.charAt(p++);
+ if (!Character.isJavaIdentifierPart(ch)) return null;
+ while (p<colon) {
+ ch = s.charAt(p++);
+ if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) return null;
+ }
+ String fname = s.substring(pos, p);
+ return getReq().getSchema().getFieldTypeNoEx(fname) == null ? null : fname;
+ }
+
+
+ public static List<String> split(String s, boolean ignoreQuote) {
+ ArrayList<String> lst = new ArrayList<String>(4);
+ int pos=0, start=0, end=s.length();
+ char inString=0;
+ char ch=0;
+ while (pos < end) {
+ char prevChar=ch;
+ ch = s.charAt(pos++);
+ if (ch=='\\') { // skip escaped chars
+ pos++;
+ } else if (inString != 0 && ch==inString) {
+ inString=0;
+ } else if (!ignoreQuote && ch=='"') {
+ // If char is directly preceded by a number or letter
+ // then don't treat it as the start of a string.
+ if (!Character.isLetterOrDigit(prevChar)) {
+ inString=ch;
+ }
+ } else if (Character.isWhitespace(ch) && inString==0) {
+ lst.add(s.substring(start,pos-1));
+ start=pos;
+ }
+ }
+ if (start < end) {
+ lst.add(s.substring(start,end));
+ }
+
+ if (inString != 0) {
+ // unbalanced quote... ignore them
+ return split(s, true);
+ }
+
+ return lst;
+ }
+
+
+
+
+ enum QType {
+ FIELD,
+ PHRASE,
+ PREFIX,
+ WILDCARD,
+ FUZZY,
+ RANGE
+ }
+
+ /**
+ * A subclass of SolrQueryParser that supports aliasing fields for
+ * constructing DisjunctionMaxQueries.
+ */
+ class ExtendedSolrQueryParser extends SolrQueryParser {
+
+ /** A simple container for storing alias info
+ */
+ protected class Alias {
+ public float tie;
+ public Map<String,Float> fields;
+ }
+
+ boolean makeDismax=true;
+ boolean disableCoord=true;
+ boolean allowWildcard=true;
+ int minClauseSize = 0; // minimum number of clauses per phrase query...
+ // used when constructing boosting part of query via sloppy phrases
+
+ ExtendedAnalyzer analyzer;
+
+ /**
+ * Where we store a map from field name we expect to see in our query
+ * string, to Alias object containing the fields to use in our
+ * DisjunctionMaxQuery and the tiebreaker to use.
+ */
+ protected Map<String,Alias> aliases = new HashMap<String,Alias>(3);
+
+ public ExtendedSolrQueryParser(QParser parser, String defaultField) {
+ super(parser, defaultField, new ExtendedAnalyzer(parser));
+ analyzer = (ExtendedAnalyzer)getAnalyzer();
+ // don't trust that our parent class won't ever change its default
+ setDefaultOperator(QueryParser.Operator.OR);
+ }
+
+ public void setRemoveStopFilter(boolean remove) {
+ analyzer.removeStopFilter = remove;
+ }
+
+ protected Query getBooleanQuery(List clauses, boolean disableCoord) throws ParseException {
+ Query q = super.getBooleanQuery(clauses, disableCoord);
+ if (q != null) {
+ q = QueryUtils.makeQueryable(q);
+ }
+ return q;
+ }
+
+
+ ////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+
+ protected void addClause(List clauses, int conj, int mods, Query q) {
+//System.out.println("addClause:clauses="+clauses+" conj="+conj+" mods="+mods+" q="+q);
+ super.addClause(clauses, conj, mods, q);
+ }
+
+ /**
+ * Add an alias to this query parser.
+ *
+ * @param field the field name that should trigger alias mapping
+ * @param fieldBoosts the mapping from fieldname to boost value that
+ * should be used to build up the clauses of the
+ * DisjunctionMaxQuery.
+ * @param tiebreaker to the tiebreaker to be used in the
+ * DisjunctionMaxQuery
+ * @see SolrPluginUtils#parseFieldBoosts
+ */
+ public void addAlias(String field, float tiebreaker,
+ Map<String,Float> fieldBoosts) {
+
+ Alias a = new Alias();
+ a.tie = tiebreaker;
+ a.fields = fieldBoosts;
+ aliases.put(field, a);
+ }
+
+
+ QType type;
+ String field;
+ String val;
+ String val2;
+ boolean bool;
+ float flt;
+ int slop;
+
+ @Override
+ protected Query getFieldQuery(String field, String val) throws ParseException {
+//System.out.println("getFieldQuery: val="+val);
+
+ this.type = QType.FIELD;
+ this.field = field;
+ this.val = val;
+ this.slop = getPhraseSlop(); // unspecified
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getFieldQuery(String field, String val, int slop) throws ParseException {
+//System.out.println("getFieldQuery: val="+val+" slop="+slop);
+
+ this.type = QType.PHRASE;
+ this.field = field;
+ this.val = val;
+ this.slop = slop;
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getPrefixQuery(String field, String val) throws ParseException {
+//System.out.println("getPrefixQuery: val="+val);
+ if (val.equals("") && field.equals("*")) {
+ return new MatchAllDocsQuery();
+ }
+ this.type = QType.PREFIX;
+ this.field = field;
+ this.val = val;
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getRangeQuery(String field, String a, String b, boolean inclusive) throws ParseException {
+//System.out.println("getRangeQuery:");
+
+ this.type = QType.RANGE;
+ this.field = field;
+ this.val = a;
+ this.val2 = b;
+ this.bool = inclusive;
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getWildcardQuery(String field, String val) throws ParseException {
+//System.out.println("getWildcardQuery: val="+val);
+
+ if (val.equals("*")) {
+ if (field.equals("*")) {
+ return new MatchAllDocsQuery();
+ } else{
+ return getPrefixQuery(field,"");
+ }
+ }
+ this.type = QType.WILDCARD;
+ this.field = field;
+ this.val = val;
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getFuzzyQuery(String field, String val, float minSimilarity) throws ParseException {
+//System.out.println("getFuzzyQuery: val="+val);
+
+ this.type = QType.FUZZY;
+ this.field = field;
+ this.val = val;
+ this.flt = minSimilarity;
+ return getAliasedQuery();
+ }
+
+ /**
+ * Delegates to the super class unless the field has been specified
+ * as an alias -- in which case we recurse on each of
+ * the aliased fields, and the results are composed into a
+ * DisjunctionMaxQuery. (so yes: aliases which point at other
+ * aliases should work)
+ */
+ protected Query getAliasedQuery()
+ throws ParseException {
+ Alias a = aliases.get(field);
+ if (a != null) {
+ List<Query> lst = getQueries(a);
+ if (lst == null || lst.size()==0)
+ return getQuery();
+ // make a DisjunctionMaxQuery in this case too... it will stop
+ // the "mm" processing from making everything required in the case
+ // that the query expanded to multiple clauses.
+ // DisMaxQuery.rewrite() removes itself if there is just a single clause anyway.
+ // if (lst.size()==1) return lst.get(0);
+
+ if (makeDismax) {
+ DisjunctionMaxQuery q = new DisjunctionMaxQuery(lst, a.tie);
+ return q;
+ } else {
+ // should we disable coord?
+ BooleanQuery q = new BooleanQuery(disableCoord);
+ for (Query sub : lst) {
+ q.add(sub, BooleanClause.Occur.SHOULD);
+ }
+ return q;
+ }
+ } else {
+ return getQuery();
+ }
+ }
+
+
+ protected List<Query> getQueries(Alias a) throws ParseException {
+ if (a == null) return null;
+ if (a.fields.size()==0) return null;
+ List<Query> lst= new ArrayList<Query>(4);
+
+ for (String f : a.fields.keySet()) {
+ this.field = f;
+ Query sub = getQuery();
+ if (sub != null) {
+ Float boost = a.fields.get(f);
+ if (boost != null) {
+ sub.setBoost(boost);
+ }
+ lst.add(sub);
+ }
+ }
+ return lst;
+ }
+
+ private Query getQuery() throws ParseException {
+ try {
+
+ switch (type) {
+ case FIELD: // fallthrough
+ case PHRASE:
+ Query query = super.getFieldQuery(field, val);
+ if (query instanceof PhraseQuery) {
+ PhraseQuery pq = (PhraseQuery)query;
+ if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) return null;
+ ((PhraseQuery)query).setSlop(slop);
+ } else if (query instanceof MultiPhraseQuery) {
+ MultiPhraseQuery pq = (MultiPhraseQuery)query;
+ if (minClauseSize > 1 && pq.getTermArrays().size() < minClauseSize) return null;
+ ((MultiPhraseQuery)query).setSlop(slop);
+ } else if (minClauseSize > 1) {
+ // if it's not a type of phrase query, it doesn't meet the minClauseSize requirements
+ return null;
+ }
+ return query;
+ case PREFIX: return super.getPrefixQuery(field, val);
+ case WILDCARD: return super.getWildcardQuery(field, val);
+ case FUZZY: return super.getFuzzyQuery(field, val, flt);
+ case RANGE: return super.getRangeQuery(field, val, val2, bool);
+ }
+ return null;
+
+ } catch (Exception e) {
+ // an exception here is due to the field query not being compatible with the input text
+ // for example, passing a string to a numeric field.
+ return null;
+ }
+ }
+ }
+
+
+ static boolean isEmpty(Query q) {
+ if (q==null) return true;
+ if (q instanceof BooleanQuery && ((BooleanQuery)q).clauses().size()==0) return true;
+ return false;
+ }
+}
+
+
+class ExtendedAnalyzer extends Analyzer {
+ final Map<String, Analyzer> map = new HashMap<String, Analyzer>();
+ final QParser parser;
+ final Analyzer queryAnalyzer;
+ public boolean removeStopFilter = false;
+
+ public static TokenizerChain getQueryTokenizerChain(QParser parser, String fieldName) {
+ FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
+ Analyzer qa = ft.getQueryAnalyzer();
+ return qa instanceof TokenizerChain ? (TokenizerChain)qa : null;
+ }
+
+ public static StopFilterFactory getQueryStopFilter(QParser parser, String fieldName) {
+ TokenizerChain tcq = getQueryTokenizerChain(parser, fieldName);
+ if (tcq == null) return null;
+ TokenFilterFactory[] facs = tcq.getTokenFilterFactories();
+
+ for (int i=0; i<facs.length; i++) {
+ TokenFilterFactory tf = facs[i];
+ if (tf instanceof StopFilterFactory) {
+ return (StopFilterFactory)tf;
+ }
+ }
+ return null;
+ }
+
+ public ExtendedAnalyzer(QParser parser) {
+ this.parser = parser;
+ this.queryAnalyzer = parser.getReq().getSchema().getQueryAnalyzer();
+ }
+
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ if (!removeStopFilter) {
+ return queryAnalyzer.tokenStream(fieldName, reader);
+ }
+
+ Analyzer a = map.get(fieldName);
+ if (a != null) {
+ return a.tokenStream(fieldName, reader);
+ }
+
+ FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
+ Analyzer qa = ft.getQueryAnalyzer();
+ if (!(qa instanceof TokenizerChain)) {
+ map.put(fieldName, qa);
+ return qa.tokenStream(fieldName, reader);
+ }
+ TokenizerChain tcq = (TokenizerChain)qa;
+ Analyzer ia = ft.getAnalyzer();
+ if (ia == qa || !(ia instanceof TokenizerChain)) {
+ map.put(fieldName, qa);
+ return qa.tokenStream(fieldName, reader);
+ }
+ TokenizerChain tci = (TokenizerChain)ia;
+
+ // make sure that there isn't a stop filter in the indexer
+ for (TokenFilterFactory tf : tci.getTokenFilterFactories()) {
+ if (tf instanceof StopFilterFactory) {
+ map.put(fieldName, qa);
+ return qa.tokenStream(fieldName, reader);
+ }
+ }
+
+ // now if there is a stop filter in the query analyzer, remove it
+ int stopIdx = -1;
+ TokenFilterFactory[] facs = tcq.getTokenFilterFactories();
+
+ for (int i=0; i<facs.length; i++) {
+ TokenFilterFactory tf = facs[i];
+ if (tf instanceof StopFilterFactory) {
+ stopIdx = i;
+ break;
+ }
+ }
+
+ if (stopIdx == -1) {
+ // no stop filter exists
+ map.put(fieldName, qa);
+ return qa.tokenStream(fieldName, reader);
+ }
+
+ TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length-1];
+ for (int i=0,j=0; i<facs.length; i++) {
+ if (i==stopIdx) continue;
+ newtf[j++] = facs[i];
+ }
+
+ TokenizerChain newa = new TokenizerChain(tcq.getTokenizerFactory(), newtf);
+ newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName));
+
+ map.put(fieldName, newa);
+ return newa.tokenStream(fieldName, reader);
+ }
+
+ public int getPositionIncrementGap(String fieldName) {
+ return queryAnalyzer.getPositionIncrementGap(fieldName);
+ }
+
+ public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+ if (!removeStopFilter) {
+ return queryAnalyzer.reusableTokenStream(fieldName, reader);
+ }
+ // TODO: done to fix stop word removal bug - could be done while still using reusable?
+ return tokenStream(fieldName, reader);
+ }
+}
Propchange: lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java Tue Nov 17 21:46:38 2009
@@ -32,6 +32,7 @@
PrefixQParserPlugin.NAME, PrefixQParserPlugin.class,
BoostQParserPlugin.NAME, BoostQParserPlugin.class,
DisMaxQParserPlugin.NAME, DisMaxQParserPlugin.class,
+ ExtendedDismaxQParserPlugin.NAME, ExtendedDismaxQParserPlugin.class,
FieldQParserPlugin.NAME, FieldQParserPlugin.class,
RawQParserPlugin.NAME, RawQParserPlugin.class,
NestedQParserPlugin.NAME, NestedQParserPlugin.class,
Added: lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java?rev=881546&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java Tue Nov 17 21:46:38 2009
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search;
+
+import org.apache.solr.util.AbstractSolrTestCase;
+
+public class TestExtendedDismaxParser extends AbstractSolrTestCase {
+ public String getSchemaFile() { return "schema12.xml"; }
+ public String getSolrConfigFile() { return "solrconfig.xml"; }
+ // public String getCoreName() { return "collection1"; }
+
+ public void setUp() throws Exception {
+ // if you override setUp or tearDown, you better call
+ // the super class's version
+ super.setUp();
+ }
+ public void tearDown() throws Exception {
+ // if you override setUp or tearDown, you better call
+ // the super class's version
+ super.tearDown();
+ }
+
+ // test the edismax query parser based on the dismax parser
+ public void testFocusQueryParser() {
+ assertU(adoc("id", "42", "trait_ss", "Tool", "trait_ss", "Obnoxious",
+ "name", "Zapp Brannigan"));
+ assertU(adoc("id", "43" ,
+ "title", "Democratic Order op Planets"));
+ assertU(adoc("id", "44", "trait_ss", "Tool",
+ "name", "The Zapper"));
+ assertU(adoc("id", "45", "trait_ss", "Chauvinist",
+ "title", "25 star General"));
+ assertU(adoc("id", "46", "trait_ss", "Obnoxious",
+ "subject", "Defeated the pacifists op the Gandhi nebula"));
+ assertU(adoc("id", "47", "trait_ss", "Pig",
+ "text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));
+ assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100"));
+ assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100"));
+ assertU(adoc("id", "50", "text_sw", "start new big city end"));
+
+ assertU(commit());
+ String allq = "id:[42 TO 50]";
+ String allr = "*[count(//doc)=9]";
+ String oner = "*[count(//doc)=1]";
+ String twor = "*[count(//doc)=2]";
+ String nor = "*[count(//doc)=0]";
+
+
+ assertQ("standard request handler returns all matches",
+ req(allq),
+ allr
+ );
+
+ assertQ("edismax query parser returns all matches",
+ req("q", allq,
+ "defType", "edismax"
+ ),
+ allr
+ );
+
+ assertQ(req("defType", "edismax", "qf", "trait_ss",
+ "q","Tool"), twor
+ );
+
+ // test that field types that aren't applicable don't cause an exception to be thrown
+ assertQ(req("defType", "edismax", "qf", "trait_ss foo_i foo_f foo_dt foo_l foo_d foo_b",
+ "q","Tool"), twor
+ );
+
+ // test that numeric field types can be queried
+ assertQ(req("defType", "edismax", "qf", "text_sw",
+ "q","foo_i:100"), oner
+ );
+
+ // test that numeric field types can be queried
+ assertQ(req("defType", "edismax", "qf", "text_sw",
+ "q","foo_i:-100"), oner
+ );
+
+ // test that numeric field types can be queried via qf
+ assertQ(req("defType", "edismax", "qf", "text_sw foo_i",
+ "q","100"), oner
+ );
+
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","op"), twor
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order op"), oner
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order AND op"), oner
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order and op"), oner
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","+Order op"), oner
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order OR op"), twor
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order or op"), twor
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","*:*"), allr
+ );
+
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","star OR (-star)"), allr
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","id:42 OR (-id:42)"), allr
+ );
+
+ // test that basic synonyms work
+ assertQ(req("defType", "edismax", "qf", "text_sw",
+ "q","GB"), oner
+ );
+
+ // test for stopword removal in main query part
+ assertQ(req("defType", "edismax", "qf", "text_sw",
+ "q","the big"), twor
+ );
+
+ // test for stopwords not removed
+ assertQ(req("defType", "edismax", "qf", "text_sw", "stopwords","false",
+ "q","the big"), oner
+ );
+
+ /** stopword removal in conjunction with multi-word synonyms at query time
+ * breaks this test.
+ // multi-word synonyms
+ // remove id:50 which contains the false match
+ assertQ(req("defType", "edismax", "qf", "text_t", "indent","true", "debugQuery","true",
+ "q","-id:50 nyc"), oner
+ );
+ **/
+
+ /*** these fail because multi-word synonyms are being used at query time
+ // this will incorrectly match "new big city"
+ assertQ(req("defType", "edismax", "qf", "id title",
+ "q","nyc"), oner
+ );
+
+ // this will incorrectly match "new big city"
+ assertQ(req("defType", "edismax", "qf", "title",
+ "q","the big apple"), nor
+ );
+ ***/
+
+ }
+
+}
\ No newline at end of file
Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml Tue Nov 17 21:46:38 2009
@@ -339,7 +339,7 @@
<fieldtype name="syn" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
+ <filter name="syn" class="solr.SynonymFilterFactory" synonyms="old_synonyms.txt"/>
</analyzer>
</fieldtype>
@@ -350,7 +350,7 @@
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
- synonyms="synonyms.txt" expand="true" />
+ synonyms="old_synonyms.txt" expand="true" />
<filter class="solr.EnglishPorterFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
</analyzer>
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml Tue Nov 17 21:46:38 2009
@@ -325,6 +325,32 @@
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
+
+ <!-- a text field with the stop filter only on the query analyzer
+ -->
+ <fieldType name="text_sw" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
+ catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
+ catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
<!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
synonyms "better"
@@ -461,9 +487,11 @@
<dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true" />
+ <dynamicField name="*_sw" type="text_sw" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt Tue Nov 17 21:46:38 2009
@@ -1,16 +1,58 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-stopworda
-stopwordb
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+#Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
+
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt Tue Nov 17 21:46:38 2009
@@ -1,22 +1,31 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-a => aa
-b => b1 b2
-c => c1,c2
-a\=>a => b\=>b
-a\,a => b\,b
-foo,bar,baz
-
-Television,TV,Televisions
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaa => aaaa
+bbb => bbbb1 bbbb2
+ccc => cccc1,cccc2
+a\=>a => b\=>b
+a\,a => b\,b
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+