You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2009/11/17 22:46:39 UTC
svn commit: r881546 - in /lucene/solr/trunk: ./
src/java/org/apache/solr/search/ src/test/org/apache/solr/search/
src/test/test-files/solr/conf/
Author: yonik
Date: Tue Nov 17 21:46:38 2009
New Revision: 881546
URL: http://svn.apache.org/viewvc?rev=881546&view=rev
Log:
SOLR-1553: extended dismax parser
Added:
lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (with props)
lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java (with props)
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java
lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml
lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml
lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt
lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Tue Nov 17 21:46:38 2009
@@ -34,9 +34,14 @@
New Features
----------------------
-1. SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
+* SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
Also added geohash(), deg() and rad() convenience functions. See http://wiki.apache.org/solr/FunctionQuery. (gsingers)
+* SOLR-1553: New dismax parser implementation (accessible as "edismax")
+ that supports full lucene syntax, improved reserved char escaping,
+ fielded queries, improved proximity boosting, and improved stopword
+ handling. (yonik)
+
Optimizations
----------------------
Added: lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java?rev=881546&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java Tue Nov 17 21:46:38 2009
@@ -0,0 +1,1128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This parser was originally derived from DismaxQParser from Solr.
+ * All changes are Copyright 2008, Lucid Imagination, Inc.
+ */
+
+package org.apache.solr.search;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.DefaultSolrParams;
+import org.apache.solr.common.params.DisMaxParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.search.QueryUtils;
+import org.apache.solr.search.function.BoostedQuery;
+import org.apache.solr.search.function.FunctionQuery;
+import org.apache.solr.search.function.ProductFloatFunction;
+import org.apache.solr.search.function.QueryValueSource;
+import org.apache.solr.search.function.ValueSource;
+import org.apache.solr.util.SolrPluginUtils;
+import org.apache.solr.analysis.*;
+
+import java.util.*;
+import java.io.Reader;
+import java.io.IOException;
+
+/**
+ * An advanced multi-field query parser.
+ */
+public class ExtendedDismaxQParserPlugin extends QParserPlugin {
+ public static final String NAME = "edismax";
+
+ public void init(NamedList args) {
+ }
+
+ public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+ return new ExtendedDismaxQParser(qstr, localParams, params, req);
+ }
+}
+
+
+class ExtendedDismaxQParser extends QParser {
+
+ /**
+ * A field we can't ever find in any schema, so we can safely tell
+ * DisjunctionMaxQueryParser to use it as our defaultField, and
+ * map aliases from it to any field in our schema.
+ */
+ private static String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";
+
+ /** shorten the class references for utilities */
+ private static class U extends SolrPluginUtils {
+ /* :NOOP */
+ }
+
+ /** shorten the class references for utilities */
+ private static interface DMP extends DisMaxParams {
+ /* :NOOP */
+ }
+
+
+ public ExtendedDismaxQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+ super(qstr, localParams, params, req);
+ }
+
+ Map<String,Float> queryFields;
+ Query parsedUserQuery;
+
+
+ private String[] boostParams;
+ private String[] multBoosts;
+ private List<Query> boostQueries;
+ private Query altUserQuery;
+ private QParser altQParser;
+
+
+ public Query parse() throws ParseException {
+ SolrParams localParams = getLocalParams();
+ SolrParams params = getParams();
+
+ SolrParams solrParams = localParams == null ? params : new DefaultSolrParams(localParams, params);
+
+ queryFields = U.parseFieldBoosts(solrParams.getParams(DMP.QF));
+ Map<String,Float> phraseFields = U.parseFieldBoosts(solrParams.getParams(DMP.PF));
+ Map<String,Float> phraseFields3 = U.parseFieldBoosts(solrParams.getParams("pf3"));
+
+ float tiebreaker = solrParams.getFloat(DMP.TIE, 0.0f);
+
+ int pslop = solrParams.getInt(DMP.PS, 0);
+ int qslop = solrParams.getInt(DMP.QS, 0);
+
+ // remove stopwords from mandatory "matching" component?
+ boolean stopwords = solrParams.getBool("stopwords", true);
+
+ /* the main query we will execute. we disable the coord because
+ * this query is an artificial construct
+ */
+ BooleanQuery query = new BooleanQuery(true);
+
+ /* * * Main User Query * * */
+ parsedUserQuery = null;
+ String userQuery = getString();
+ altUserQuery = null;
+ if( userQuery == null || userQuery.length() < 1 ) {
+ // If no query is specified, we may have an alternate
+ String altQ = solrParams.get( DMP.ALTQ );
+ if (altQ != null) {
+ altQParser = subQuery(altQ, null);
+ altUserQuery = altQParser.getQuery();
+ query.add( altUserQuery , BooleanClause.Occur.MUST );
+ } else {
+ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "missing query string" );
+ }
+ }
+ else {
+ // There is a valid query string
+ // userQuery = partialEscape(U.stripUnbalancedQuotes(userQuery)).toString();
+
+ boolean lowercaseOperators = solrParams.getBool("lowercaseOperators", true);
+ String mainUserQuery = userQuery;
+
+ ExtendedSolrQueryParser up =
+ new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
+ up.addAlias(IMPOSSIBLE_FIELD_NAME,
+ tiebreaker, queryFields);
+ up.setPhraseSlop(qslop); // slop for explicit user phrase queries
+ up.setAllowLeadingWildcard(true);
+
+ // defer escaping, and only do so if lucene parsing fails or we need phrase
+ // queries. Need sloppy phrase queries anyway though.
+ List<Clause> clauses = null;
+ boolean specialSyntax = false;
+ int numPluses = 0;
+ int numMinuses = 0;
+ int numOptional = 0;
+ int numAND = 0;
+ int numOR = 0;
+ int numNOT = 0;
+ boolean sawLowerAnd=false;
+ boolean sawLowerOr=false;
+
+ clauses = splitIntoClauses(userQuery, false);
+ for (Clause clause : clauses) {
+ if (!clause.isPhrase && clause.hasSpecialSyntax) {
+ specialSyntax = true;
+ }
+ if (clause.must == '+') numPluses++;
+ if (clause.must == '-') numMinuses++;
+ if (clause.isBareWord()) {
+ String s = clause.val;
+ if ("AND".equals(s)) {
+ numAND++;
+ } else if ("OR".equals(s)) {
+ numOR++;
+ } else if ("NOT".equals(s)) {
+ numNOT++;
+ } else if (lowercaseOperators) {
+ if ("and".equals(s)) {
+ numAND++;
+ sawLowerAnd=true;
+ } else if ("or".equals(s)) {
+ numOR++;
+ sawLowerOr=true;
+ }
+ }
+ }
+ }
+ numOptional = clauses.size() - (numPluses + numMinuses);
+
+ // convert lower or mixed case operators to uppercase if we saw them.
+ // only do this for the lucene query part and not for phrase query boosting
+ // since some fields might not be case insensitive.
+ // We don't use a regex for this because it might change and AND or OR in
+ // a phrase query in a case sensitive field.
+ if (sawLowerAnd || sawLowerOr) {
+ StringBuilder sb = new StringBuilder();
+ for (int i=0; i<clauses.size(); i++) {
+ Clause clause = clauses.get(i);
+ String s = clause.raw;
+ // "and" and "or" won't be operators at the start or end
+ if (i>0 && i+1<clauses.size()) {
+ if ("AND".equalsIgnoreCase(s)) {
+ s="AND";
+ } else if ("OR".equalsIgnoreCase(s)) {
+ s="OR";
+ }
+ }
+ sb.append(s);
+ sb.append(' ');
+ }
+
+ mainUserQuery = sb.toString();
+ }
+
+ // For correct lucene queries, turn off mm processing if there
+ // were explicit operators (except for AND).
+ boolean doMinMatched = (numOR + numNOT + numPluses + numMinuses) == 0;
+
+ try {
+ up.setRemoveStopFilter(!stopwords);
+ parsedUserQuery = up.parse(mainUserQuery);
+
+ if (stopwords && isEmpty(parsedUserQuery)) {
+ // if the query was all stop words, remove none of them
+ up.setRemoveStopFilter(true);
+ parsedUserQuery = up.parse(mainUserQuery);
+ }
+ } catch (Exception e) {
+ // ignore failure and reparse later after escaping reserved chars
+ }
+
+ if (parsedUserQuery != null && doMinMatched) {
+ String minShouldMatch = solrParams.get(DMP.MM, "100%");
+ if (parsedUserQuery instanceof BooleanQuery) {
+ U.setMinShouldMatch((BooleanQuery)parsedUserQuery, minShouldMatch);
+ }
+ }
+
+
+ if (parsedUserQuery == null) {
+ StringBuilder sb = new StringBuilder();
+ for (Clause clause : clauses) {
+
+ boolean doQuote = clause.isPhrase;
+
+ String s=clause.val;
+ if (!clause.isPhrase && ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s))) {
+ doQuote=true;
+ }
+
+ if (clause.must != 0) {
+ sb.append(clause.must);
+ }
+ if (clause.field != null) {
+ sb.append(clause.field);
+ sb.append(':');
+ }
+ if (doQuote) {
+ sb.append('"');
+ }
+ sb.append(clause.val);
+ if (doQuote) {
+ sb.append('"');
+ }
+ sb.append(' ');
+ }
+ String escapedUserQuery = sb.toString();
+ parsedUserQuery = up.parse(escapedUserQuery);
+
+ // Only do minimum-match logic
+ String minShouldMatch = solrParams.get(DMP.MM, "100%");
+
+ if (parsedUserQuery instanceof BooleanQuery) {
+ BooleanQuery t = new BooleanQuery();
+ U.flattenBooleanQuery(t, (BooleanQuery)parsedUserQuery);
+ U.setMinShouldMatch(t, minShouldMatch);
+ parsedUserQuery = t;
+ }
+ }
+
+ query.add(parsedUserQuery, BooleanClause.Occur.MUST);
+
+ // sloppy phrase queries for proximity
+ if (phraseFields.size() > 0 || phraseFields3.size() > 0) {
+ // find non-field clauses
+ List<Clause> normalClauses = new ArrayList<Clause>(clauses.size());
+ for (Clause clause : clauses) {
+ if (clause.field != null || clause.isPhrase) continue;
+ // check for keywords "AND,OR,TO"
+ if (clause.isBareWord()) {
+ String s = clause.val.toString();
+ // avoid putting explicit operators in the phrase query
+ if ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s) || "TO".equals(s)) continue;
+ }
+ normalClauses.add(clause);
+ }
+
+ Map<String,Float> pf = phraseFields;
+ if (normalClauses.size() >= 2 && pf.size() > 0) {
+ StringBuilder sb = new StringBuilder();
+ for (int i=0; i<normalClauses.size()-1; i++) {
+ sb.append('"');
+ sb.append(normalClauses.get(i).val);
+ sb.append(' ');
+ sb.append(normalClauses.get(i+1).val);
+ sb.append('"');
+ sb.append(' ');
+ }
+
+ String userPhraseQuery = sb.toString();
+
+ /* for parsing sloppy phrases using DisjunctionMaxQueries */
+ ExtendedSolrQueryParser pp =
+ new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
+ pp.addAlias(IMPOSSIBLE_FIELD_NAME,
+ tiebreaker, pf);
+ pp.setPhraseSlop(pslop);
+ pp.makeDismax = false; // make boolean queries instead
+ pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
+ pp.minClauseSize = 2; // if a stopword is removed, don't add the phrase
+
+ // TODO: perhaps we shouldn't use synonyms either...
+
+ Query phrase = pp.parse(userPhraseQuery);
+ if (phrase != null) {
+ query.add(phrase, BooleanClause.Occur.SHOULD);
+ }
+ }
+
+ pf = phraseFields3;
+ if (normalClauses.size() >= 3 && pf.size() > 0) {
+ StringBuilder sb = new StringBuilder();
+ for (int i=0; i<normalClauses.size()-2; i++) {
+ sb.append('"');
+ sb.append(normalClauses.get(i).val);
+ sb.append(' ');
+ sb.append(normalClauses.get(i+1).val);
+ sb.append(' ');
+ sb.append(normalClauses.get(i+2).val);
+ sb.append('"');
+ sb.append(' ');
+ }
+
+ String userPhraseQuery = sb.toString();
+
+ /* for parsing sloppy phrases using DisjunctionMaxQueries */
+ ExtendedSolrQueryParser pp =
+ new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME);
+ pp.addAlias(IMPOSSIBLE_FIELD_NAME,
+ tiebreaker, pf);
+ pp.setPhraseSlop(pslop);
+ pp.makeDismax = false; // make boolean queries instead
+ pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
+ pp.minClauseSize = 2; // keep min phrase size at 2 since stopword could have been removed in middle
+
+ Query phrase = pp.parse(userPhraseQuery);
+ if (phrase != null) {
+ query.add(phrase, BooleanClause.Occur.SHOULD);
+ }
+ }
+
+ }
+ }
+
+
+
+ /* * * Boosting Query * * */
+ boostParams = solrParams.getParams(DMP.BQ);
+ //List<Query> boostQueries = U.parseQueryStrings(req, boostParams);
+ boostQueries=null;
+ if (boostParams!=null && boostParams.length>0) {
+ boostQueries = new ArrayList<Query>();
+ for (String qs : boostParams) {
+ if (qs.trim().length()==0) continue;
+ Query q = subQuery(qs, null).getQuery();
+ boostQueries.add(q);
+ }
+ }
+ if (null != boostQueries) {
+ if(1 == boostQueries.size() && 1 == boostParams.length) {
+ /* legacy logic */
+ Query f = boostQueries.get(0);
+ if (1.0f == f.getBoost() && f instanceof BooleanQuery) {
+ /* if the default boost was used, and we've got a BooleanQuery
+ * extract the subqueries out and use them directly
+ */
+ for (Object c : ((BooleanQuery)f).clauses()) {
+ query.add((BooleanClause)c);
+ }
+ } else {
+ query.add(f, BooleanClause.Occur.SHOULD);
+ }
+ } else {
+ for(Query f : boostQueries) {
+ query.add(f, BooleanClause.Occur.SHOULD);
+ }
+ }
+ }
+
+ /* * * Boosting Functions * * */
+
+ String[] boostFuncs = solrParams.getParams(DMP.BF);
+ if (null != boostFuncs && 0 != boostFuncs.length) {
+ for (String boostFunc : boostFuncs) {
+ if(null == boostFunc || "".equals(boostFunc)) continue;
+ Map<String,Float> ff = SolrPluginUtils.parseFieldBoosts(boostFunc);
+ for (String f : ff.keySet()) {
+ Query fq = subQuery(f, FunctionQParserPlugin.NAME).getQuery();
+ Float b = ff.get(f);
+ if (null != b) {
+ fq.setBoost(b);
+ }
+ query.add(fq, BooleanClause.Occur.SHOULD);
+ }
+ }
+ }
+
+
+ //
+ // create a boosted query (scores multiplied by boosts)
+ //
+ Query topQuery = query;
+ multBoosts = solrParams.getParams("boost");
+ if (multBoosts!=null && multBoosts.length>0) {
+
+ List<ValueSource> boosts = new ArrayList<ValueSource>();
+ for (String boostStr : multBoosts) {
+ if (boostStr==null || boostStr.length()==0) continue;
+ Query boost = subQuery(boostStr, FunctionQParserPlugin.NAME).getQuery();
+ ValueSource vs;
+ if (boost instanceof FunctionQuery) {
+ vs = ((FunctionQuery)boost).getValueSource();
+ } else {
+ vs = new QueryValueSource(boost, 1.0f);
+ }
+ boosts.add(vs);
+ }
+
+ if (boosts.size()>1) {
+ ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[boosts.size()]));
+ topQuery = new BoostedQuery(query, prod);
+ } else if (boosts.size() == 1) {
+ topQuery = new BoostedQuery(query, boosts.get(0));
+ }
+ }
+
+ return topQuery;
+ }
+
+ @Override
+ public String[] getDefaultHighlightFields() {
+ String[] highFields = queryFields.keySet().toArray(new String[0]);
+ return highFields;
+ }
+
+ @Override
+ public Query getHighlightQuery() throws ParseException {
+ return parsedUserQuery;
+ }
+
+ public void addDebugInfo(NamedList<Object> debugInfo) {
+ super.addDebugInfo(debugInfo);
+ debugInfo.add("altquerystring", altUserQuery);
+ if (null != boostQueries) {
+ debugInfo.add("boost_queries", boostParams);
+ debugInfo.add("parsed_boost_queries",
+ QueryParsing.toString(boostQueries, getReq().getSchema()));
+ }
+ debugInfo.add("boostfuncs", getReq().getParams().getParams(DisMaxParams.BF));
+ }
+
+
+
+ public static CharSequence partialEscape(CharSequence s) {
+ StringBuilder sb = new StringBuilder();
+
+ int len = s.length();
+ for (int i = 0; i < len; i++) {
+ char c = s.charAt(i);
+ if (c == ':') {
+ // look forward to make sure it's something that won't
+ // cause a parse exception (something that won't be escaped... like
+ // +, -, :, or whitespace)
+ if (i+1<len && i>0) {
+ char ch = s.charAt(i+1);
+ if (!(Character.isWhitespace(ch) || ch=='+' || ch=='-' || ch==':')) {
+ // OK, at this point the chars after the ':' will be fine.
+ // now look back and try to determine if this is a fieldname
+ // [+,-]? [letter,_] [letter digit,_,-,.]*
+ // This won't cover *all* possible lucene fieldnames, but we should
+ // only pick nice names to begin with
+ int start, pos;
+ for (start=i-1; start>=0; start--) {
+ ch = s.charAt(start);
+ if (Character.isWhitespace(ch)) break;
+ }
+
+ // skip whitespace
+ pos = start+1;
+
+ // skip leading + or -
+ ch = s.charAt(pos);
+ if (ch=='+' || ch=='-') {
+ pos++;
+ }
+
+ // we don't need to explicitly check for end of string
+ // since ':' will act as our sentinel
+
+ // first char can't be '-' or '.'
+ ch = s.charAt(pos++);
+ if (Character.isJavaIdentifierPart(ch)) {
+
+ for(;;) {
+ ch = s.charAt(pos++);
+ if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) {
+ break;
+ }
+ }
+
+ if (pos<=i) {
+ // OK, we got to the ':' and everything looked like a valid fieldname, so
+ // don't escape the ':'
+ sb.append(':');
+ continue; // jump back to start of outer-most loop
+ }
+
+ }
+
+
+ }
+ }
+
+ // we fell through to here, so we should escape this like other reserved chars.
+ sb.append('\\');
+ }
+ else if (c == '\\' || c == '!' || c == '(' || c == ')' ||
+ c == '^' || c == '[' || c == ']' ||
+ c == '{' || c == '}' || c == '~' || c == '*' || c == '?'
+ )
+ {
+ sb.append('\\');
+ }
+ sb.append(c);
+ }
+ return sb;
+ }
+
+
+ static class Clause {
+
+ boolean isBareWord() {
+ return must==0 && !isPhrase;
+ }
+
+ String field;
+ boolean isPhrase;
+ boolean hasWhitespace;
+ boolean hasSpecialSyntax;
+ boolean syntaxError;
+ char must; // + or -
+ String val; // the field value (minus the field name, +/-, quotes)
+ String raw; // the raw clause w/o leading/trailing whitespace
+ }
+
+
+ public List<Clause> splitIntoClauses(String s, boolean ignoreQuote) {
+ ArrayList<Clause> lst = new ArrayList<Clause>(4);
+ Clause clause = new Clause();
+
+ int pos=0;
+ int end=s.length();
+ char ch=0;
+ int start;
+ outer: while (pos < end) {
+ ch = s.charAt(pos);
+
+ while (Character.isWhitespace(ch)) {
+ if (++pos >= end) break;
+ ch = s.charAt(pos);
+ }
+
+ start = pos;
+
+ if (ch=='+' || ch=='-') {
+ clause.must = ch;
+ pos++;
+ }
+
+ clause.field = getFieldName(s, pos, end);
+ if (clause.field != null) {
+ pos += clause.field.length(); // skip the field name
+ pos++; // skip the ':'
+ }
+
+ if (pos>=end) break;
+
+
+ char inString=0;
+
+ ch = s.charAt(pos);
+ if (!ignoreQuote && ch=='"') {
+ clause.isPhrase = true;
+ inString = '"';
+ pos++;
+ }
+
+ StringBuilder sb = new StringBuilder();
+ while (pos < end) {
+ ch = s.charAt(pos++);
+ if (ch=='\\') { // skip escaped chars, but leave escaped
+ sb.append(ch);
+ if (pos >= end) {
+ sb.append(ch); // double backslash if we are at the end of the string
+ break;
+ }
+ ch = s.charAt(pos++);
+ sb.append(ch);
+ continue;
+ } else if (inString != 0 && ch == inString) {
+ inString=0;
+ break;
+ } else if (Character.isWhitespace(ch)) {
+ clause.hasWhitespace=true;
+ if (inString == 0) {
+ // end of the token if we aren't in a string, backing
+ // up the position.
+ pos--;
+ break;
+ }
+ }
+
+ if (inString == 0) {
+ switch (ch) {
+ case '!':
+ case '(':
+ case ')':
+ case ':':
+ case '^':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '~':
+ case '*':
+ case '?':
+ case '"':
+ case '+':
+ case '-':
+ clause.hasSpecialSyntax = true;
+ sb.append('\\');
+ }
+ } else if (ch=='"') {
+ // only char we need to escape in a string is double quote
+ sb.append('\\');
+ }
+ sb.append(ch);
+ }
+ clause.val = sb.toString();
+
+ if (clause.isPhrase) {
+ if (inString != 0) {
+ // detected bad quote balancing... retry
+ // parsing with quotes like any other char
+ return splitIntoClauses(s, true);
+ }
+
+ // special syntax in a string isn't special
+ clause.hasSpecialSyntax = false;
+ } else {
+ // an empty clause... must be just a + or - on its own
+ if (clause.val.length() == 0) {
+ clause.syntaxError = true;
+ if (clause.must != 0) {
+ clause.val="\\"+clause.must;
+ clause.must = 0;
+ clause.hasSpecialSyntax = true;
+ } else {
+ // uh.. this shouldn't happen.
+ clause=null;
+ }
+ }
+ }
+
+ if (clause != null) {
+ clause.raw = s.substring(start, pos);
+ lst.add(clause);
+ }
+ clause = new Clause();
+ }
+
+ return lst;
+ }
+
+ public String getFieldName(String s, int pos, int end) {
+ if (pos >= end) return null;
+ int p=pos;
+ int colon = s.indexOf(':',pos);
+ // make sure there is something after the colon, but not whitespace
+ if (colon<=pos || colon+1>=end || Character.isWhitespace(s.charAt(colon+1))) return null;
+ char ch = s.charAt(p++);
+ if (!Character.isJavaIdentifierPart(ch)) return null;
+ while (p<colon) {
+ ch = s.charAt(p++);
+ if (!(Character.isJavaIdentifierPart(ch) || ch=='-' || ch=='.')) return null;
+ }
+ String fname = s.substring(pos, p);
+ return getReq().getSchema().getFieldTypeNoEx(fname) == null ? null : fname;
+ }
+
+
+ public static List<String> split(String s, boolean ignoreQuote) {
+ ArrayList<String> lst = new ArrayList<String>(4);
+ int pos=0, start=0, end=s.length();
+ char inString=0;
+ char ch=0;
+ while (pos < end) {
+ char prevChar=ch;
+ ch = s.charAt(pos++);
+ if (ch=='\\') { // skip escaped chars
+ pos++;
+ } else if (inString != 0 && ch==inString) {
+ inString=0;
+ } else if (!ignoreQuote && ch=='"') {
+ // If char is directly preceded by a number or letter
+ // then don't treat it as the start of a string.
+ if (!Character.isLetterOrDigit(prevChar)) {
+ inString=ch;
+ }
+ } else if (Character.isWhitespace(ch) && inString==0) {
+ lst.add(s.substring(start,pos-1));
+ start=pos;
+ }
+ }
+ if (start < end) {
+ lst.add(s.substring(start,end));
+ }
+
+ if (inString != 0) {
+ // unbalanced quote... ignore them
+ return split(s, true);
+ }
+
+ return lst;
+ }
+
+
+
+
+ enum QType {
+ FIELD,
+ PHRASE,
+ PREFIX,
+ WILDCARD,
+ FUZZY,
+ RANGE
+ }
+
+ /**
+ * A subclass of SolrQueryParser that supports aliasing fields for
+ * constructing DisjunctionMaxQueries.
+ */
+ class ExtendedSolrQueryParser extends SolrQueryParser {
+
+ /** A simple container for storing alias info
+ */
+ protected class Alias {
+ public float tie;
+ public Map<String,Float> fields;
+ }
+
+ boolean makeDismax=true;
+ boolean disableCoord=true;
+ boolean allowWildcard=true;
+ int minClauseSize = 0; // minimum number of clauses per phrase query...
+ // used when constructing boosting part of query via sloppy phrases
+
+ ExtendedAnalyzer analyzer;
+
+ /**
+ * Where we store a map from field name we expect to see in our query
+ * string, to Alias object containing the fields to use in our
+ * DisjunctionMaxQuery and the tiebreaker to use.
+ */
+ protected Map<String,Alias> aliases = new HashMap<String,Alias>(3);
+
+ public ExtendedSolrQueryParser(QParser parser, String defaultField) {
+ super(parser, defaultField, new ExtendedAnalyzer(parser));
+ analyzer = (ExtendedAnalyzer)getAnalyzer();
+ // don't trust that our parent class won't ever change its default
+ setDefaultOperator(QueryParser.Operator.OR);
+ }
+
+ public void setRemoveStopFilter(boolean remove) {
+ analyzer.removeStopFilter = remove;
+ }
+
+ protected Query getBooleanQuery(List clauses, boolean disableCoord) throws ParseException {
+ Query q = super.getBooleanQuery(clauses, disableCoord);
+ if (q != null) {
+ q = QueryUtils.makeQueryable(q);
+ }
+ return q;
+ }
+
+
+ ////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+ ////////////////////////////////////////////////////////////////////////////
+
+ protected void addClause(List clauses, int conj, int mods, Query q) {
+//System.out.println("addClause:clauses="+clauses+" conj="+conj+" mods="+mods+" q="+q);
+ super.addClause(clauses, conj, mods, q);
+ }
+
+ /**
+ * Add an alias to this query parser.
+ *
+ * @param field the field name that should trigger alias mapping
+ * @param fieldBoosts the mapping from fieldname to boost value that
+ * should be used to build up the clauses of the
+ * DisjunctionMaxQuery.
+ * @param tiebreaker to the tiebreaker to be used in the
+ * DisjunctionMaxQuery
+ * @see SolrPluginUtils#parseFieldBoosts
+ */
+ public void addAlias(String field, float tiebreaker,
+ Map<String,Float> fieldBoosts) {
+
+ Alias a = new Alias();
+ a.tie = tiebreaker;
+ a.fields = fieldBoosts;
+ aliases.put(field, a);
+ }
+
+
+ QType type;
+ String field;
+ String val;
+ String val2;
+ boolean bool;
+ float flt;
+ int slop;
+
+ @Override
+ protected Query getFieldQuery(String field, String val) throws ParseException {
+//System.out.println("getFieldQuery: val="+val);
+
+ this.type = QType.FIELD;
+ this.field = field;
+ this.val = val;
+ this.slop = getPhraseSlop(); // unspecified
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getFieldQuery(String field, String val, int slop) throws ParseException {
+//System.out.println("getFieldQuery: val="+val+" slop="+slop);
+
+ this.type = QType.PHRASE;
+ this.field = field;
+ this.val = val;
+ this.slop = slop;
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getPrefixQuery(String field, String val) throws ParseException {
+//System.out.println("getPrefixQuery: val="+val);
+ if (val.equals("") && field.equals("*")) {
+ return new MatchAllDocsQuery();
+ }
+ this.type = QType.PREFIX;
+ this.field = field;
+ this.val = val;
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getRangeQuery(String field, String a, String b, boolean inclusive) throws ParseException {
+//System.out.println("getRangeQuery:");
+
+ this.type = QType.RANGE;
+ this.field = field;
+ this.val = a;
+ this.val2 = b;
+ this.bool = inclusive;
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getWildcardQuery(String field, String val) throws ParseException {
+//System.out.println("getWildcardQuery: val="+val);
+
+ if (val.equals("*")) {
+ if (field.equals("*")) {
+ return new MatchAllDocsQuery();
+ } else{
+ return getPrefixQuery(field,"");
+ }
+ }
+ this.type = QType.WILDCARD;
+ this.field = field;
+ this.val = val;
+ return getAliasedQuery();
+ }
+
+ @Override
+ protected Query getFuzzyQuery(String field, String val, float minSimilarity) throws ParseException {
+//System.out.println("getFuzzyQuery: val="+val);
+
+ this.type = QType.FUZZY;
+ this.field = field;
+ this.val = val;
+ this.flt = minSimilarity;
+ return getAliasedQuery();
+ }
+
+ /**
+ * Delegates to the super class unless the field has been specified
+ * as an alias -- in which case we recurse on each of
+ * the aliased fields, and the results are composed into a
+ * DisjunctionMaxQuery. (so yes: aliases which point at other
+ * aliases should work)
+ */
+ protected Query getAliasedQuery()
+ throws ParseException {
+ Alias a = aliases.get(field);
+ if (a != null) {
+ List<Query> lst = getQueries(a);
+ if (lst == null || lst.size()==0)
+ return getQuery();
+ // make a DisjunctionMaxQuery in this case too... it will stop
+ // the "mm" processing from making everything required in the case
+ // that the query expanded to multiple clauses.
+ // DisMaxQuery.rewrite() removes itself if there is just a single clause anyway.
+ // if (lst.size()==1) return lst.get(0);
+
+ if (makeDismax) {
+ DisjunctionMaxQuery q = new DisjunctionMaxQuery(lst, a.tie);
+ return q;
+ } else {
+ // should we disable coord?
+ BooleanQuery q = new BooleanQuery(disableCoord);
+ for (Query sub : lst) {
+ q.add(sub, BooleanClause.Occur.SHOULD);
+ }
+ return q;
+ }
+ } else {
+ return getQuery();
+ }
+ }
+
+
+ protected List<Query> getQueries(Alias a) throws ParseException {
+ if (a == null) return null;
+ if (a.fields.size()==0) return null;
+ List<Query> lst= new ArrayList<Query>(4);
+
+ for (String f : a.fields.keySet()) {
+ this.field = f;
+ Query sub = getQuery();
+ if (sub != null) {
+ Float boost = a.fields.get(f);
+ if (boost != null) {
+ sub.setBoost(boost);
+ }
+ lst.add(sub);
+ }
+ }
+ return lst;
+ }
+
+ private Query getQuery() throws ParseException {
+ try {
+
+ switch (type) {
+ case FIELD: // fallthrough
+ case PHRASE:
+ Query query = super.getFieldQuery(field, val);
+ if (query instanceof PhraseQuery) {
+ PhraseQuery pq = (PhraseQuery)query;
+ if (minClauseSize > 1 && pq.getTerms().length < minClauseSize) return null;
+ ((PhraseQuery)query).setSlop(slop);
+ } else if (query instanceof MultiPhraseQuery) {
+ MultiPhraseQuery pq = (MultiPhraseQuery)query;
+ if (minClauseSize > 1 && pq.getTermArrays().size() < minClauseSize) return null;
+ ((MultiPhraseQuery)query).setSlop(slop);
+ } else if (minClauseSize > 1) {
+ // if it's not a type of phrase query, it doesn't meet the minClauseSize requirements
+ return null;
+ }
+ return query;
+ case PREFIX: return super.getPrefixQuery(field, val);
+ case WILDCARD: return super.getWildcardQuery(field, val);
+ case FUZZY: return super.getFuzzyQuery(field, val, flt);
+ case RANGE: return super.getRangeQuery(field, val, val2, bool);
+ }
+ return null;
+
+ } catch (Exception e) {
+ // an exception here is due to the field query not being compatible with the input text
+ // for example, passing a string to a numeric field.
+ return null;
+ }
+ }
+ }
+
+
+ static boolean isEmpty(Query q) {
+ if (q==null) return true;
+ if (q instanceof BooleanQuery && ((BooleanQuery)q).clauses().size()==0) return true;
+ return false;
+ }
+}
+
+
+class ExtendedAnalyzer extends Analyzer {
+ final Map<String, Analyzer> map = new HashMap<String, Analyzer>();
+ final QParser parser;
+ final Analyzer queryAnalyzer;
+ public boolean removeStopFilter = false;
+
+ public static TokenizerChain getQueryTokenizerChain(QParser parser, String fieldName) {
+ FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
+ Analyzer qa = ft.getQueryAnalyzer();
+ return qa instanceof TokenizerChain ? (TokenizerChain)qa : null;
+ }
+
+ public static StopFilterFactory getQueryStopFilter(QParser parser, String fieldName) {
+ TokenizerChain tcq = getQueryTokenizerChain(parser, fieldName);
+ if (tcq == null) return null;
+ TokenFilterFactory[] facs = tcq.getTokenFilterFactories();
+
+ for (int i=0; i<facs.length; i++) {
+ TokenFilterFactory tf = facs[i];
+ if (tf instanceof StopFilterFactory) {
+ return (StopFilterFactory)tf;
+ }
+ }
+ return null;
+ }
+
+ public ExtendedAnalyzer(QParser parser) {
+ this.parser = parser;
+ this.queryAnalyzer = parser.getReq().getSchema().getQueryAnalyzer();
+ }
+
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ if (!removeStopFilter) {
+ return queryAnalyzer.tokenStream(fieldName, reader);
+ }
+
+ Analyzer a = map.get(fieldName);
+ if (a != null) {
+ return a.tokenStream(fieldName, reader);
+ }
+
+ FieldType ft = parser.getReq().getSchema().getFieldType(fieldName);
+ Analyzer qa = ft.getQueryAnalyzer();
+ if (!(qa instanceof TokenizerChain)) {
+ map.put(fieldName, qa);
+ return qa.tokenStream(fieldName, reader);
+ }
+ TokenizerChain tcq = (TokenizerChain)qa;
+ Analyzer ia = ft.getAnalyzer();
+ if (ia == qa || !(ia instanceof TokenizerChain)) {
+ map.put(fieldName, qa);
+ return qa.tokenStream(fieldName, reader);
+ }
+ TokenizerChain tci = (TokenizerChain)ia;
+
+ // make sure that there isn't a stop filter in the indexer
+ for (TokenFilterFactory tf : tci.getTokenFilterFactories()) {
+ if (tf instanceof StopFilterFactory) {
+ map.put(fieldName, qa);
+ return qa.tokenStream(fieldName, reader);
+ }
+ }
+
+ // now if there is a stop filter in the query analyzer, remove it
+ int stopIdx = -1;
+ TokenFilterFactory[] facs = tcq.getTokenFilterFactories();
+
+ for (int i=0; i<facs.length; i++) {
+ TokenFilterFactory tf = facs[i];
+ if (tf instanceof StopFilterFactory) {
+ stopIdx = i;
+ break;
+ }
+ }
+
+ if (stopIdx == -1) {
+ // no stop filter exists
+ map.put(fieldName, qa);
+ return qa.tokenStream(fieldName, reader);
+ }
+
+ TokenFilterFactory[] newtf = new TokenFilterFactory[facs.length-1];
+ for (int i=0,j=0; i<facs.length; i++) {
+ if (i==stopIdx) continue;
+ newtf[j++] = facs[i];
+ }
+
+ TokenizerChain newa = new TokenizerChain(tcq.getTokenizerFactory(), newtf);
+ newa.setPositionIncrementGap(tcq.getPositionIncrementGap(fieldName));
+
+ map.put(fieldName, newa);
+ return newa.tokenStream(fieldName, reader);
+ }
+
+ public int getPositionIncrementGap(String fieldName) {
+ return queryAnalyzer.getPositionIncrementGap(fieldName);
+ }
+
+ public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+ if (!removeStopFilter) {
+ return queryAnalyzer.reusableTokenStream(fieldName, reader);
+ }
+ // TODO: done to fix stop word removal bug - could be done while still using reusable?
+ return tokenStream(fieldName, reader);
+ }
+}
Propchange: lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/solr/trunk/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/search/QParserPlugin.java Tue Nov 17 21:46:38 2009
@@ -32,6 +32,7 @@
PrefixQParserPlugin.NAME, PrefixQParserPlugin.class,
BoostQParserPlugin.NAME, BoostQParserPlugin.class,
DisMaxQParserPlugin.NAME, DisMaxQParserPlugin.class,
+ ExtendedDismaxQParserPlugin.NAME, ExtendedDismaxQParserPlugin.class,
FieldQParserPlugin.NAME, FieldQParserPlugin.class,
RawQParserPlugin.NAME, RawQParserPlugin.class,
NestedQParserPlugin.NAME, NestedQParserPlugin.class,
Added: lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java?rev=881546&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java Tue Nov 17 21:46:38 2009
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search;
+
+import org.apache.solr.util.AbstractSolrTestCase;
+
+public class TestExtendedDismaxParser extends AbstractSolrTestCase {
+ public String getSchemaFile() { return "schema12.xml"; }
+ public String getSolrConfigFile() { return "solrconfig.xml"; }
+ // public String getCoreName() { return "collection1"; }
+
+ public void setUp() throws Exception {
+ // if you override setUp or tearDown, you better call
+ // the super class's version
+ super.setUp();
+ }
+ public void tearDown() throws Exception {
+ // if you override setUp or tearDown, you better call
+ // the super class's version
+ super.tearDown();
+ }
+
+ // test the edismax query parser based on the dismax parser
+ public void testFocusQueryParser() {
+ assertU(adoc("id", "42", "trait_ss", "Tool", "trait_ss", "Obnoxious",
+ "name", "Zapp Brannigan"));
+ assertU(adoc("id", "43" ,
+ "title", "Democratic Order op Planets"));
+ assertU(adoc("id", "44", "trait_ss", "Tool",
+ "name", "The Zapper"));
+ assertU(adoc("id", "45", "trait_ss", "Chauvinist",
+ "title", "25 star General"));
+ assertU(adoc("id", "46", "trait_ss", "Obnoxious",
+ "subject", "Defeated the pacifists op the Gandhi nebula"));
+ assertU(adoc("id", "47", "trait_ss", "Pig",
+ "text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));
+ assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100"));
+ assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100"));
+ assertU(adoc("id", "50", "text_sw", "start new big city end"));
+
+ assertU(commit());
+ String allq = "id:[42 TO 50]";
+ String allr = "*[count(//doc)=9]";
+ String oner = "*[count(//doc)=1]";
+ String twor = "*[count(//doc)=2]";
+ String nor = "*[count(//doc)=0]";
+
+
+ assertQ("standard request handler returns all matches",
+ req(allq),
+ allr
+ );
+
+ assertQ("edismax query parser returns all matches",
+ req("q", allq,
+ "defType", "edismax"
+ ),
+ allr
+ );
+
+ assertQ(req("defType", "edismax", "qf", "trait_ss",
+ "q","Tool"), twor
+ );
+
+ // test that field types that aren't applicable don't cause an exception to be thrown
+ assertQ(req("defType", "edismax", "qf", "trait_ss foo_i foo_f foo_dt foo_l foo_d foo_b",
+ "q","Tool"), twor
+ );
+
+ // test that numeric field types can be queried
+ assertQ(req("defType", "edismax", "qf", "text_sw",
+ "q","foo_i:100"), oner
+ );
+
+ // test that numeric field types can be queried
+ assertQ(req("defType", "edismax", "qf", "text_sw",
+ "q","foo_i:-100"), oner
+ );
+
+ // test that numeric field types can be queried via qf
+ assertQ(req("defType", "edismax", "qf", "text_sw foo_i",
+ "q","100"), oner
+ );
+
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","op"), twor
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order op"), oner
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order AND op"), oner
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order and op"), oner
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","+Order op"), oner
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order OR op"), twor
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","Order or op"), twor
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","*:*"), allr
+ );
+
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","star OR (-star)"), allr
+ );
+ assertQ(req("defType", "edismax", "qf", "name title subject text",
+ "q","id:42 OR (-id:42)"), allr
+ );
+
+ // test that basic synonyms work
+ assertQ(req("defType", "edismax", "qf", "text_sw",
+ "q","GB"), oner
+ );
+
+ // test for stopword removal in main query part
+ assertQ(req("defType", "edismax", "qf", "text_sw",
+ "q","the big"), twor
+ );
+
+ // test for stopwords not removed
+ assertQ(req("defType", "edismax", "qf", "text_sw", "stopwords","false",
+ "q","the big"), oner
+ );
+
+ /** stopword removal in conjunction with multi-word synonyms at query time
+ * breaks this test.
+ // multi-word synonyms
+ // remove id:50 which contains the false match
+ assertQ(req("defType", "edismax", "qf", "text_t", "indent","true", "debugQuery","true",
+ "q","-id:50 nyc"), oner
+ );
+ **/
+
+ /*** these fail because multi-word synonyms are being used at query time
+ // this will incorrectly match "new big city"
+ assertQ(req("defType", "edismax", "qf", "id title",
+ "q","nyc"), oner
+ );
+
+ // this will incorrectly match "new big city"
+ assertQ(req("defType", "edismax", "qf", "title",
+ "q","the big apple"), nor
+ );
+ ***/
+
+ }
+
+}
\ No newline at end of file
Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: lucene/solr/trunk/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema.xml Tue Nov 17 21:46:38 2009
@@ -339,7 +339,7 @@
<fieldtype name="syn" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
+ <filter name="syn" class="solr.SynonymFilterFactory" synonyms="old_synonyms.txt"/>
</analyzer>
</fieldtype>
@@ -350,7 +350,7 @@
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
- synonyms="synonyms.txt" expand="true" />
+ synonyms="old_synonyms.txt" expand="true" />
<filter class="solr.EnglishPorterFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
</analyzer>
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema12.xml Tue Nov 17 21:46:38 2009
@@ -325,6 +325,32 @@
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
+
+ <!-- a text field with the stop filter only on the query analyzer
+ -->
+ <fieldType name="text_sw" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
+ catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
+ catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
<!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
synonyms "better"
@@ -461,9 +487,11 @@
<dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true" />
+ <dynamicField name="*_sw" type="text_sw" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/stopwords.txt Tue Nov 17 21:46:38 2009
@@ -1,16 +1,58 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-stopworda
-stopwordb
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+#Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+s
+such
+t
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
+
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt?rev=881546&r1=881545&r2=881546&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/synonyms.txt Tue Nov 17 21:46:38 2009
@@ -1,22 +1,31 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-a => aa
-b => b1 b2
-c => c1,c2
-a\=>a => b\=>b
-a\,a => b\,b
-foo,bar,baz
-
-Television,TV,Televisions
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaa => aaaa
+bbb => bbbb1 bbbb2
+ccc => cccc1,cccc2
+a\=>a => b\=>b
+a\,a => b\,b
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+