You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2011/11/25 17:54:10 UTC

svn commit: r1206258 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/src/test-framework/ lucene/backwards/src/test/ lucene/src/java/org/apache/lucene/queryParser/ solr/ solr/core/src/java/org/apache/solr/schema/ solr/core/src/java/org/...

Author: erick
Date: Fri Nov 25 16:54:07 2011
New Revision: 1206258

URL: http://svn.apache.org/viewvc?rev=1206258&view=rev
Log:
SOLR-2438 allowing "multiterm" entry in the schema analysis chain, synthesizing one from the existing query chain if not present

Added:
    lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test-framework/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldType.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/SchemaField.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/TextField.java
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java
    lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml
    lucene/dev/branches/branch_3x/solr/solrj/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java?rev=1206258&r1=1206257&r2=1206258&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/queryParser/QueryParser.java Fri Nov 25 16:54:07 2011
@@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.VirtualMethod;
 
@@ -875,6 +876,41 @@ public class QueryParser implements Quer
     return new FuzzyQuery(term,minimumSimilarity,prefixLength);
   }
 
+  protected String analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
+    TokenStream source;
+
+    if (analyzerIn == null) analyzerIn = analyzer;
+
+    try {
+      source = analyzerIn.tokenStream(field, new StringReader(part));
+      source.reset();
+    } catch (IOException e) {
+      throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
+    }
+
+    CharTermAttribute termAtt = source.getAttribute(CharTermAttribute.class);
+    String termRet = "";
+
+    try {
+      if (!source.incrementToken())
+        throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
+      termRet = termAtt.toString();
+      if (source.incrementToken())
+        throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
+    } catch (IOException e) {
+      throw new RuntimeException("error analyzing range part: " + part, e);
+    }
+
+    try {
+      source.end();
+      source.close();
+    } catch (IOException e) {
+      throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e);
+    }
+
+    return termRet;
+  }
+
   /**
    * Builds a new TermRangeQuery instance
    * @param field Field

Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1206258&r1=1206257&r2=1206258&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Fri Nov 25 16:54:07 2011
@@ -30,6 +30,10 @@ New Features
 
 * SOLR-1565: StreamingUpdateSolrServer supports RequestWriter API and therefore, javabin update
   format (shalin)
+  
+* SOLR-2438: Case insensitive search for wildcard queries. Actually, the ability to specify
+  a complete analysis chain for multiterm queries.   
+  (Peter Sturge, Erick Erickson, Mentoring from Seeley and Muir)
 
 Bug Fixes
 ----------------------
@@ -54,6 +58,340 @@ Upgrading from Solr 3.4
 
 New Features
 ----------------------
+
+* SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now 
+  supports "percentages" which get evaluated relative to the current size of 
+  the cache when warming happens. 
+  (Tomas Fernandez Lobbe and hossman)
+
+* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf,
+  norm, maxdoc, numdocs. (yonik)
+
+* SOLR-1665: Add debug component options for timings, results and query info only (gsingers, hossman, yonik)
+
+* SOLR-2001: The query component will substitute an empty query that matches
+  no documents if the query parser returns null.  This also prevents an
+  exception from being thrown by the default parser if "q" is missing. (yonik)
+
+* SOLR-2112: Solrj API now supports streaming results. (ryan)
+
+* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
+  (erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)
+
+* LUCENE-2507, SOLR-2571, SOLR-2576: Added DirectSolrSpellChecker, which uses Lucene's
+  DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
+  levenshtein automata.  (James Dyer, rmuir)
+   
+* SOLR-1873: SolrCloud - added shared/central config and core/shard management via zookeeper,
+  built-in load balancing, and infrastructure for future SolrCloud work. (yonik, Mark Miller)
+  Additional Work:
+  SOLR-2324: SolrCloud solr.xml parameters are not persisted by CoreContainer.
+  (Massimo Schiavon, Mark Miller)
+  SOLR-2799: Update CloudState incrementally rather than always reading the data at each zk 
+  node. (Jamie Johnson via Mark Miller)
+
+* SOLR-1729: Evaluation of NOW for date math is done only once per request for
+  consistency, and is also propagated to shards in distributed search.
+  Adding a parameter NOW=<time_in_ms> to the request will override the
+  current time.  (Peter Sturge, yonik)
+
+* SOLR-1566: Transforming documents in the ResponseWriters.  This will allow
+  for more complex results in responses and open the door for function queries
+  as results. 
+  (ryan with patches from grant, noble, cmale, yonik, Jan Høydahl, 
+  Arul Kalaipandian, hossman)
+  SOLR-2037: Thanks to SOLR-1566, documents boosted by the QueryElevationComponent
+  can be marked as boosted.  (gsingers, ryan, yonik)
+
+* SOLR-2396: Add CollationField, which is much more efficient than 
+  the Solr 3.x CollationKeyFilterFactory, and also supports 
+  Locale-sensitive range queries. (rmuir)
+
+* SOLR-2338: Add support for using <similarity/> in a schema's fieldType,
+  for customizing scoring on a per-field basis. (hossman, yonik, rmuir)
+  
+* SOLR-2335: New 'field("...")' function syntax for referring to complex 
+  field names (containing whitespace or special characters) in functions.
+
+* SOLR-1709: Distributed support for Date and Numeric Range Faceting
+  (Peter Sturge, David Smiley, hossman)
+
+* SOLR-2383: /browse improvements: generalize range and date facet display
+  (Jan Høydahl via yonik)
+
+* SOLR-2272: Pseudo-join queries / filters.  Examples:
+  To restrict to the set of parents with at least one blue-eyed child:
+    fq={!join from=parent to=name}eyes:blue
+  To restrict to the set of children with at least one blue-eyed parent:
+    fq={!join from=name to=parent}eyes:blue
+  (yonik)
+
+* SOLR-1942: Added the ability to select postings format per fieldType in schema.xml
+  as well as support custom Codecs in solrconfig.xml.
+  (simonw via rmuir)
+
+* SOLR-2136: Boolean type added to function queries, along with
+  new functions exists(), if(), and(), or(), xor(), not(), def(),
+  and true and false constants. (yonik) 
+
+* SOLR-2491: Add support for using spellcheck collation in conjunction
+  with grouping. Note that the number of hits returned for collations
+  is the number of ungrouped hits.  (James Dyer via rmuir)
+  
+* SOLR-1298: Return FunctionQuery as pseudo field.  The solr 'fl' param
+  now supports functions.  For example:  fl=id,sum(x,y) -- NOTE: only
+  functions with fast random access are recommended. (yonik, ryan)
+  
+* SOLR-705: Optionally return shard info with each document in distributed
+  search.  Use fl=id,[shard]  to return the shard url.  (ryan)  
+
+* SOLR-2417: Add explain info directly to return documents using 
+  ?fl=id,[explain] (ryan)
+
+* SOLR-2533: Converted ValueSource.ValueSourceSortField over to new rewriteable Lucene
+  SortFields.  ValueSourceSortField instances must be rewritten before they can be used.
+  This is done by SolrIndexSearcher when necessary. (Chris Male).
+
+* SOLR-2193, SOLR-2565: You may now specify a 'soft' commit when committing. This will
+  use Lucene's NRT feature to avoid guaranteeing documents are on stable storage in exchange
+  for faster reopen times. There is also a new 'soft' autocommit tracker that can be
+  configured. (Mark Miller, Robert Muir)
+
+* SOLR-2399: Updated Solr Admin interface.  New look and feel with per core administration
+  and many new options.  (Stefan Matheis via ryan)
+
+* SOLR-1032: CSV handler now supports "literal.field_name=value" parameters.
+  (Simon Rosenthal, ehatcher)
+
+* SOLR-2656: realtime-get, efficiently retrieves the latest stored fields for specified
+  documents, even if they are not yet searchable (i.e. without reopening a searcher)
+  (yonik)
+  
+* SOLR-2703: Added support for Lucene's "surround" query parser. (Simon Rosenthal, ehatcher)
+
+* SOLR-2754: Added factories for several ranking algorithms: 
+    BM25SimilarityFactory: Okapi BM25
+    DFRSimilarityFactory: Divergence from Randomness models
+    IBSimilarityFactory: Information-based models
+    LMDirichletSimilarity: LM with Dirichlet smoothing
+    LMJelinekMercerSimilarity: LM with Jelinek-Mercer smoothing
+ (David Mark Nemeskey, Robert Muir)
+ 
+* SOLR-2134: Trie* fields should support sortMissingLast=true, and deprecate Sortable* Field Types
+  (Ryan McKinley, Mike McCandless, Uwe Schindler, Erick Erickson)
+    
+* SOLR-2438: Case insensitive search for wildcard queries. Actually, the ability to specify
+  a complete analysis chain for multiterm queries.   
+  (Peter Sturge, Erick Erickson, Mentoring from Seeley and Muir)
+
+
+Optimizations
+----------------------
+
+* SOLR-1875: Per-segment field faceting for single valued string fields.
+  Enable with facet.method=fcs, control the number of threads used with
+  the "threads" local param on the facet.field param.  This algorithm will
+  only be faster in the presence of rapid index changes.  (yonik)
+
+* SOLR-1904: When facet.enum.cache.minDf > 0 and the base doc set is a
+  SortedIntSet, convert to HashDocSet for better performance. (yonik)
+
+* SOLR-1843: A new "rootName" attribute is now available when
+  configuring <jmx/> in solrconfig.xml.  If this attribute is set,
+  Solr will use it as the root name for all MBeans Solr exposes via
+  JMX.  The default root name is "solr" followed by the core name.
+  (Constantijn Visinescu, hossman)
+
+* SOLR-2092: Speed up single-valued and multi-valued "fc" faceting. Typical
+  improvement is 5%, but can be much greater (up to 10x faster) when facet.offset
+  is very large (deep paging). (yonik)
+  
+* SOLR-2193, SOLR-2565: The default Solr update handler has been improved so
+  that it uses fewer locks, keeps the IndexWriter open rather than closing it
+  on each commit (ie commits no longer wait for background merges to complete), 
+  works with SolrCore to provide faster 'soft' commits, and has an improved API 
+  that requires less instanceof special casing. (Mark Miller, Robert Muir)
+  Additional Work:
+  SOLR-2697: commit and autocommit operations don't reset 
+  DirectUpdateHandler2.numDocsPending stats attribute.
+  (Alexey Serba, Mark Miller)
+
+Bug Fixes
+----------------------
+
+* SOLR-2762: FSTLookup could return duplicate results or one result less
+  than requested. (David Smiley, Dawid Weiss)
+
+* SOLR-2741: Bugs in facet range display in trunk (janhoy)
+
+* SOLR-1908: Fixed SignatureUpdateProcessor to fail to initialize on
+  invalid config.  Specifically: a signatureField that does not exist,
+  or overwriteDupes=true with a signatureField that is not indexed.
+  (hossman)
+
+* SOLR-1824: IndexSchema will now fail to initialize if there is a
+  problem initializing one of the fields or field types. (hossman)
+
+* SOLR-1928: TermsComponent didn't correctly break ties for non-text
+  fields sorted by count. (yonik)
+
+* SOLR-2107: MoreLikeThisHandler doesn't work with alternate qparsers. (yonik)
+
+* SOLR-2108: Fixed false positives when using wildcard queries on fields with reversed
+  wildcard support. For example, a query of *zemog* would match documents that contain
+  'gomez'.  (Landon Kuhn via Robert Muir)
+
+* SOLR-1962: SolrCore#initIndex should not use a mix of indexPath and newIndexPath (Mark Miller)
+
+* SOLR-2275: fix DisMax 'mm' parsing to be tolerant of whitespace
+  (Erick Erickson via hossman)
+  
+* SOLR-2193, SOLR-2565, SOLR-2651: SolrCores now properly share IndexWriters across SolrCore reloads.
+  (Mark Miller, Robert Muir)
+  Additional Work:
+  SOLR-2705: On reload, IndexWriterProvider holds onto the initial SolrCore it was created with.
+  (Yury Kats, Mark Miller)
+
+* SOLR-2682: Remove addException() in SimpleFacet. FacetComponent no longer catches and embeds
+  exceptions occurred during facet processing, it throws HTTP 400 or 500 exceptions instead. (koji)
+
+* SOLR-2654: Directorys used by a SolrCore are now closed when they are no longer used.
+  (Mark Miller)
+  
+* SOLR-2854: Now load URL content stream data (via stream.url) when called for during request handling,
+  rather than loading URL content streams automatically regardless of use.
+  (David Smiley and Ryan McKinley via ehatcher)
+  
+* SOLR-2829: Fix problem with false-positives due to incorrect
+  equals methods. (Yonik Seeley, Hossman, Erick Erickson. 
+  Marc Tinnemeyer caught the bug)  
+
+* SOLR-2848: Removed 'instanceof AbstractLuceneSpellChecker' hacks from distributed spellchecking code,
+  and added a merge() method to SolrSpellChecker instead. Previously if you extended SolrSpellChecker
+  your spellchecker would not work in distributed fashion.  (James Dyer via rmuir)
+  
+Other Changes
+----------------------
+
+* SOLR-1846: Eliminate support for the abortOnConfigurationError
+  option.  It has never worked very well, and in recent versions of
+  Solr hasn't worked at all.  (hossman)
+
+* SOLR-1889: The default logic for the 'mm' param of DismaxQParser and
+  ExtendedDismaxQParser has been changed to be determined based on the
+  effective value of the 'q.op' param (hossman)
+
+* SOLR-1946: Misc improvements to the SystemInfoHandler: /admin/system
+  (hossman)
+
+* SOLR-2289: Tweak spatial coords for example docs so they are a bit
+  more spread out (Erick Erickson via hossman)
+
+* SOLR-2288: Small tweaks to eliminate compiler warnings.  primarily
+  using Generics where applicable in method/object declarations, and
+  adding @SuppressWarnings("unchecked") when appropriate (hossman)
+
+* SOLR-2375: Suggester Lookup implementations now store trie data
+  and load it back on init. This means that large tries don't have to be
+  rebuilt on every commit or core reload. (ab)
+
+* SOLR-2413: Support for returning multi-valued fields w/o <arr> tag 
+  in the XMLResponseWriter was removed.  XMLResponseWriter 
+  no longer works with values less than 2.2 (ryan)
+
+* SOLR-2423: FieldType argument changed from String to Object
+  Conversion from SolrInputDocument > Object > Fieldable is now managed
+  by FieldType rather than DocumentBuilder.  (ryan)
+
+* SOLR-2461: QuerySenderListener and AbstractSolrEventListener are 
+  now public (hossman)
+
+* LUCENE-2995: Moved some spellchecker and suggest APIs to modules/suggest:
+  HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the
+  suggester APIs and implementations. (rmuir)
+
+* SOLR-2576: Remove deprecated SpellingResult.add(Token, int).
+  (James Dyer via rmuir)
+
+* LUCENE-3232: Moved MutableValue classes to new 'common' module. (Chris Male)
+
+* LUCENE-2883: FunctionQuery, DocValues (and its impls), ValueSource (and its
+  impls) and BoostedQuery have been consolidated into the queries module.  They
+  can now be found at o.a.l.queries.function.
+
+* SOLR-2027: FacetField.getValues() now returns an empty list if there are no
+  values, instead of null (Chris Male)
+
+* SOLR-1825: SolrQuery.addFacetQuery now enables facets automatically, like
+  addFacetField (Chris Male)
+
+* SOLR-2663: FieldTypePluginLoader has been refactored out of IndexSchema 
+  and made public. (hossman)
+
+* SOLR-2331,SOLR-2691: Refactor CoreContainer's SolrXML serialization code and improve testing
+  (Yury Kats, hossman, Mark Miller)
+  
+* SOLR-2698: Enhance CoreAdmin STATUS command to return index size.
+  (Yury Kats, hossman, Mark Miller)
+  
+* SOLR-2654: The same Directory instance is now always used across a SolrCore so that
+  it's easier to add other DirectoryFactory's without static caching hacks.
+  (Mark Miller)
+
+* LUCENE-3286: 'luke' ant target has been disabled due to incompatibilities with XML
+  queryparser location (Chris Male)
+  
+* SOLR-1897: The data dir from the core descriptor should override the data dir from 
+  the solrconfig.xml rather than the other way round. (Mark Miller)
+
+* SOLR-2756: Maven configuration: Excluded transitive stax:stax-api dependency
+  from org.codehaus.woodstox:wstx-asl dependency. (David Smiley via Steve Rowe)
+
+* SOLR-2588: Moved VelocityResponseWriter back to contrib module in order to 
+  remove it as a mandatory core dependency.  (ehatcher)
+  
+* SOLR-2718: Add ability to lazy load response writers, defined with startup="lazy".
+  (ehatcher)
+
+* SOLR-2862: More explicit lexical resources location logged if Carrot2 clustering 
+  extension is used. Fixed solr. impl. of IResource and IResourceLookup. (Dawid Weiss)
+
+* SOLR-1123: Changed JSONResponseWriter to now use application/json as its Content-Type
+  by default.  However the Content-Type can be overwritten and is set to text/plain in
+  the example configuration. (Uri Boness, Chris Male)
+  
+* SOLR-2607: Removed deprecated client/ruby directory, which included solr-ruby and flare.
+  (ehatcher)
+
+Documentation
+----------------------
+
+* SOLR-2232: Improved README info on solr.solr.home in examples
+  (Eric Pugh and hossman)
+  
+==================  3.6.0  ==================
+
+New Features
+----------------------
+* SOLR-2904: BinaryUpdateRequestHandler should be able to accept multiple update requests from
+  a stream (shalin)
+
+* SOLR-1565: StreamingUpdateSolrServer supports RequestWriter API and therefore, javabin update
+  format (shalin)
+
+* SOLR-2438: Case insensitive search for wildcard queries. Actually, the ability to specify
+  a complete analysis chain for multiterm queries.   
+  (Peter Sturge, Erick Erickson, Mentoring from Seeley and Muir)
+
+
+Bug Fixes
+----------------------
+* SOLR-2912: Fixed File descriptor leak in ShowFileRequestHandler (Michael Ryan, shalin)
+
+==================  3.5.0  ==================
+
+New Features
+----------------------
 * SOLR-2749: Add boundary scanners for FastVectorHighlighter. <boundaryScanner/>
   can be specified with a name in solrconfig.xml, and use hl.boundaryScanner=name
   parameter to specify the named <boundaryScanner/>. (koji)

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java?rev=1206258&r1=1206257&r2=1206258&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java Fri Nov 25 16:54:07 2011
@@ -48,13 +48,15 @@ public abstract class FieldProperties {
   
   protected final static int REQUIRED            = 0x00001000;
   protected final static int OMIT_POSITIONS      = 0x00002000;
+  protected final static int LEGACY_MULTITERM    = 0x00004000;
   
   static final String[] propertyNames = {
           "indexed", "tokenized", "stored",
           "binary", "omitNorms", "omitTermFreqAndPositions",
           "termVectors", "termPositions", "termOffsets",
           "multiValued",
-          "sortMissingFirst","sortMissingLast","required", "omitPositions"
+          "sortMissingFirst","sortMissingLast","required", "omitPositions",
+          "legacyMultiTerm"
   };
 
   static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldType.java?rev=1206258&r1=1206257&r2=1206258&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldType.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/FieldType.java Fri Nov 25 16:54:07 2011
@@ -431,6 +431,21 @@ public abstract class FieldType extends 
   protected Analyzer queryAnalyzer=analyzer;
 
   /**
+    * Analyzer set by schema for text types to use when searching fields
+    * of this type, subclasses can set analyzer themselves or override
+    * getAnalyzer()
+    * This analyzer is used to process wildcard, prefix, regex and other multiterm queries. It
+    * assembles a list of tokenizer +filters that "make sense" for this, primarily accent folding and
+    * lowercasing filters, and charfilters.
+    *
+    * If users require old-style behavior, they can specify 'legacyMultiTerm="true"' in the schema file
+    * @see #getMultiTermAnalyzer
+    * @see #setMultiTermAnalyzer
+    */
+   protected Analyzer multiTermAnalyzer=null;
+
+
+  /**
    * Returns the Analyzer to be used when indexing fields of this type.
    * <p>
    * This method may be called many times, at any time.
@@ -452,7 +467,19 @@ public abstract class FieldType extends 
     return queryAnalyzer;
   }
 
-  private final String analyzerError = 
+  /**
+   * Returns the Analyzer to be used when searching fields of this type when multi-term queries are specified.
+   * <p>
+   * This method may be called many times, at any time.
+   * </p>
+   *
+   * @see #getAnalyzer
+   */
+  public Analyzer getMultiTermAnalyzer() {
+    return multiTermAnalyzer;
+  }
+
+  private final String analyzerError =
     "FieldType: " + this.getClass().getSimpleName() + 
     " (" + typeName + ") does not support specifying an analyzer";
 
@@ -480,6 +507,28 @@ public abstract class FieldType extends 
 
   /**
    * Sets the Analyzer to be used when querying fields of this type.
+   *
+   * <p>
+   * The default implementation throws a SolrException.
+   * Subclasses that override this method need to ensure the behavior
+   * of the analyzer is consistent with the implementation of toInternal.
+   * </p>
+   *
+   * @see #toInternal
+   * @see #setAnalyzer
+   * @see #getQueryAnalyzer
+   */
+  public void setMultiTermAnalyzer(Analyzer analyzer) {
+    SolrException e = new SolrException
+        (ErrorCode.SERVER_ERROR,
+            "FieldType: " + this.getClass().getSimpleName() +
+                " (" + typeName + ") does not support specifying an analyzer");
+    SolrException.logOnce(log, null, e);
+    throw e;
+  }
+
+  /**
+   * Sets the Analyzer to be used when querying fields of this type.
    *
    * <p>
    * The default implementation throws a SolrException.  
@@ -500,6 +549,7 @@ public abstract class FieldType extends 
     throw e;
   }
 
+
   /**
    * Renders the specified field as XML
    */

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java?rev=1206258&r1=1206257&r2=1206258&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/IndexSchema.java Fri Nov 25 16:54:07 2011
@@ -18,11 +18,13 @@
 package org.apache.solr.schema;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.util.Version;
+import org.apache.solr.analysis.*;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.SolrParams;
@@ -31,10 +33,6 @@ import org.apache.solr.common.util.Syste
 import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.Config;
 import org.apache.solr.core.SolrResourceLoader;
-import org.apache.solr.analysis.CharFilterFactory;
-import org.apache.solr.analysis.TokenFilterFactory;
-import org.apache.solr.analysis.TokenizerChain;
-import org.apache.solr.analysis.TokenizerFactory;
 import org.apache.solr.search.SolrQueryParser;
 import org.apache.solr.util.plugin.AbstractPluginLoader;
 import org.apache.solr.util.plugin.SolrCoreAware;
@@ -444,6 +442,11 @@ public final class IndexSchema {
           Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
           Analyzer queryAnalyzer = readAnalyzer(anode);
 
+          expression = "./analyzer[@type='multiterm']";
+          anode = (Node) xpath.evaluate(expression, node, XPathConstants.NODE);
+          Analyzer multiAnalyzer = readAnalyzer(anode);
+
+
           // An analyzer without a type specified, or with type="index"
           expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
           anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
@@ -451,9 +454,17 @@ public final class IndexSchema {
 
           if (queryAnalyzer==null) queryAnalyzer=analyzer;
           if (analyzer==null) analyzer=queryAnalyzer;
+          if (multiAnalyzer == null) {
+            Boolean legacyMatch = ! solrConfig.luceneMatchVersion.onOrAfter(Version.LUCENE_36);;
+            legacyMatch = (DOMUtil.getAttr(node, "legacyMultiTerm", null) == null) ? legacyMatch :
+                Boolean.parseBoolean(DOMUtil.getAttr(node, "legacyMultiTerm", null));
+            multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer, legacyMatch);
+          }
+
           if (analyzer!=null) {
             ft.setAnalyzer(analyzer);
             ft.setQueryAnalyzer(queryAnalyzer);
+            ft.setMultiTermAnalyzer(multiAnalyzer);
           }
           if (ft instanceof SchemaAware){
             schemaAware.add((SchemaAware) ft);
@@ -697,6 +708,42 @@ public final class IndexSchema {
     }
   }
 
+  // If no multiterm analyzer was specified in the schema file, synthesize one from the query analyzer:
+  // 1> If legacyMultiTerm == false, assemble a new analyzer composed of all of the charfilters, a
+  //    whitespace tokenizer, and any lowercase and asciifolding filters found in the query chain.
+  // 2> If legacyMultiTerm == true, or the query analyzer is not a TokenizerChain, return a
+  //    KeywordAnalyzer instead. That should mimic the old behavior.
+
+  private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer, Boolean legacyMultiTerm) {
+    if (queryAnalyzer == null) return null;
+
+    if (legacyMultiTerm || (!(queryAnalyzer instanceof TokenizerChain))) {
+      return new KeywordAnalyzer();
+    }
+
+    TokenizerChain tc = (TokenizerChain) queryAnalyzer;
+
+    // room for one lowercase and one asciifolding filter; NOTE: a chain with more matches would overflow
+    TokenFilterFactory[] filters = new TokenFilterFactory[2];
+    int idx = 0;
+    for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
+      if (factory instanceof LowerCaseFilterFactory) {
+        filters[idx] = new LowerCaseFilterFactory();
+        filters[idx++].init(factory.getArgs());
+      }
+      if (factory instanceof ASCIIFoldingFilterFactory) {
+        filters[idx] = new ASCIIFoldingFilterFactory();
+        filters[idx++].init(factory.getArgs());
+      }
+    }
+    WhitespaceTokenizerFactory white = new WhitespaceTokenizerFactory();
+    white.init(tc.getTokenizerFactory().getArgs());
+
+    return new TokenizerChain(tc.getCharFilterFactories(),
+        white,
+        Arrays.copyOfRange(filters, 0, idx));
+  }
+
   /**
    * Register one or more new Dynamic Field with the Schema.
    * @param f The {@link org.apache.solr.schema.SchemaField}

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/SchemaField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/SchemaField.java?rev=1206258&r1=1206257&r2=1206258&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/SchemaField.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/SchemaField.java Fri Nov 25 16:54:07 2011
@@ -99,6 +99,9 @@ public final class SchemaField extends F
   boolean isTokenized() { return (properties & TOKENIZED)!=0; }
   boolean isBinary() { return (properties & BINARY)!=0; }
 
+  boolean legacyMultiTerm() {
+    return (properties & LEGACY_MULTITERM) != 0;
+  }
 
   public Fieldable createField(String val, float boost) {
     return type.createField(this,val,boost);

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/TextField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/TextField.java?rev=1206258&r1=1206257&r2=1206258&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/TextField.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/schema/TextField.java Fri Nov 25 16:54:07 2011
@@ -98,6 +98,11 @@ public class TextField extends FieldType
     this.queryAnalyzer = analyzer;
   }
 
+  @Override
+  public void setMultiTermAnalyzer(Analyzer analyzer) {
+    this.multiTermAnalyzer = analyzer;
+  }
+
   static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) {
     int phraseSlop = 0;
     boolean enablePositionIncrements = true;

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java?rev=1206258&r1=1206257&r2=1206258&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java Fri Nov 25 16:54:07 2011
@@ -25,7 +25,6 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.*;
-import org.apache.lucene.util.Version;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.solr.analysis.*;
 import org.apache.solr.common.SolrException;
@@ -126,6 +125,14 @@ public class SolrQueryParser extends Que
     }
   }
 
+  protected String analyzeIfMultitermTermText(String field, String part, Analyzer analyzer) {
+    if (part == null) return part;
+
+    SchemaField sf = schema.getFieldOrNull((field));
+    if (sf == null || !(sf.getType() instanceof TextField)) return part;
+    return analyzeMultitermTerm(field, part, analyzer);
+  }
+
   @Override
   protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
     checkNullField(field);
@@ -161,6 +168,9 @@ public class SolrQueryParser extends Que
   @Override
   protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException {
     checkNullField(field);
+    part1 = analyzeIfMultitermTermText(field, part1, schema.getFieldType(field).getMultiTermAnalyzer());
+    part2 = analyzeIfMultitermTermText(field, part2, schema.getFieldType(field).getMultiTermAnalyzer());
+
     SchemaField sf = schema.getField(field);
     return sf.getType().getRangeQuery(parser, sf,
             "*".equals(part1) ? null : part1,
@@ -175,6 +185,8 @@ public class SolrQueryParser extends Que
       termStr = termStr.toLowerCase();
     }
 
+    termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
+
     // TODO: toInternal() won't necessarily work on partial
     // values, so it looks like we need a getPrefix() function
     // on fieldtype?  Or at the minimum, a method on fieldType
@@ -189,14 +201,14 @@ public class SolrQueryParser extends Que
     PrefixQuery prefixQuery = new PrefixQuery(t);
     return prefixQuery;
   }
-
   @Override
   protected Query getWildcardQuery(String field, String termStr) throws ParseException {
     // *:* -> MatchAllDocsQuery
     if ("*".equals(field) && "*".equals(termStr)) {
       return newMatchAllDocsQuery();
     }
-    
+    termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
+
     // can we use reversed wildcards in this field?
     String type = schema.getFieldType(field).getTypeName();
     ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
@@ -216,4 +228,5 @@ public class SolrQueryParser extends Que
     }
     return q;
   }
+
 }

Added: lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml?rev=1206258&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml (added)
+++ lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-folding.xml Fri Nov 25 16:54:07 2011
@@ -0,0 +1,145 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+
+<schema name="test" version="1.0">
+  <types>
+    <fieldtype name="string" class="solr.StrField" sortMissingLast="true" multiValued="false"/>
+
+    <fieldType name="text" class="solr.TextField" multiValued="false">
+      <analyzer>
+        <tokenizer class="solr.PatternTokenizerFactory" pattern="\s+"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_multi" class="solr.TextField" multiValued="true">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+        <filter class="solr.TrimFilterFactory"/>
+      </analyzer>
+      <analyzer type="multiterm">        <!-- Intentionally different to test that these are kept distinct -->
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_multi_bad" class="solr.TextField" multiValued="false">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+        <filter class="solr.TrimFilterFactory"/>
+      </analyzer>
+      <analyzer type="multiterm">        <!-- Intentionally different, and intentionally bad: WordDelimiterFilter can split one term into several tokens, which must be rejected -->
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
+                catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+
+    <fieldType name="text_ws" class="solr.TextField" multiValued="true">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_rev" class="solr.TextField" legacyMultiTerm="false">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="false"
+                maxPosAsterisk="1" maxPosQuestion="2" maxFractionAsterisk="0.99"
+                minTrailing="1"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_lower_tokenizer" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_charfilter" class="solr.TextField" multiValued="false">
+      <analyzer type="index">
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="text_oldstyle" class="solr.TextField" multiValued="false" legacyMultiTerm="true">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.TrimFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="byte" class="solr.ByteField" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="short" class="solr.ShortField" omitNorms="true" positionIncrementGap="0"/>
+    <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldtype name="date" class="solr.TrieDateField" precisionStep="0"/>
+  </types>
+
+  <fields>
+    <field name="id" type="string" indexed="true" stored="true" required="true"/>
+    <field name="int_f" type="int"/>
+    <field name="float_f" type="float"/>
+    <field name="long_f" type="long"/>
+    <field name="double_f" type="double"/>
+    <field name="byte_f" type="byte"/>
+    <field name="short_f" type="short"/>
+    <field name="bool_f" type="boolean"/>
+    <field name="date_f" type="date"/>
+
+    <field name="content" type="text" indexed="true" stored="true"/>
+    <field name="content_ws" type="text_ws" indexed="true" stored="true"/>
+    <field name="content_rev" type="text_rev" indexed="true" stored="true"/>
+    <field name="content_multi" type="text_multi" indexed="true" stored="true"/>
+    <field name="content_lower_token" type="text_multi" indexed="true" stored="true"/>
+    <field name="content_oldstyle" type="text_oldstyle" indexed="true" stored="true"/>
+    <field name="content_charfilter" type="text_charfilter" indexed="true" stored="true"/>
+    <field name="content_multi_bad" type="text_multi_bad" indexed="true" stored="true"/>
+  </fields>
+
+  <defaultSearchField>content</defaultSearchField>
+  <uniqueKey>id</uniqueKey>
+
+</schema>

Added: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java?rev=1206258&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java (added)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java Fri Nov 25 16:54:07 2011
@@ -0,0 +1,87 @@
+package org.apache.solr.schema;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.analysis.*;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class MultiTermTest extends SolrTestCaseJ4 {
+  public String getCoreName() {
+    return "basic";
+  }
+
+  @BeforeClass
+  public static void beforeTests() throws Exception {
+    initCore("solrconfig.xml", "schema-folding.xml");
+  }
+
+  @Test
+  public void testMultiFound() {
+    SchemaField field = h.getCore().getSchema().getField("content_multi");
+    Analyzer analyzer = field.getType().getMultiTermAnalyzer();
+    assertTrue(analyzer instanceof TokenizerChain);
+    assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
+    TokenizerChain tc = (TokenizerChain) analyzer;
+    for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
+      assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
+    }
+
+    analyzer = field.getType().getAnalyzer();
+    assertTrue(analyzer instanceof TokenizerChain);
+    assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
+    tc = (TokenizerChain) analyzer;
+    for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
+      assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof TrimFilterFactory));
+    }
+
+    assertTrue(tc.getCharFilterFactories().length == 0);
+  }
+
+  @Test
+  public void testQueryCopiedToMulti() {
+    SchemaField field = h.getCore().getSchema().getField("content_charfilter");
+    Analyzer analyzer = field.getType().getMultiTermAnalyzer();
+    assertTrue(analyzer instanceof TokenizerChain);
+    assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
+    TokenizerChain tc = (TokenizerChain) analyzer;
+    for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
+      assertTrue(factory instanceof LowerCaseFilterFactory);
+    }
+
+    assertTrue(tc.getCharFilterFactories().length == 1);
+    assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
+  }
+
+  @Test
+  public void testDefaultCopiedToMulti() {
+    SchemaField field = h.getCore().getSchema().getField("content_ws");
+    Analyzer analyzer = field.getType().getMultiTermAnalyzer();
+    assertTrue(analyzer instanceof TokenizerChain);
+    assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
+    TokenizerChain tc = (TokenizerChain) analyzer;
+    for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
+      assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
+    }
+
+    assertTrue(tc.getCharFilterFactories().length == 0);
+
+  }
+}

Added: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java?rev=1206258&view=auto
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java (added)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java Fri Nov 25 16:54:07 2011
@@ -0,0 +1,231 @@
+package org.apache.solr.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
+
+  public String getCoreName() {
+    return "basic";
+  }
+
+  @BeforeClass
+  public static void beforeTests() throws Exception {
+    initCore("solrconfig.xml", "schema-folding.xml");
+    IndexWriter iw;
+
+    String docs[] = {
+        "abcdefg1 finger",
+        "gangs hijklmn1",
+        "opqrstu1 zilly",
+    };
+
+    // prepare the index
+    for (int i = 0; i < docs.length; i++) {
+      String num = Integer.toString(i);
+      String boolVal = ((i % 2) == 0) ? "true" : "false";
+      assertU(adoc("id", num,
+          "int_f", num,
+          "float_f", num,
+          "long_f", num,
+          "double_f", num,
+          "byte_f", num,
+          "short_f", num,
+          "bool_f", boolVal,
+          "date_f", "200" + Integer.toString(i % 10) + "-01-01T00:00:00Z",
+          "content", docs[i],
+          "content_ws", docs[i],
+          "content_rev", docs[i],
+          "content_multi", docs[i],
+          "content_lower_token", docs[i],
+          "content_oldstyle", docs[i],
+          "content_charfilter", docs[i],
+          "content_multi_bad", docs[i]
+      ));
+    }
+    assertU(optimize());
+  }
+
+  @Test
+  public void testPrefixCaseAccentFolding() throws Exception {
+    String matchOneDocPrefixUpper[][] = {
+        {"A*", "ÁB*", "ABÇ*"},   // these should find only doc 0
+        {"H*", "HÏ*", "HìJ*"},   // these should find only doc 1
+        {"O*", "ÖP*", "OPQ*"},   // these should find only doc 2
+    };
+
+    String matchRevPrefixUpper[][] = {
+        {"*Ğ1", "*DEfG1", "*EfG1"},
+        {"*N1", "*LmŊ1", "*MÑ1"},
+        {"*Ǖ1", "*sTu1", "*RŠTU1"}
+    };
+
+    // Test that the prefix queries find only one doc when the query is uppercased. Must go through query parser here!
+    for (int idx = 0; idx < matchOneDocPrefixUpper.length; idx++) {
+      for (int jdx = 0; jdx < matchOneDocPrefixUpper[idx].length; jdx++) {
+        String me = matchOneDocPrefixUpper[idx][jdx];
+        assertQ(req("q", "content:" + me),
+            "//*[@numFound='1']",
+            "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+        assertQ(req("q", "content_ws:" + me),
+            "//*[@numFound='1']",
+            "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+        assertQ(req("q", "content_multi:" + me),
+            "//*[@numFound='1']",
+            "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+        assertQ(req("q", "content_lower_token:" + me),
+            "//result[@numFound='1']",
+            "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+      }
+    }
+    for (int idx = 0; idx < matchRevPrefixUpper.length; idx++) {
+      for (int jdx = 0; jdx < matchRevPrefixUpper[idx].length; jdx++) {
+        String me = matchRevPrefixUpper[idx][jdx];
+        assertQ(req("q", "content_rev:" + me),
+            "//*[@numFound='1']",
+            "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+      }
+    }
+  }
+
+  // Test that the wildcard queries find only one doc when the query is uppercased and/or accented.
+  @Test
+  public void testWildcardCaseAccentFolding() throws Exception {
+    String matchOneDocWildUpper[][] = {
+        {"Á*C*", "ÁB*1", "ABÇ*g1", "Á*FG1"},      // these should find only doc 0
+        {"H*k*", "HÏ*l?*", "HìJ*n*", "HìJ*m*"},   // these should find only doc 1
+        {"O*ř*", "ÖP*ş???", "OPQ*S?Ů*", "ÖP*1"},  // these should find only doc 2
+    };
+
+    for (int idx = 0; idx < matchOneDocWildUpper.length; idx++) {
+      for (int jdx = 0; jdx < matchOneDocWildUpper[idx].length; jdx++) {
+        String me = matchOneDocWildUpper[idx][jdx];
+        assertQ("Error with " + me, req("q", "content:" + me),
+            "//result[@numFound='1']",
+            "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+        assertQ(req("q", "content_ws:" + me),
+            "//result[@numFound='1']",
+            "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+        assertQ(req("q", "content_multi:" + me),
+            "//result[@numFound='1']",
+            "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+        assertQ(req("q", "content_lower_token:" + me),
+            "//result[@numFound='1']",
+            "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+      }
+    }
+  }
+
+  // Phrases should fail. This test is mainly a marker so that, if phrases ever do start working with wildcards,
+  // we remember to update the documentation.
+  @Test
+  public void testPhrase() {
+    assertQ(req("q", "content:\"silly ABCD*\""),
+        "//result[@numFound='0']");
+  }
+
+  // Make sure the legacy behavior flag is honored
+  @Test
+  public void testLegacyBehavior() {
+    assertQ(req("q", "content_oldstyle:ABCD*"),
+        "//result[@numFound='0']");
+  }
+
+  @Test
+  public void testWildcardRange() {
+    assertQ(req("q", "content:[* TO *]"),
+        "//result[@numFound='3']");
+  }
+
+
+  // Does the char filter get correctly handled?
+  @Test
+  public void testCharFilter() {
+    assertQ(req("q", "content_charfilter:" + "Á*C*"),
+        "//result[@numFound='1']",
+        "//*[@name='id'][.='0']");
+    assertQ(req("q", "content_charfilter:" + "ABÇ*g1"),
+        "//result[@numFound='1']",
+        "//*[@name='id'][.='0']");
+    assertQ(req("q", "content_charfilter:" + "HÏ*l?*"),
+        "//result[@numFound='1']",
+        "//*[@name='id'][.='1']");
+  }
+
+  @Test
+  public void testRangeQuery() {
+    assertQ(req("q", "content:" + "{Ȫp*1 TO QŮ*}"),
+        "//result[@numFound='1']",
+        "//*[@name='id'][.='2']");
+
+    assertQ(req("q", "content:" + "[Áb* TO f?Ñg?r]"),
+        "//result[@numFound='1']",
+        "//*[@name='id'][.='0']");
+
+  }
+
+  @Test
+  public void testNonTextTypes() {
+    String[] intTypes = {"int_f", "float_f", "long_f", "double_f", "byte_f", "short_f"};
+
+    for (String str : intTypes) {
+      assertQ(req("q", str + ":" + "0"),
+          "//result[@numFound='1']",
+          "//*[@name='id'][.='0']");
+
+      assertQ(req("q", str + ":" + "[0 TO 2]"),
+          "//result[@numFound='3']",
+          "//*[@name='id'][.='0']",
+          "//*[@name='id'][.='1']",
+          "//*[@name='id'][.='2']");
+    }
+    assertQ(req("q", "bool_f:true"),
+        "//result[@numFound='2']",
+        "//*[@name='id'][.='0']",
+        "//*[@name='id'][.='2']");
+
+    assertQ(req("q", "bool_f:[false TO true]"),
+        "//result[@numFound='3']",
+        "//*[@name='id'][.='0']",
+        "//*[@name='id'][.='1']",
+        "//*[@name='id'][.='2']");
+
+    assertQ(req("q", "date_f:2000-01-01T00\\:00\\:00Z"),
+        "//result[@numFound='1']",
+        "//*[@name='id'][.='0']");
+
+    assertQ(req("q", "date_f:[2000-12-31T23:59:59.999Z TO 2002-01-02T00:00:01Z]"),
+        "//result[@numFound='2']",
+        "//*[@name='id'][.='1']",
+        "//*[@name='id'][.='2']");
+  }
+
+  @Test
+  public void testMultiBad() {
+    try {
+      assertQ(req("q", "content_multi_bad:" + "abCD*"));
+      fail("Should throw exception when token evaluates to more than one term");
+    } catch (Exception expected) {
+      assertTrue(expected.getCause() instanceof IllegalArgumentException);
+    }
+  }
+}
\ No newline at end of file

Modified: lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml?rev=1206258&r1=1206257&r2=1206258&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/branches/branch_3x/solr/example/solr/conf/schema.xml Fri Nov 25 16:54:07 2011
@@ -443,6 +443,78 @@
         <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
       </analyzer>
     </fieldType>
+    
+    <!-- Illustrates the new "multiterm" analyzer definition. The <fieldType> can take a new
+         parameter legacyMultiTerm="true" if the old behavior is desired. The new default
+         behavior as of 3.6+ is to automatically define a multiterm analyzer.
+    -->
+    <fieldType name="text_multiterm" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <!-- Illustrates the use of a new analyzer type "multiterm". See the Wiki page "Multiterm 
+           Query Analysis" and SOLR-2438 for full details. The short form is that this analyzer is
+           applied to multiterm query terms (prefix, wildcard, range) if specified. This allows, among other
+           things, not having to lowercase wildcard terms on the client.
+           
+           In the absence of this section, the new default behavior (3.6, 4.0) is to construct
+           one of these from the query analyzer that incorporates any defined charfilters, a
+           WhitespaceTokenizer, a LowerCaseFilter (if defined), and an ASCIIFoldingFilter 
+           (if defined).
+           
+           Arguably, this is an expert-level analyzer; most cases will be handled by an instance
+           of this being automatically constructed from the query analyzer.
+           
+      -->
+      <analyzer type="multiterm"> 
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- Illustrates the new "multiterm" analyzer definition. The <fieldType> can take a new
+         parameter legacyMultiTerm="true" if the old behavior is desired. The new default
+         behavior as of 3.6+ is to automatically define a multiterm analyzer.
+    -->
+    <fieldType name="text_multiterm" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <!-- Illustrates the use of a new analyzer type "multiterm". See the Wiki page "Multiterm 
+           Query Analysis" and SOLR-2438 for full details. The short form is that this analyzer is
+           applied to multiterm query terms (prefix, wildcard, range) if specified. This allows, among other
+           things, not having to lowercase wildcard terms on the client.
+           
+           In the absence of this section, the new default behavior (3.6, 4.0) is to construct
+           one of these from the query analyzer that incorporates any defined charfilters, a
+           WhitespaceTokenizer, a LowerCaseFilter (if defined), and an ASCIIFoldingFilter 
+           (if defined).
+           
+           Arguably, this is an expert-level analyzer; most cases will be handled by an instance
+           of this being automatically constructed from the query analyzer.
+           
+      -->
+      <analyzer type="multiterm"> 
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
+      </analyzer>
+    </fieldType>
 
     <!-- since fields of this type are by default not stored or indexed,
          any data added to them will be ignored outright.  --> 
@@ -552,7 +624,6 @@
    <!--
    <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
      -->
-   
 
    <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
         will be used if the name matches any of the patterns.