You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2011/11/27 18:04:41 UTC
svn commit: r1206767 - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/analysis/
core/src/java/org/apache/solr/schema/ core/src/java/org/apache/solr/search/
core/src/test-files/solr/conf/ core/src/test/org/apache/solr/schema/
core/src/test/...
Author: erick
Date: Sun Nov 27 17:04:38 2011
New Revision: 1206767
URL: http://svn.apache.org/viewvc?rev=1206767&view=rev
Log: (empty)
Added:
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MultiTermAwareComponent.java
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenFilterFactory.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldProperties.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SchemaField.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java
lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml
lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
lucene/dev/trunk/solr/example/solr/conf/schema.xml
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun Nov 27 17:04:38 2011
@@ -192,6 +192,11 @@ New Features
* SOLR-2438: Case insensitive search for wildcard queries. Actually, the ability to specify
a complete analysis chain for multiterm queries.
(Pete Sturge Erick Erickson, Mentoring from Seeley and Muir)
+
+* SOLR-2918: Improvement to SOLR-2438, added MultiTermAwareComponent to the various classes
+ that should transform multiterm queries in various ways, and use this as the criterion for
+ adding them to the multiterm analyzer that is constructed if not specified in the
+ <fieldType>.
Optimizations
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/ASCIIFoldingFilterFactory.java Sun Nov 27 17:04:38 2011
@@ -32,9 +32,14 @@ import org.apache.lucene.analysis.TokenS
* </fieldType></pre>
*
*/
-public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory {
+public class ASCIIFoldingFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
public ASCIIFoldingFilter create(TokenStream input) {
return new ASCIIFoldingFilter(input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this; // this factory serves unchanged as its own multi-term component
+ }
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseFilterFactory.java Sun Nov 27 17:04:38 2011
@@ -33,7 +33,7 @@ import org.apache.lucene.analysis.core.L
* </fieldType></pre>
*
*/
-public class LowerCaseFilterFactory extends BaseTokenFilterFactory {
+public class LowerCaseFilterFactory extends BaseTokenFilterFactory implements MultiTermAwareComponent {
@Override
public void init(Map<String,String> args) {
super.init(args);
@@ -43,4 +43,9 @@ public class LowerCaseFilterFactory exte
public LowerCaseFilter create(TokenStream input) {
return new LowerCaseFilter(luceneMatchVersion,input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this; // this factory serves unchanged as its own multi-term component
+ }
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/LowerCaseTokenizerFactory.java Sun Nov 27 17:04:38 2011
@@ -17,6 +17,7 @@
package org.apache.solr.analysis;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import java.io.Reader;
@@ -32,7 +33,7 @@ import java.util.Map;
* </fieldType></pre>
*
*/
-public class LowerCaseTokenizerFactory extends BaseTokenizerFactory {
+public class LowerCaseTokenizerFactory extends BaseTokenizerFactory implements MultiTermAwareComponent {
@Override
public void init(Map<String,String> args) {
super.init(args);
@@ -42,4 +43,11 @@ public class LowerCaseTokenizerFactory e
public LowerCaseTokenizer create(Reader input) {
return new LowerCaseTokenizer(luceneMatchVersion,input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ LowerCaseFilterFactory filt = new LowerCaseFilterFactory(); // hand back a lower-casing filter factory in place of this tokenizer factory
+ filt.init(args); // NOTE(review): args is presumably the map this factory received in init() -- confirm in BaseTokenizerFactory
+ return filt;
+ }
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MappingCharFilterFactory.java Sun Nov 27 17:04:38 2011
@@ -46,7 +46,7 @@ import org.apache.solr.util.plugin.Resou
*
*/
public class MappingCharFilterFactory extends BaseCharFilterFactory implements
- ResourceLoaderAware {
+ ResourceLoaderAware, MultiTermAwareComponent {
protected NormalizeCharMap normMap;
private String mapping;
@@ -126,4 +126,9 @@ public class MappingCharFilterFactory ex
}
return new String( out, 0, writePos );
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this; // this factory serves unchanged as its own multi-term component
+ }
}
Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MultiTermAwareComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MultiTermAwareComponent.java?rev=1206767&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MultiTermAwareComponent.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/MultiTermAwareComponent.java Sun Nov 27 17:04:38 2011
@@ -0,0 +1,31 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Add to any analysis factory component to allow returning an
+ * analysis component factory for use with partial terms in prefix queries,
+ * wildcard queries, range query endpoints, regex queries, etc.
+ *
+ * @lucene.experimental
+ */
+public interface MultiTermAwareComponent {
+ /** Returns an analysis component to handle analysis of multi-term queries.
+ * The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
+ *
+ * @return the factory to use for multi-term analysis; an implementation may return itself
+ */
+ public Object getMultiTermComponent();
+}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java Sun Nov 27 17:04:38 2011
@@ -31,10 +31,15 @@ import org.apache.lucene.analysis.fa.Per
* </fieldType></pre>
*
*/
-public class PersianCharFilterFactory extends BaseCharFilterFactory {
+public class PersianCharFilterFactory extends BaseCharFilterFactory implements MultiTermAwareComponent {
@Override
public CharStream create(CharStream input) {
return new PersianCharFilter(input);
}
+
+ @Override
+ public Object getMultiTermComponent() {
+ return this; // this factory serves unchanged as its own multi-term component
+ }
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenFilterFactory.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenFilterFactory.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/TokenFilterFactory.java Sun Nov 27 17:04:38 2011
@@ -67,3 +67,4 @@ public interface TokenFilterFactory {
/** Transform the specified input TokenStream */
public TokenStream create(TokenStream input);
}
+
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldProperties.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldProperties.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldProperties.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldProperties.java Sun Nov 27 17:04:38 2011
@@ -48,15 +48,13 @@ public abstract class FieldProperties {
protected final static int REQUIRED = 0x00001000;
protected final static int OMIT_POSITIONS = 0x00002000;
- protected final static int LEGACY_MULTITERM = 0x00004000;
-
+
static final String[] propertyNames = {
"indexed", "tokenized", "stored",
"binary", "omitNorms", "omitTermFreqAndPositions",
"termVectors", "termPositions", "termOffsets",
"multiValued",
- "sortMissingFirst","sortMissingLast","required", "omitPositions" ,
- "legacyMultiTerm"
+ "sortMissingFirst","sortMissingLast","required", "omitPositions"
};
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java Sun Nov 27 17:04:38 2011
@@ -429,21 +429,6 @@ public abstract class FieldType extends
protected Analyzer queryAnalyzer=analyzer;
/**
- * Analyzer set by schema for text types to use when searching fields
- * of this type, subclasses can set analyzer themselves or override
- * getAnalyzer()
- * This analyzer is used to process wildcard, prefix, regex and other multiterm queries. It
- * assembles a list of tokenizer +filters that "make sense" for this, primarily accent folding and
- * lowercasing filters, and charfilters.
- *
- * If users require old-style behavior, they can specify 'legacyMultiterm="true" ' in the schema file
- * @see #getMultiTermAnalyzer
- * @see #setMultiTermAnalyzer
- */
- protected Analyzer multiTermAnalyzer=null;
-
-
- /**
* Returns the Analyzer to be used when indexing fields of this type.
* <p>
* This method may be called many times, at any time.
@@ -465,20 +450,6 @@ public abstract class FieldType extends
return queryAnalyzer;
}
- /**
- * Returns the Analyzer to be used when searching fields of this type when mult-term queries are specified.
- * <p>
- * This method may be called many times, at any time.
- * </p>
- * @see #getAnalyzer
- */
- public Analyzer getMultiTermAnalyzer() {
- return multiTermAnalyzer;
- }
-
- private final String analyzerError =
- "FieldType: " + this.getClass().getSimpleName() +
- " (" + typeName + ") does not support specifying an analyzer";
/**
* Sets the Analyzer to be used when indexing fields of this type.
@@ -524,28 +495,6 @@ public abstract class FieldType extends
throw e;
}
- /**
- * Sets the Analyzer to be used when querying fields of this type.
- *
- * <p>
- *
- * Subclasses that override this method need to ensure the behavior
- * of the analyzer is consistent with the implementation of toInternal.
- * </p>
- *
- * @see #toInternal
- * @see #setAnalyzer
- * @see #getQueryAnalyzer
- */
- public void setMultiTermAnalyzer(Analyzer analyzer) {
- SolrException e = new SolrException
- (ErrorCode.SERVER_ERROR,
- "FieldType: " + this.getClass().getSimpleName() +
- " (" + typeName + ") does not support specifying an analyzer");
- SolrException.logOnce(log,null,e);
- throw e;
- }
-
/** @lucene.internal */
protected Similarity similarity;
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java Sun Nov 27 17:04:38 2011
@@ -102,15 +102,13 @@ public final class FieldTypePluginLoader
if (queryAnalyzer==null) queryAnalyzer=analyzer;
if (analyzer==null) analyzer=queryAnalyzer;
if (multiAnalyzer == null) {
- Boolean legacyMatch = ! schema.getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_36);
- legacyMatch = (DOMUtil.getAttr(node, "legacyMultiTerm", null) == null) ? legacyMatch :
- Boolean.parseBoolean(DOMUtil.getAttr(node, "legacyMultiTerm", null));
- multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer, legacyMatch);
+ multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer);
}
if (analyzer!=null) {
ft.setAnalyzer(analyzer);
ft.setQueryAnalyzer(queryAnalyzer);
- ft.setMultiTermAnalyzer(multiAnalyzer);
+ if (ft instanceof TextField)
+ ((TextField)ft).setMultiTermAnalyzer(multiAnalyzer);
}
if (similarity!=null) {
ft.setSimilarity(similarity);
@@ -143,36 +141,75 @@ public final class FieldTypePluginLoader
// 2> If legacyMultiTerm == true just construct the analyzer from a KeywordTokenizer. That should mimic current behavior.
// Do the same if they've specified that the old behavior is required (legacyMultiTerm="true")
- private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer, Boolean legacyMultiTerm) {
+ private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
if (queryAnalyzer == null) return null;
- if (legacyMultiTerm || (!(queryAnalyzer instanceof TokenizerChain))) {
+ if (!(queryAnalyzer instanceof TokenizerChain)) {
return new KeywordAnalyzer();
}
TokenizerChain tc = (TokenizerChain) queryAnalyzer;
+ MultiTermChainBuilder builder = new MultiTermChainBuilder();
- // we know it'll never be longer than this unless the code below is explicitly changed
- TokenFilterFactory[] filters = new TokenFilterFactory[2];
- int idx = 0;
- for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
- if (factory instanceof LowerCaseFilterFactory) {
- filters[idx] = new LowerCaseFilterFactory();
- filters[idx++].init(factory.getArgs());
- }
- if (factory instanceof ASCIIFoldingFilterFactory) {
- filters[idx] = new ASCIIFoldingFilterFactory();
- filters[idx++].init(factory.getArgs());
- }
- }
- WhitespaceTokenizerFactory white = new WhitespaceTokenizerFactory();
- white.init(tc.getTokenizerFactory().getArgs());
-
- return new TokenizerChain(tc.getCharFilterFactories(),
- white,
- Arrays.copyOfRange(filters, 0, idx));
+ CharFilterFactory[] charFactories = tc.getCharFilterFactories();
+ if (charFactories != null) {
+ for (CharFilterFactory fact : charFactories) {
+ builder.add(fact);
+ }
+ }
+
+ builder.add(tc.getTokenizerFactory());
+
+ for (TokenFilterFactory fact : tc.getTokenFilterFactories()) {
+ builder.add(fact);
+ }
+
+ return builder.build();
}
+ /**
+  * Accumulates the char filters, tokenizer and token filters that make up a
+  * multi-term analysis chain. Only components implementing
+  * {@link MultiTermAwareComponent} contribute; everything else is ignored.
+  * If no component supplies a tokenizer, a shared KeywordTokenizerFactory is used.
+  */
+ private static class MultiTermChainBuilder {
+   /** Fallback tokenizer, used unless an added component supplies a TokenizerFactory. */
+   static final KeywordTokenizerFactory keyFactory;
+
+   static {
+     keyFactory = new KeywordTokenizerFactory();
+     keyFactory.init(new HashMap<String,String>());
+   }
+
+   // Created lazily: build() hands null to TokenizerChain when no char filter was added.
+   private ArrayList<CharFilterFactory> charFilters = null;
+   // Always non-null, so neither add() nor build() needs a null check.
+   private final ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>(2);
+   private TokenizerFactory tokenizer = keyFactory;
+
+   /**
+    * Offers one component of the source analysis chain to the builder.
+    *
+    * @param current a factory from the query analyzer; silently skipped unless it
+    *                implements MultiTermAwareComponent
+    * @throws SolrException (SERVER_ERROR) if the component returned by
+    *         getMultiTermComponent() is not a TokenFilterFactory, TokenizerFactory
+    *         or CharFilterFactory
+    */
+   public void add(Object current) {
+     if (!(current instanceof MultiTermAwareComponent)) return;
+     Object newComponent = ((MultiTermAwareComponent)current).getMultiTermComponent();
+     if (newComponent instanceof TokenFilterFactory) {
+       filters.add((TokenFilterFactory)newComponent);
+     } else if (newComponent instanceof TokenizerFactory) {
+       // a later tokenizer replaces any earlier one, including the keyword fallback
+       tokenizer = (TokenizerFactory)newComponent;
+     } else if (newComponent instanceof CharFilterFactory) {
+       if (charFilters == null) {
+         charFilters = new ArrayList<CharFilterFactory>(1);
+       }
+       charFilters.add((CharFilterFactory)newComponent);
+     } else {
+       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown analysis component from MultiTermAwareComponent: " + newComponent);
+     }
+   }
+
+   /**
+    * Assembles the collected components into an analyzer.
+    *
+    * @return a TokenizerChain over the collected char filters (null when none were
+    *         added), the selected tokenizer and the collected token filters
+    */
+   public TokenizerChain build() {
+     CharFilterFactory[] charFilterArr = charFilters == null ? null : charFilters.toArray(new CharFilterFactory[charFilters.size()]);
+     TokenFilterFactory[] filterArr = filters.toArray(new TokenFilterFactory[filters.size()]);
+     return new TokenizerChain(charFilterArr, tokenizer, filterArr);
+   }
+ }
+
+
//
// <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
//
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SchemaField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SchemaField.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SchemaField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/SchemaField.java Sun Nov 27 17:04:38 2011
@@ -97,10 +97,6 @@ public final class SchemaField extends F
boolean isTokenized() { return (properties & TOKENIZED)!=0; }
boolean isBinary() { return (properties & BINARY)!=0; }
- boolean legacyMultiTerm() {
- return (properties & LEGACY_MULTITERM) != 0;
- }
-
public IndexableField createField(Object val, float boost) {
return type.createField(this,val,boost);
}
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java Sun Nov 27 17:04:38 2011
@@ -17,13 +17,8 @@
package org.apache.solr.schema;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.search.*;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -32,6 +27,7 @@ import org.apache.lucene.analysis.Cachin
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.search.QParser;
@@ -48,6 +44,19 @@ import java.io.StringReader;
public class TextField extends FieldType {
protected boolean autoGeneratePhraseQueries;
+ /**
+ * Analyzer set by schema for text types to use when searching fields
+ * of this type, subclasses can set analyzer themselves or override
+ * getAnalyzer()
+ * This analyzer is used to process wildcard, prefix, regex and other multiterm queries. It
+ * assembles a list of tokenizer +filters that "make sense" for this, primarily accent folding and
+ * lowercasing filters, and charfilters.
+ *
+ * @see #getMultiTermAnalyzer
+ * @see #setMultiTermAnalyzer
+ */
+ protected Analyzer multiTermAnalyzer=null;
+
@Override
protected void init(IndexSchema schema, Map<String,String> args) {
properties |= TOKENIZED;
@@ -63,6 +72,21 @@ public class TextField extends FieldType
super.init(schema, args);
}
+ /**
+ * Returns the Analyzer to be used when searching fields of this type when multi-term queries are specified.
+ * <p>
+ * This method may be called many times, at any time.
+ * </p>
+ * @return the multi-term analyzer, or null if none has been set
+ * @see #getAnalyzer
+ */
+ public Analyzer getMultiTermAnalyzer() {
+ return multiTermAnalyzer;
+ }
+
+ public void setMultiTermAnalyzer(Analyzer analyzer) { // installs the analyzer applied to wildcard, prefix, regex and other multiterm query text
+ this.multiTermAnalyzer = analyzer;
+ }
+
public boolean getAutoGeneratePhraseQueries() {
return autoGeneratePhraseQueries;
}
@@ -98,11 +122,50 @@ public class TextField extends FieldType
this.queryAnalyzer = analyzer;
}
+
@Override
- public void setMultiTermAnalyzer(Analyzer analyzer) {
- this.multiTermAnalyzer = analyzer;
+ public Query getRangeQuery(QParser parser, SchemaField field, String part1, String part2, boolean minInclusive, boolean maxInclusive) {
+ Analyzer multiAnalyzer = getMultiTermAnalyzer();
+ BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer);
+ BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer);
+ return new TermRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
+ }
+
+ public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
+ if (part == null) return null;
+
+ TokenStream source;
+ try {
+ source = analyzerIn.tokenStream(field, new StringReader(part));
+ source.reset();
+ } catch (IOException e) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
+ }
+
+ TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
+ BytesRef bytes = termAtt.getBytesRef();
+
+ try {
+ if (!source.incrementToken())
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
+ termAtt.fillBytesRef();
+ if (source.incrementToken())
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
+ } catch (IOException e) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e);
+ }
+
+ try {
+ source.end();
+ source.close();
+ } catch (IOException e) {
+ throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e);
+ }
+
+ return BytesRef.deepCopyOf(bytes);
}
+
static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) {
int phraseSlop = 0;
boolean enablePositionIncrements = true;
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java Sun Nov 27 17:04:38 2011
@@ -58,8 +58,9 @@ public class SolrQueryParser extends Que
protected final IndexSchema schema;
protected final QParser parser;
protected final String defaultField;
- protected final Map<String, ReversedWildcardFilterFactory> leadingWildcards =
- new HashMap<String, ReversedWildcardFilterFactory>();
+
+ // implementation detail - caching ReversedWildcardFilterFactory based on type
+ private Map<FieldType, ReversedWildcardFilterFactory> leadingWildcards;
public SolrQueryParser(QParser parser, String defaultField) {
this(parser, defaultField, parser.getReq().getSchema().getQueryAnalyzer());
@@ -71,30 +72,34 @@ public class SolrQueryParser extends Que
this.parser = parser;
this.defaultField = defaultField;
setEnablePositionIncrements(true);
- checkAllowLeadingWildcards();
+ setLowercaseExpandedTerms(false);
+ setAllowLeadingWildcard(true);
}
- protected void checkAllowLeadingWildcards() {
- boolean allow = false;
- for (Entry<String, FieldType> e : schema.getFieldTypes().entrySet()) {
- Analyzer a = e.getValue().getAnalyzer();
- if (a instanceof TokenizerChain) {
- // examine the indexing analysis chain if it supports leading wildcards
- TokenizerChain tc = (TokenizerChain)a;
- TokenFilterFactory[] factories = tc.getTokenFilterFactories();
- for (TokenFilterFactory factory : factories) {
- if (factory instanceof ReversedWildcardFilterFactory) {
- allow = true;
- leadingWildcards.put(e.getKey(), (ReversedWildcardFilterFactory)factory);
- }
+ /**
+  * Finds the ReversedWildcardFilterFactory, if any, in the indexing analysis chain of
+  * the given field type. The answer, including "none" (null), is cached per field type
+  * so the chain is scanned at most once per parser instance.
+  *
+  * @param fieldType type whose index analyzer is inspected
+  * @return the first ReversedWildcardFilterFactory in the chain, or null if the analyzer
+  *         is not a TokenizerChain or contains no such filter
+  */
+ protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) {
+   if (leadingWildcards == null) leadingWildcards = new HashMap<FieldType, ReversedWildcardFilterFactory>();
+   ReversedWildcardFilterFactory fac = leadingWildcards.get(fieldType);
+   // null is a valid cached answer, so it is only a cache miss when the key itself is absent
+   if (fac != null || leadingWildcards.containsKey(fieldType)) {
+     return fac;
+   }
+
+   Analyzer a = fieldType.getAnalyzer();
+   if (a instanceof TokenizerChain) {
+     // examine the indexing analysis chain if it supports leading wildcards
+     TokenizerChain tc = (TokenizerChain)a;
+     TokenFilterFactory[] factories = tc.getTokenFilterFactories();
+     for (TokenFilterFactory factory : factories) {
+       if (factory instanceof ReversedWildcardFilterFactory) {
+         fac = (ReversedWildcardFilterFactory)factory;
+         break;
        }
      }
    }
- // XXX should be enabled on a per-field basis
- if (allow) {
- setAllowLeadingWildcard(true);
- }
+
+   leadingWildcards.put(fieldType, fac);
+   return fac;
  }
+
private void checkNullField(String field) throws SolrException {
if (field == null && defaultField == null) {
@@ -104,12 +109,14 @@ public class SolrQueryParser extends Que
}
}
- protected String analyzeIfMultitermTermText(String field, String part, Analyzer analyzer) {
+ protected String analyzeIfMultitermTermText(String field, String part, FieldType fieldType) {
if (part == null) return part;
SchemaField sf = schema.getFieldOrNull((field));
- if (sf == null || ! (sf.getType() instanceof TextField)) return part;
- return analyzeMultitermTerm(field, part, analyzer).utf8ToString();
+ if (sf == null || ! (fieldType instanceof TextField)) return part;
+ String out = TextField.analyzeMultiTerm(field, part, ((TextField)fieldType).getMultiTermAnalyzer()).utf8ToString();
+ // System.out.println("INPUT="+part + " OUTPUT="+out);
+ return out;
}
@Override
@@ -143,8 +150,6 @@ public class SolrQueryParser extends Que
@Override
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
checkNullField(field);
- part1 = analyzeIfMultitermTermText(field, part1, schema.getFieldType(field).getMultiTermAnalyzer());
- part2 = analyzeIfMultitermTermText(field, part2, schema.getFieldType(field).getMultiTermAnalyzer());
SchemaField sf = schema.getField(field);
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
}
@@ -153,21 +158,10 @@ public class SolrQueryParser extends Que
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
checkNullField(field);
- termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
+ termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field));
- // TODO: toInternal() won't necessarily work on partial
- // values, so it looks like we need a getPrefix() function
- // on fieldtype? Or at the minimum, a method on fieldType
- // that can tell me if I should lowercase or not...
- // Schema could tell if lowercase filter is in the chain,
- // but a more sure way would be to run something through
- // the first time and check if it got lowercased.
-
- // TODO: throw exception if field type doesn't support prefixes?
- // (sortable numeric types don't do prefixes, but can do range queries)
- Term t = new Term(field, termStr);
- PrefixQuery prefixQuery = new PrefixQuery(t);
- return prefixQuery;
+ // Solr has always used constant scoring for prefix queries. This should return constant scoring by default.
+ return newPrefixQuery(new Term(field, termStr));
}
@Override
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
@@ -175,10 +169,10 @@ public class SolrQueryParser extends Que
if ("*".equals(field) && "*".equals(termStr)) {
return newMatchAllDocsQuery();
}
- termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
+ FieldType fieldType = schema.getFieldType(field);
+ termStr = analyzeIfMultitermTermText(field, termStr, fieldType);
// can we use reversed wildcards in this field?
- String type = schema.getFieldType(field).getTypeName();
- ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
+ ReversedWildcardFilterFactory factory = getReversedWildcardFilterFactory(fieldType);
if (factory != null) {
Term term = new Term(field, termStr);
// fsa representing the query
@@ -211,19 +205,15 @@ public class SolrQueryParser extends Que
}
};
}
- Query q = super.getWildcardQuery(field, termStr);
- if (q instanceof WildcardQuery) {
- // use a constant score query to avoid overflowing clauses
- WildcardQuery wildcardQuery = new WildcardQuery(((WildcardQuery)q).getTerm());
- return wildcardQuery;
- }
- return q;
- }
+ // Solr has always used constant scoring for wildcard queries. This should return constant scoring by default.
+ return newWildcardQuery(new Term(field, termStr));
+ }
+ @Override
protected Query getRegexpQuery(String field, String termStr) throws ParseException
{
- termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
- return super.getRegexpQuery(field, termStr);
+ termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field));
+ return newRegexpQuery(new Term(field, termStr));
}
}
Modified: lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml (original)
+++ lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-folding.xml Sun Nov 27 17:04:38 2011
@@ -64,7 +64,7 @@
</analyzer>
</fieldType>
- <fieldType name="text_rev" class="solr.TextField" legacyMultiTerm="false">
+ <fieldType name="text_rev" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
@@ -80,12 +80,25 @@
</analyzer>
</fieldType>
- <fieldType name="text_lower_tokenizer" class="solr.TextField">
+ <fieldType name="text_lower_token" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
+ <filter class="solr.ASCIIFoldingFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_oldstyle" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.ASCIIFoldingFilterFactory"/>
+ </analyzer>
+ <analyzer type="multiterm">
+ <tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
</fieldType>
+
<fieldType name="text_charfilter" class="solr.TextField" multiValued="false">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -99,19 +112,47 @@
</analyzer>
</fieldType>
- <fieldType name="text_oldstyle" class="solr.TextField" multiValued="false" legacyMultiTerm="true">
+ <fieldType name="text_straight" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_lower" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_folding" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.ASCIIFoldingFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_stemming" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.TrimFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_keyword" class="solr.TextField" sortMissingLast="true" omitNorms="true">
+ <analyzer>
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
- <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
- <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
- <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
- <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
+
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="byte" class="solr.ByteField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="short" class="solr.ShortField" omitNorms="true" positionIncrementGap="0"/>
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
@@ -133,10 +174,17 @@
<field name="content_ws" type="text_ws" indexed="true" stored="true"/>
<field name="content_rev" type="text_rev" indexed="true" stored="true"/>
<field name="content_multi" type="text_multi" indexed="true" stored="true"/>
- <field name="content_lower_token" type="text_multi" indexed="true" stored="true"/>
+ <field name="content_lower_token" type="text_lower_token" indexed="true" stored="true"/>
<field name="content_oldstyle" type="text_oldstyle" indexed="true" stored="true"/>
<field name="content_charfilter" type="text_charfilter" indexed="true" stored="true"/>
<field name="content_multi_bad" type="text_multi_bad" indexed="true" stored="true"/>
+
+ <dynamicField name="*_straight" type="text_straight" indexed="true" stored="true"/>
+ <dynamicField name="*_lower" type="text_lower" indexed="true" stored="true"/>
+ <dynamicField name="*_folding" type="text_folding" indexed="true" stored="true"/>
+ <dynamicField name="*_stemming" type="text_stemming" indexed="true" stored="true"/>
+ <dynamicField name="*_keyword" type="text_keyword" indexed="true" stored="true"/>
+
</fields>
<defaultSearchField>content</defaultSearchField>
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java Sun Nov 27 17:04:38 2011
@@ -36,7 +36,7 @@ public class MultiTermTest extends SolrT
@Test
public void testMultiFound() {
SchemaField field = h.getCore().getSchema().getField("content_multi");
- Analyzer analyzer = field.getType().getMultiTermAnalyzer();
+ Analyzer analyzer = ((TextField)field.getType()).getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain);
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer;
@@ -58,9 +58,9 @@ public class MultiTermTest extends SolrT
@Test
public void testQueryCopiedToMulti() {
SchemaField field = h.getCore().getSchema().getField("content_charfilter");
- Analyzer analyzer = field.getType().getMultiTermAnalyzer();
+ Analyzer analyzer = ((TextField)field.getType()).getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain);
- assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
+ assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof KeywordTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
assertTrue(factory instanceof LowerCaseFilterFactory);
@@ -73,15 +73,15 @@ public class MultiTermTest extends SolrT
@Test
public void testDefaultCopiedToMulti() {
SchemaField field = h.getCore().getSchema().getField("content_ws");
- Analyzer analyzer = field.getType().getMultiTermAnalyzer();
+ Analyzer analyzer = ((TextField)field.getType()).getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain);
- assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
+ assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof KeywordTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
}
- assertTrue(tc.getCharFilterFactories().length == 0);
+ assertTrue(tc.getCharFilterFactories() == null);
}
}
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java Sun Nov 27 17:04:38 2011
@@ -59,7 +59,12 @@ public class TestFoldingMultitermQuery e
"content_lower_token", docs[i],
"content_oldstyle", docs[i],
"content_charfilter", docs[i],
- "content_multi_bad", docs[i]
+ "content_multi_bad", docs[i],
+ "content_straight", docs[i],
+ "content_lower", docs[i],
+ "content_folding", docs[i],
+ "content_stemming", docs[i],
+ "content_keyword", docs[i]
));
}
assertU(optimize());
@@ -95,6 +100,8 @@ public class TestFoldingMultitermQuery e
assertQ(req("q", "content_lower_token:" + me),
"//result[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
+ assertQ(req("q", "content_oldstyle:" + me),
+ "//result[@numFound='0']");
}
}
for (int idx = 0; idx < matchRevPrefixUpper.length; idx++) {
@@ -128,13 +135,50 @@ public class TestFoldingMultitermQuery e
assertQ(req("q", "content_multi:" + me),
"//result[@numFound='1']",
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
- assertQ(req("q", "content_lower_token:" + me),
- "//result[@numFound='1']",
- "//*[@name='id'][.='" + Integer.toString(idx) + "']");
+ assertQ(req("q", "content_oldstyle:" + me),
+ "//result[@numFound='0']");
}
}
}
+ @Test
+ public void testLowerTokenizer() {
+ // The lowercasetokenizer will remove the '1' from the index, but not from the query, thus the special test.
+ assertQ(req("q", "content_lower_token:Á*C*"), "//result[@numFound='1']");
+ assertQ(req("q", "content_lower_token:Á*C*1"), "//result[@numFound='0']");
+ assertQ(req("q", "content_lower_token:h*1"), "//result[@numFound='0']");
+ assertQ(req("q", "content_lower_token:H*1"), "//result[@numFound='0']");
+ assertQ(req("q", "content_lower_token:*1"), "//result[@numFound='0']");
+ assertQ(req("q", "content_lower_token:HÏ*l?*"), "//result[@numFound='1']");
+ assertQ(req("q", "content_lower_token:hȉ*l?*"), "//result[@numFound='1']");
+ }
+
+ @Test
+ public void testRegex() throws Exception {
+ assertQ(req("q", "content:/Zill[a-z]/"),
+ "//result[@numFound='1']");
+ assertQ(req("q", "content:/Zill[A-Z]/"), // everything in the regex gets lowercased?
+ "//result[@numFound='1']");
+ assertQ(req("q", "content_keyword:/.*Zill[A-Z]/"),
+ "//result[@numFound='1']");
+
+ assertQ(req("q", "content_straight:/Zill[a-z]/"), // case preserving field shouldn't match
+ "//result[@numFound='0']");
+ assertQ(req("q", "content_folding:/Zill[a-z]/"), // case preserving field shouldn't match
+ "//result[@numFound='0']");
+
+ assertQ(req("q", "content_keyword:/Abcdefg1 Finger/"), // test spaces
+ "//result[@numFound='1']");
+
+ }
+
+
+ @Test
+ public void testGeneral() throws Exception {
+ assertQ(req("q", "content_stemming:fings*"), "//result[@numFound='0']"); // should not match (but would if fings* were stemmed to fing*)
+ assertQ(req("q", "content_stemming:fing*"), "//result[@numFound='1']");
+ }
+
// Phrases should fail. This test is mainly a marker so if phrases ever do start working with wildcards we go
// and update the documentation
@Test
@@ -143,17 +187,14 @@ public class TestFoldingMultitermQuery e
"//result[@numFound='0']");
}
- // Make sure the legacy behavior flag is honored
- @Test
- public void testLegacyBehavior() {
- assertQ(req("q", "content_oldstyle:ABCD*"),
- "//result[@numFound='0']");
- }
-
@Test
public void testWildcardRange() {
assertQ(req("q", "content:[* TO *]"),
"//result[@numFound='3']");
+ assertQ(req("q", "content:[AB* TO Z*]"),
+ "//result[@numFound='3']");
+ assertQ(req("q", "content:[AB*E?G* TO TU*W]"),
+ "//result[@numFound='3']");
}
@@ -222,10 +263,13 @@ public class TestFoldingMultitermQuery e
@Test
public void testMultiBad() {
try {
+ ignoreException("analyzer returned too many terms");
assertQ(req("q", "content_multi_bad:" + "abCD*"));
fail("Should throw exception when token evaluates to more than one term");
} catch (Exception expected) {
- assertTrue(expected.getCause() instanceof IllegalArgumentException);
+ assertTrue(expected.getCause() instanceof org.apache.solr.common.SolrException);
+ } finally {
+ resetExceptionIgnores();
}
}
}
\ No newline at end of file
Modified: lucene/dev/trunk/solr/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/schema.xml?rev=1206767&r1=1206766&r2=1206767&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/solr/conf/schema.xml Sun Nov 27 17:04:38 2011
@@ -427,41 +427,6 @@
</analyzer>
</fieldType>
- <!-- Illustrates the new "multiterm" analyzer definition the <fieldType> can take a new
- parameter legacyMultiTerm="true" if the old behvaior is desired. The new default
- behavior as of 3.6+ is to automatically define a multiterm analyzer
- -->
- <fieldType name="text_multiterm" class="solr.TextField" positionIncrementGap="100">
- <analyzer type="index">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- <analyzer type="query">
- <tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- </analyzer>
- <!-- Illustrates the use of a new analyzer type "multiterm". See the Wiki page "Multiterm
- Query Analysis" and SOLR-2438 for full details. The short form is that this analyzer is
- applied to wildcard terms (prefix, wildcard range) if specified. This allows, among other
- things, not having to lowercase wildcard terms on the client.
-
- In the absence of this section, the new default behavior (3.6, 4.0) is to construct
- one of these from the query analyzer that incorporates any defined charfilters, a
- WhitespaceTokenizer, a LowerCaseFilter (if defined), and an ASCIIFoldingFilter
- (if defined).
-
- Arguably, this is an expert-level analyzer, most cases will be handled by an instance
- of this being automatically constructed from the queryanalyzer.
-
- -->
- <analyzer type="multiterm">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.ASCIIFoldingFilterFactory"/>
- </analyzer>
- </fieldType>
<!-- since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright. -->
@@ -587,6 +552,7 @@
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_en" type="text_en" indexed="true" stored="true" multiValued="true" />
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>