You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/11/05 21:07:20 UTC
svn commit: r1031765 - in /lucene/dev/trunk: lucene/
lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/
lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/
lucene/src/java/org/apache/lucene/index/c...
Author: rmuir
Date: Fri Nov 5 20:07:20 2010
New Revision: 1031765
URL: http://svn.apache.org/viewvc?rev=1031765&view=rev
Log:
LUCENE-588: Escaped wildcard character in wildcard term not handled correctly
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java
lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Nov 5 20:07:20 2010
@@ -113,6 +113,12 @@ Changes in backwards compatibility polic
* LUCENE-2674: MultiTermQuery.TermCollector.collect now accepts the
TermsEnum as well. (Robert Muir, Mike McCandless)
+* LUCENE-588: WildcardQuery and QueryParser now allows escaping with
+ the '\' character. Previously this was impossible (you could not escape */?,
+ for example). If your code somehow depends on the old behavior, you will
+ need to change it (e.g. using "\\" to escape '\' itself).
+ (Sunil Kamath, Terry Yang via Robert Muir)
+
Changes in Runtime Behavior
* LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit
Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java Fri Nov 5 20:07:20 2010
@@ -49,9 +49,9 @@ public class WildcardQueryNode extends F
@Override
public CharSequence toQueryString(EscapeQuerySyntax escaper) {
if (isDefaultField(this.field)) {
- return getTermEscaped(escaper);
+ return this.text;
} else {
- return this.field + ":" + getTermEscaped(escaper);
+ return this.field + ":" + this.text;
}
}
Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Fri Nov 5 20:07:20 2010
@@ -76,6 +76,7 @@ import org.apache.lucene.util.LuceneTest
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
+import org.junit.Ignore;
/**
* This test case is a copy of the core Lucene query parser test, it was adapted
@@ -945,6 +946,15 @@ public class TestQPHelper extends Lucene
assertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&");
}
+ @Ignore("contrib queryparser shouldn't escape wildcard terms")
+ public void testEscapedWildcard() throws Exception {
+ StandardQueryParser qp = new StandardQueryParser();
+ qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+
+ WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r"));
+ assertEquals(q, qp.parse("foo\\?ba?r", "field"));
+ }
+
public void testTabNewlineCarriageReturn() throws Exception {
assertQueryEqualsDOA("+weltbank +worlbank", null, "+weltbank +worlbank");
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Fri Nov 5 20:07:20 2010
@@ -33,7 +33,11 @@ import org.apache.lucene.util.StringHelp
import java.io.IOException;
import java.util.Comparator;
import java.util.Map;
+import java.util.Set;
import java.util.HashMap;
+import java.util.TreeMap;
+import java.util.SortedMap;
+import java.util.Iterator;
class SimpleTextFieldsReader extends FieldsProducer {
@@ -78,7 +82,7 @@ class SimpleTextFieldsReader extends Fie
private class SimpleTextFieldsEnum extends FieldsEnum {
private final IndexInput in;
private final BytesRef scratch = new BytesRef(10);
- private boolean omitTF;
+ private String current;
public SimpleTextFieldsEnum() {
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
@@ -89,11 +93,12 @@ class SimpleTextFieldsReader extends Fie
while(true) {
readLine(in, scratch);
if (scratch.equals(END)) {
+ current = null;
return null;
}
if (scratch.startsWith(FIELD)) {
String field = StringHelper.intern(new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8"));
- omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
+ current = field;
return field;
}
}
@@ -101,7 +106,7 @@ class SimpleTextFieldsReader extends Fie
@Override
public TermsEnum terms() throws IOException {
- return new SimpleTextTermsEnum(in.getFilePointer(), omitTF);
+ return SimpleTextFieldsReader.this.terms(current).iterator();
}
}
@@ -109,21 +114,42 @@ class SimpleTextFieldsReader extends Fie
private final IndexInput in;
private final boolean omitTF;
private BytesRef current;
- private final long fieldStart;
- private final BytesRef scratch = new BytesRef(10);
- private final BytesRef scratch2 = new BytesRef(10);
private int docFreq;
private long docsStart;
private boolean ended;
+ private final TreeMap<BytesRef,TermData> allTerms;
+ private Iterator<Map.Entry<BytesRef,TermData>> iter;
- public SimpleTextTermsEnum(long offset, boolean omitTF) throws IOException {
+ public SimpleTextTermsEnum(TreeMap<BytesRef,TermData> allTerms, boolean omitTF) throws IOException {
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
- this.in.seek(offset);
+ this.allTerms = allTerms;
this.omitTF = omitTF;
- fieldStart = offset;
+ iter = allTerms.entrySet().iterator();
}
public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
+
+ final SortedMap<BytesRef,TermData> tailMap = allTerms.tailMap(text);
+
+ if (tailMap.isEmpty()) {
+ current = null;
+ return SeekStatus.END;
+ } else {
+ current = tailMap.firstKey();
+ final TermData td = tailMap.get(current);
+ docsStart = td.docsStart;
+ docFreq = td.docFreq;
+ iter = tailMap.entrySet().iterator();
+ assert iter.hasNext();
+ iter.next();
+ if (current.equals(text)) {
+ return SeekStatus.FOUND;
+ } else {
+ return SeekStatus.NOT_FOUND;
+ }
+ }
+
+ /*
if (current != null) {
final int cmp = current.compareTo(text);
if (cmp == 0) {
@@ -153,6 +179,7 @@ class SimpleTextFieldsReader extends Fie
current = null;
ended = true;
return SeekStatus.END;
+ */
}
@Override
@@ -162,6 +189,20 @@ class SimpleTextFieldsReader extends Fie
@Override
public BytesRef next() throws IOException {
assert !ended;
+
+ if (iter.hasNext()) {
+ Map.Entry<BytesRef,TermData> ent = iter.next();
+ current = ent.getKey();
+ TermData td = ent.getValue();
+ docFreq = td.docFreq;
+ docsStart = td.docsStart;
+ return current;
+ } else {
+ current = null;
+ return null;
+ }
+
+ /*
readLine(in, scratch);
if (scratch.equals(END) || scratch.startsWith(FIELD)) {
ended = true;
@@ -192,6 +233,7 @@ class SimpleTextFieldsReader extends Fie
in.seek(lineStart);
return current;
}
+ */
}
@Override
@@ -447,20 +489,70 @@ class SimpleTextFieldsReader extends Fie
}
}
+ static class TermData {
+ public long docsStart;
+ public int docFreq;
+
+ public TermData(long docsStart, int docFreq) {
+ this.docsStart = docsStart;
+ this.docFreq = docFreq;
+ }
+ }
+
private class SimpleTextTerms extends Terms {
private final String field;
private final long termsStart;
private final boolean omitTF;
- public SimpleTextTerms(String field, long termsStart) {
+ // NOTE: horribly, horribly RAM consuming, but then
+ // SimpleText should never be used in production
+ private final TreeMap<BytesRef,TermData> allTerms = new TreeMap<BytesRef,TermData>();
+
+ private final BytesRef scratch = new BytesRef(10);
+
+ public SimpleTextTerms(String field, long termsStart) throws IOException {
this.field = StringHelper.intern(field);
this.termsStart = termsStart;
omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
+ loadTerms();
+ }
+
+ private void loadTerms() throws IOException {
+ IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
+ in.seek(termsStart);
+ final BytesRef lastTerm = new BytesRef(10);
+ long lastDocsStart = -1;
+ int docFreq = 0;
+ while(true) {
+ readLine(in, scratch);
+ if (scratch.equals(END) || scratch.startsWith(FIELD)) {
+ if (lastDocsStart != -1) {
+ allTerms.put(new BytesRef(lastTerm),
+ new TermData(lastDocsStart, docFreq));
+ }
+ break;
+ } else if (scratch.startsWith(DOC)) {
+ docFreq++;
+ } else if (scratch.startsWith(TERM)) {
+ if (lastDocsStart != -1) {
+ allTerms.put(new BytesRef(lastTerm),
+ new TermData(lastDocsStart, docFreq));
+ }
+ lastDocsStart = in.getFilePointer();
+ final int len = scratch.length - TERM.length;
+ if (len > lastTerm.length) {
+ lastTerm.grow(len);
+ }
+ System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
+ lastTerm.length = len;
+ docFreq = 0;
+ }
+ }
}
@Override
public TermsEnum iterator() throws IOException {
- return new SimpleTextTermsEnum(termsStart, omitTF);
+ return new SimpleTextTermsEnum(allTerms, omitTF);
}
@Override
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java Fri Nov 5 20:07:20 2010
@@ -1045,7 +1045,7 @@ public abstract class QueryParserBase {
String termImage=discardEscapeChar(term.image);
if (wildcard) {
- q = getWildcardQuery(qfield, termImage);
+ q = getWildcardQuery(qfield, term.image);
} else if (prefix) {
q = getPrefixQuery(qfield,
discardEscapeChar(term.image.substring
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java Fri Nov 5 20:07:20 2010
@@ -45,6 +45,9 @@ public class WildcardQuery extends Autom
/** Char equality with support for wildcards */
public static final char WILDCARD_CHAR = '?';
+ /** Escape character */
+ public static final char WILDCARD_ESCAPE = '\\';
+
/**
* Constructs a query for terms matching <code>term</code>.
*/
@@ -56,6 +59,7 @@ public class WildcardQuery extends Autom
* Convert Lucene wildcard syntax into an automaton.
* @lucene.internal
*/
+ @SuppressWarnings("fallthrough")
public static Automaton toAutomaton(Term wildcardquery) {
List<Automaton> automata = new ArrayList<Automaton>();
@@ -63,6 +67,7 @@ public class WildcardQuery extends Autom
for (int i = 0; i < wildcardText.length();) {
final int c = wildcardText.codePointAt(i);
+ int length = Character.charCount(c);
switch(c) {
case WILDCARD_STRING:
automata.add(BasicAutomata.makeAnyString());
@@ -70,10 +75,18 @@ public class WildcardQuery extends Autom
case WILDCARD_CHAR:
automata.add(BasicAutomata.makeAnyChar());
break;
+ case WILDCARD_ESCAPE:
+ // add the next codepoint instead, if it exists
+ if (i + length < wildcardText.length()) {
+ final int nextChar = wildcardText.codePointAt(i + length);
+ length += Character.charCount(nextChar);
+ automata.add(BasicAutomata.makeChar(nextChar));
+ break;
+ } // else fallthru, lenient parsing with a trailing \
default:
automata.add(BasicAutomata.makeChar(c));
}
- i += Character.charCount(c);
+ i += length;
}
return BasicOperations.concatenate(automata);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java Fri Nov 5 20:07:20 2010
@@ -178,7 +178,7 @@ final public class SpecialOperations {
* Reverses the language of the given (non-singleton) automaton while returning
* the set of new initial states.
*/
- static Set<State> reverse(Automaton a) {
+ public static Set<State> reverse(Automaton a) {
a.expandSingleton();
// reverse all edges
HashMap<State, HashSet<Transition>> m = new HashMap<State, HashSet<Transition>>();
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java Fri Nov 5 20:07:20 2010
@@ -770,11 +770,11 @@ public class TestQueryParser extends Luc
assertQueryEquals("a:b\\\\c*", a, "a:b\\c*");
- assertQueryEquals("a:b\\-?c", a, "a:b-?c");
- assertQueryEquals("a:b\\+?c", a, "a:b+?c");
- assertQueryEquals("a:b\\:?c", a, "a:b:?c");
+ assertQueryEquals("a:b\\-?c", a, "a:b\\-?c");
+ assertQueryEquals("a:b\\+?c", a, "a:b\\+?c");
+ assertQueryEquals("a:b\\:?c", a, "a:b\\:?c");
- assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
+ assertQueryEquals("a:b\\\\?c", a, "a:b\\\\?c");
assertQueryEquals("a:b\\-c~", a, "a:b-c~2.0");
assertQueryEquals("a:b\\+c~", a, "a:b+c~2.0");
@@ -1062,6 +1062,12 @@ public class TestQueryParser extends Luc
}
+ public void testEscapedWildcard() throws Exception {
+ QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+ WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r"));
+ assertEquals(q, qp.parse("foo\\?ba?r"));
+ }
+
public void testRegexps() throws Exception {
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java Fri Nov 5 20:07:20 2010
@@ -205,6 +205,38 @@ public class TestWildcard
indexStore.close();
}
+ /**
+ * Tests if wildcard escaping works
+ */
+ public void testEscapes() throws Exception {
+ Directory indexStore = getIndexStore("field",
+ new String[]{"foo*bar", "foo??bar", "fooCDbar", "fooSOMETHINGbar", "foo\\"});
+ IndexSearcher searcher = new IndexSearcher(indexStore, true);
+
+ // without escape: matches foo??bar, fooCDbar, foo*bar, and fooSOMETHINGbar
+ WildcardQuery unescaped = new WildcardQuery(new Term("field", "foo*bar"));
+ assertMatches(searcher, unescaped, 4);
+
+ // with escape: only matches foo*bar
+ WildcardQuery escaped = new WildcardQuery(new Term("field", "foo\\*bar"));
+ assertMatches(searcher, escaped, 1);
+
+ // without escape: matches foo??bar and fooCDbar
+ unescaped = new WildcardQuery(new Term("field", "foo??bar"));
+ assertMatches(searcher, unescaped, 2);
+
+ // with escape: matches foo??bar only
+ escaped = new WildcardQuery(new Term("field", "foo\\?\\?bar"));
+ assertMatches(searcher, escaped, 1);
+
+ // check escaping at end: lenient parse yields "foo\"
+ WildcardQuery atEnd = new WildcardQuery(new Term("field", "foo\\"));
+ assertMatches(searcher, atEnd, 1);
+
+ searcher.close();
+ indexStore.close();
+ }
+
private Directory getIndexStore(String field, String[] contents)
throws IOException {
Directory indexStore = newDirectory();
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java Fri Nov 5 20:07:20 2010
@@ -30,6 +30,7 @@ import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.SpecialOperations;
import org.apache.lucene.analysis.Analyzer;
import org.apache.solr.analysis.*;
import org.apache.solr.common.SolrException;
@@ -202,37 +203,36 @@ public class SolrQueryParser extends Que
String type = schema.getFieldType(field).getTypeName();
ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
if (factory != null) {
+ Term term = new Term(field, termStr);
+ // fsa representing the query
+ Automaton automaton = WildcardQuery.toAutomaton(term);
+ // TODO: we should likely use the automaton to calculate shouldReverse, too.
if (factory.shouldReverse(termStr)) {
- int len = termStr.length();
- char[] chars = new char[len+1];
- chars[0] = factory.getMarkerChar();
- termStr.getChars(0, len, chars, 1);
- ReversedWildcardFilter.reverse(chars, 1, len);
- termStr = new String(chars);
+ automaton = BasicOperations.concatenate(automaton, BasicAutomata.makeChar(factory.getMarkerChar()));
+ SpecialOperations.reverse(automaton);
} else {
// reverse wildcardfilter is active: remove false positives
- Term term = new Term(field, termStr);
- // fsa representing the query
- Automaton a = WildcardQuery.toAutomaton(term);
// fsa representing false positives (markerChar*)
Automaton falsePositives = BasicOperations.concatenate(
BasicAutomata.makeChar(factory.getMarkerChar()),
BasicAutomata.makeAnyString());
- return new AutomatonQuery(term, BasicOperations.minus(a, falsePositives)) {
- // override toString so its completely transparent
- @Override
- public String toString(String field) {
- StringBuilder buffer = new StringBuilder();
- if (!getField().equals(field)) {
- buffer.append(getField());
- buffer.append(":");
- }
- buffer.append(term.text());
- buffer.append(ToStringUtils.boost(getBoost()));
- return buffer.toString();
- }
- };
+ // subtract these away
+ automaton = BasicOperations.minus(automaton, falsePositives);
}
+ return new AutomatonQuery(term, automaton) {
+ // override toString so its completely transparent
+ @Override
+ public String toString(String field) {
+ StringBuilder buffer = new StringBuilder();
+ if (!getField().equals(field)) {
+ buffer.append(getField());
+ buffer.append(":");
+ }
+ buffer.append(term.text());
+ buffer.append(ToStringUtils.boost(getBoost()));
+ return buffer.toString();
+ }
+ };
}
Query q = super.getWildcardQuery(field, termStr);
if (q instanceof WildcardQuery) {
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java Fri Nov 5 20:07:20 2010
@@ -19,6 +19,7 @@ package org.apache.solr.analysis;
import java.io.IOException;
import java.io.StringReader;
+import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.Map;
@@ -26,8 +27,10 @@ import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.SpecialOperations;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.SolrQueryParser;
@@ -51,6 +54,8 @@ public class TestReversedWildcardFilterF
public void setUp() throws Exception {
super.setUp();
schema = new IndexSchema(solrConfig, getSchemaFile(), null);
+ clearIndex();
+ assertU(commit());
}
@Test
@@ -105,7 +110,7 @@ public class TestReversedWildcardFilterF
}
@Test
- public void testQueryParsing() throws IOException, ParseException {
+ public void testQueryParsing() throws Exception {
SolrQueryParser parserOne = new SolrQueryParser(schema, "one");
assertTrue(parserOne.getAllowLeadingWildcard());
@@ -115,28 +120,53 @@ public class TestReversedWildcardFilterF
// XXX note: this should be false, but for now we return true for any field,
// XXX if at least one field uses the reversing
assertTrue(parserThree.getAllowLeadingWildcard());
- String text = "one +two *hree f*ur fiv* *si\uD834\uDD1Ex";
- String expectedOne = "one:one +one:two one:\u0001eerh* one:\u0001ru*f one:fiv* one:\u0001x\uD834\uDD1Eis*";
- String expectedTwo = "two:one +two:two two:\u0001eerh* two:\u0001ru*f two:fiv* two:\u0001x\uD834\uDD1Eis*";
- String expectedThree = "three:one +three:two three:*hree three:f*ur three:fiv* three:*si\uD834\uDD1Ex";
- Query q = parserOne.parse(text);
- assertEquals(expectedOne, q.toString());
- q = parserTwo.parse(text);
- assertEquals(expectedTwo, q.toString());
- q = parserThree.parse(text);
- assertEquals(expectedThree, q.toString());
+
+ // add some docs
+ assertU(adoc("id", "1", "one", "one"));
+ assertU(adoc("id", "2", "two", "two"));
+ assertU(adoc("id", "3", "three", "three"));
+ assertU(adoc("id", "4", "one", "four"));
+ assertU(adoc("id", "5", "two", "five"));
+ assertU(adoc("id", "6", "three", "si\uD834\uDD1Ex"));
+ assertU(commit());
+
+ assertQ("should have matched",
+ req("+id:1 +one:one"),
+ "//result[@numFound=1]");
+
+ assertQ("should have matched",
+ req("+id:4 +one:f*ur"),
+ "//result[@numFound=1]");
+
+ assertQ("should have matched",
+ req("+id:6 +three:*si\uD834\uDD1Ex"),
+ "//result[@numFound=1]");
+
// test conditional reversal
- String condText = "*hree t*ree th*ee thr*e ?hree t?ree th?ee th?*ee " +
- "short*token ver*longtoken";
- String expected = "two:\u0001eerh* two:\u0001eer*t two:\u0001ee*ht " +
- "two:thr*e " +
- "two:\u0001eerh? two:\u0001eer?t " +
- "two:th?ee " +
- "two:th?*ee " +
- "two:short*token " +
- "two:\u0001nekotgnol*rev";
- q = parserTwo.parse(condText);
- assertEquals(expected, q.toString());
+ assertTrue(wasReversed(parserTwo, "*hree"));
+ assertTrue(wasReversed(parserTwo, "t*ree"));
+ assertTrue(wasReversed(parserTwo, "th*ee"));
+ assertFalse(wasReversed(parserTwo, "thr*e"));
+ assertTrue(wasReversed(parserTwo, "?hree"));
+ assertTrue(wasReversed(parserTwo, "t?ree"));
+ assertFalse(wasReversed(parserTwo, "th?ee"));
+ assertFalse(wasReversed(parserTwo, "th?*ee"));
+ assertFalse(wasReversed(parserTwo, "short*token"));
+ assertTrue(wasReversed(parserTwo, "ver*longtoken"));
+ }
+
+ /** fragile assert: depends on our implementation, but cleanest way to check for now */
+ private boolean wasReversed(SolrQueryParser qp, String query) throws Exception {
+ Query q = qp.parse(query);
+ if (!(q instanceof AutomatonQuery))
+ return false;
+ // this is a hack to get the protected Automaton field in AutomatonQuery,
+ // may break in later lucene versions - we have no getter... for good reasons.
+ final Field automatonField = AutomatonQuery.class.getDeclaredField("automaton");
+ automatonField.setAccessible(true);
+ Automaton automaton = (Automaton) automatonField.get(q);
+ String prefix = SpecialOperations.getCommonPrefix(automaton);
+ return prefix.length() > 0 && prefix.charAt(0) == '\u0001';
}
@Test