You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2013/05/07 01:02:09 UTC
svn commit: r1479711 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/queryparser/
lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/
lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/
Author: sarowe
Date: Mon May 6 23:02:08 2013
New Revision: 1479711
URL: http://svn.apache.org/r1479711
Log:
LUCENE-949: AnalyzingQueryParser can't work with leading wildcards. (merged trunk r1479710)
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/queryparser/ (props changed)
lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1479711&r1=1479710&r2=1479711&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Mon May 6 23:02:08 2013
@@ -71,6 +71,9 @@ Bug Fixes
* LUCENE-4972: DirectoryTaxonomyWriter created empty commits even if no changes
were made. (Shai Erera, Michael McCandless)
+* LUCENE-949: AnalyzingQueryParser can't work with leading wildcards.
+ (Tim Allison, Robert Muir, Steve Rowe)
+
Optimizations
* LUCENE-4938: Don't use an unnecessarily large priority queue in IndexSearcher
Modified: lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java?rev=1479711&r1=1479710&r2=1479711&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java Mon May 6 23:02:08 2013
@@ -19,8 +19,8 @@ package org.apache.lucene.queryparser.an
import java.io.IOException;
import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -31,36 +31,29 @@ import org.apache.lucene.util.Version;
/**
* Overrides Lucene's default QueryParser so that Fuzzy-, Prefix-, Range-, and WildcardQuerys
- * are also passed through the given analyzer, but wild card characters (like <code>*</code>)
- * don't get removed from the search terms.
+ * are also passed through the given analyzer, but wildcard characters <code>*</code> and
+ * <code>?</code> don't get removed from the search terms.
*
* <p><b>Warning:</b> This class should only be used with analyzers that do not use stopwords
* or that add tokens. Also, several stemming analyzers are inappropriate: for example, GermanAnalyzer
* will turn <code>Häuser</code> into <code>hau</code>, but <code>H?user</code> will
* become <code>h?user</code> when using this parser and thus no match would be found (i.e.
* using this parser will be no improvement over QueryParser in such cases).
- *
*/
public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.QueryParser {
-
- /**
- * Constructs a query parser.
- * @param field the default field for query terms.
- * @param analyzer used to find terms in the query text.
- */
+ // gobble escaped chars or find a wildcard character
+ private final Pattern wildcardPattern = Pattern.compile("(\\.)|([?*]+)");
public AnalyzingQueryParser(Version matchVersion, String field, Analyzer analyzer) {
super(matchVersion, field, analyzer);
setAnalyzeRangeTerms(true);
}
/**
- * Called when parser
- * parses an input term token that contains one or more wildcard
- * characters (like <code>*</code>), but is not a prefix term token (one
- * that has just a single * character at the end).
+ * Called when parser parses an input term that contains one or more wildcard
+ * characters (like <code>*</code>), but is not a prefix term (one that has
+ * just a single <code>*</code> character at the end).
* <p>
- * Example: will be called for <code>H?user</code> or for <code>H*user</code>
- * but not for <code>*user</code>.
+ * Example: will be called for <code>H?user</code> or for <code>H*user</code>.
* <p>
* Depending on analyzer and settings, a wildcard term may (most probably will)
* be lower-cased automatically. It <b>will</b> go through the default Analyzer.
@@ -68,113 +61,52 @@ public class AnalyzingQueryParser extend
* Overrides super class, by passing terms through analyzer.
*
* @param field Name of the field query will use.
- * @param termStr Term token that contains one or more wild card
+ * @param termStr Term that contains one or more wildcard
* characters (? or *), but is not simple prefix term
*
* @return Resulting {@link Query} built for the term
*/
@Override
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
- List<String> tlist = new ArrayList<String>();
- List<String> wlist = new ArrayList<String>();
- /* somewhat a hack: find/store wildcard chars
- * in order to put them back after analyzing */
- boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
- StringBuilder tmpBuffer = new StringBuilder();
- char[] chars = termStr.toCharArray();
- for (int i = 0; i < termStr.length(); i++) {
- if (chars[i] == '?' || chars[i] == '*') {
- if (isWithinToken) {
- tlist.add(tmpBuffer.toString());
- tmpBuffer.setLength(0);
- }
- isWithinToken = false;
- } else {
- if (!isWithinToken) {
- wlist.add(tmpBuffer.toString());
- tmpBuffer.setLength(0);
- }
- isWithinToken = true;
- }
- tmpBuffer.append(chars[i]);
- }
- if (isWithinToken) {
- tlist.add(tmpBuffer.toString());
- } else {
- wlist.add(tmpBuffer.toString());
- }
- // get Analyzer from superclass and tokenize the term
- TokenStream source;
-
- int countTokens = 0;
- try {
- source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- source.reset();
- } catch (IOException e1) {
- throw new RuntimeException(e1);
+ if (termStr == null){
+ //can't imagine this would ever happen
+ throw new ParseException("Passed null value as term to getWildcardQuery");
+ }
+ if ( ! getAllowLeadingWildcard() && (termStr.startsWith("*") || termStr.startsWith("?"))) {
+ throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"
+ + " unless getAllowLeadingWildcard() returns true");
}
- CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
- while (true) {
- try {
- if (!source.incrementToken()) break;
- } catch (IOException e) {
- break;
- }
- String term = termAtt.toString();
- if (!"".equals(term)) {
- try {
- tlist.set(countTokens++, term);
- } catch (IndexOutOfBoundsException ioobe) {
- countTokens = -1;
- }
- }
- }
- try {
- source.end();
- source.close();
- } catch (IOException e) {
- // ignore
- }
-
- if (countTokens != tlist.size()) {
- /* this means that the analyzer used either added or consumed
- * (common for a stemmer) tokens, and we can't build a WildcardQuery */
- throw new ParseException("Cannot build WildcardQuery with analyzer "
- + getAnalyzer().getClass() + " - tokens added or lost");
+
+ Matcher wildcardMatcher = wildcardPattern.matcher(termStr);
+ StringBuilder sb = new StringBuilder();
+ int last = 0;
+
+ while (wildcardMatcher.find()){
+ // continue if escaped char
+ if (wildcardMatcher.group(1) != null){
+ continue;
+ }
+
+ if (wildcardMatcher.start() > 0){
+ String chunk = termStr.substring(last, wildcardMatcher.start());
+ String analyzed = analyzeSingleChunk(field, termStr, chunk);
+ sb.append(analyzed);
+ }
+ //append the wildcard character
+ sb.append(wildcardMatcher.group(2));
+
+ last = wildcardMatcher.end();
}
-
- if (tlist.size() == 0) {
- return null;
- } else if (tlist.size() == 1) {
- if (wlist != null && wlist.size() == 1) {
- /* if wlist contains one wildcard, it must be at the end, because:
- * 1) wildcards are not allowed in 1st position of a term by QueryParser
- * 2) if wildcard was *not* in end, there would be *two* or more tokens */
- return super.getWildcardQuery(field, tlist.get(0)
- + wlist.get(0).toString());
- } else {
- /* we should never get here! if so, this method was called
- * with a termStr containing no wildcard ... */
- throw new IllegalArgumentException("getWildcardQuery called without wildcard");
- }
- } else {
- /* the term was tokenized, let's rebuild to one token
- * with wildcards put back in postion */
- StringBuilder sb = new StringBuilder();
- for (int i = 0; i < tlist.size(); i++) {
- sb.append( tlist.get(i));
- if (wlist != null && wlist.size() > i) {
- sb.append(wlist.get(i));
- }
- }
- return super.getWildcardQuery(field, sb.toString());
+ if (last < termStr.length()){
+ sb.append(analyzeSingleChunk(field, termStr, termStr.substring(last)));
}
+ return super.getWildcardQuery(field, sb.toString());
}
-
+
/**
* Called when parser parses an input term
- * token that uses prefix notation; that is, contains a single '*' wildcard
+ * that uses prefix notation; that is, contains a single '*' wildcard
* character as its last character. Since this is a special case
* of generic wildcard term, and such a query can be optimized easily,
* this usually results in a different query object.
@@ -185,52 +117,19 @@ public class AnalyzingQueryParser extend
* Overrides super class, by passing terms through analyzer.
*
* @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
+ * @param termStr Term to use for building term for the query
* (<b>without</b> trailing '*' character!)
*
* @return Resulting {@link Query} built for the term
*/
@Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
- // get Analyzer from superclass and tokenize the term
- TokenStream source;
- List<String> tlist = new ArrayList<String>();
- try {
- source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- source.reset();
- } catch (IOException e1) {
- throw new RuntimeException(e1);
- }
- CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
- while (true) {
- try {
- if (!source.incrementToken()) break;
- } catch (IOException e) {
- break;
- }
- tlist.add(termAtt.toString());
- }
-
- try {
- source.end();
- source.close();
- } catch (IOException e) {
- // ignore
- }
-
- if (tlist.size() == 1) {
- return super.getPrefixQuery(field, tlist.get(0));
- } else {
- /* this means that the analyzer used either added or consumed
- * (common for a stemmer) tokens, and we can't build a PrefixQuery */
- throw new ParseException("Cannot build PrefixQuery with analyzer "
- + getAnalyzer().getClass()
- + (tlist.size() > 1 ? " - token(s) added" : " - token consumed"));
- }
+ String analyzed = analyzeSingleChunk(field, termStr, termStr);
+ return super.getPrefixQuery(field, analyzed);
}
/**
- * Called when parser parses an input term token that has the fuzzy suffix (~) appended.
+ * Called when parser parses an input term that has the fuzzy suffix (~) appended.
* <p>
* Depending on analyzer and settings, a fuzzy term may (most probably will)
* be lower-cased automatically. It <b>will</b> go through the default Analyzer.
@@ -238,42 +137,73 @@ public class AnalyzingQueryParser extend
* Overrides super class, by passing terms through analyzer.
*
* @param field Name of the field query will use.
- * @param termStr Term token to use for building term for the query
+ * @param termStr Term to use for building term for the query
*
* @return Resulting {@link Query} built for the term
*/
@Override
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
throws ParseException {
- // get Analyzer from superclass and tokenize the term
- TokenStream source = null;
- String nextToken = null;
- boolean multipleTokens = false;
-
- try {
- source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
- source.reset();
- if (source.incrementToken()) {
- nextToken = termAtt.toString();
- }
- multipleTokens = source.incrementToken();
- } catch (IOException e) {
- nextToken = null;
- }
-
- try {
- source.end();
- source.close();
- } catch (IOException e) {
- // ignore
- }
+
+ String analyzed = analyzeSingleChunk(field, termStr, termStr);
+ return super.getFuzzyQuery(field, analyzed, minSimilarity);
+ }
- if (multipleTokens) {
- throw new ParseException("Cannot build FuzzyQuery with analyzer " + getAnalyzer().getClass()
- + " - tokens were added");
+ /**
+ * Returns the analyzed form for the given chunk
+ *
+ * If the analyzer produces more than one output token from the given chunk,
+ * a ParseException is thrown.
+ *
+ * @param field The target field
+ * @param termStr The full term from which the given chunk is excerpted
+ * @param chunk The portion of the given termStr to be analyzed
+ * @return The result of analyzing the given chunk
+ * @throws ParseException when analysis returns other than one output token
+ */
+ protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException{
+ String analyzed = null;
+ TokenStream stream = null;
+ try{
+ stream = getAnalyzer().tokenStream(field, new StringReader(chunk));
+ stream.reset();
+ CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
+ // get first and hopefully only output token
+ if (stream.incrementToken()) {
+ analyzed = termAtt.toString();
+
+ // try to increment again, there should only be one output token
+ StringBuilder multipleOutputs = null;
+ while (stream.incrementToken()) {
+ if (null == multipleOutputs) {
+ multipleOutputs = new StringBuilder();
+ multipleOutputs.append('"');
+ multipleOutputs.append(analyzed);
+ multipleOutputs.append('"');
+ }
+ multipleOutputs.append(',');
+ multipleOutputs.append('"');
+ multipleOutputs.append(termAtt.toString());
+ multipleOutputs.append('"');
+ }
+ stream.end();
+ stream.close();
+ if (null != multipleOutputs) {
+ throw new ParseException(
+ String.format(getLocale(),
+ "Analyzer created multiple terms for \"%s\": %s", chunk, multipleOutputs.toString()));
+ }
+ } else {
+ // nothing returned by analyzer. Was it a stop word and the user accidentally
+ // used an analyzer with stop words?
+ stream.end();
+ stream.close();
+ throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
+ }
+ } catch (IOException e){
+ throw new ParseException(
+ String.format(getLocale(), "IO error while trying to analyze single term: \"%s\"", termStr));
}
-
- return (nextToken == null) ? null : super.getFuzzyQuery(field, nextToken, minSimilarity);
+ return analyzed;
}
}
Modified: lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java?rev=1479711&r1=1479710&r2=1479711&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java Mon May 6 23:02:08 2013
@@ -19,8 +19,17 @@ package org.apache.lucene.queryparser.an
import java.io.IOException;
import java.io.Reader;
+import java.util.Map;
+import java.util.TreeMap;
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockBytesAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -39,7 +48,8 @@ import org.apache.lucene.util.LuceneTest
*/
@SuppressCodecs("Lucene3x") // binary terms
public class TestAnalyzingQueryParser extends LuceneTestCase {
-
+ private final static String FIELD = "field";
+
private Analyzer a;
private String[] wildcardInput;
@@ -51,12 +61,15 @@ public class TestAnalyzingQueryParser ex
private String[] fuzzyInput;
private String[] fuzzyExpected;
+ private Map<String, String> wildcardEscapeHits = new TreeMap<String, String>();
+ private Map<String, String> wildcardEscapeMisses = new TreeMap<String, String>();
+
@Override
public void setUp() throws Exception {
super.setUp();
- wildcardInput = new String[] { "übersetzung über*ung",
+ wildcardInput = new String[] { "*bersetzung über*ung",
"Mötley Cr\u00fce Mötl?* Crü?", "Renée Zellweger Ren?? Zellw?ger" };
- wildcardExpected = new String[] { "ubersetzung uber*ung", "motley crue motl?* cru?",
+ wildcardExpected = new String[] { "*bersetzung uber*ung", "motley crue motl?* cru?",
"renee zellweger ren?? zellw?ger" };
prefixInput = new String[] { "übersetzung übersetz*",
@@ -73,43 +86,138 @@ public class TestAnalyzingQueryParser ex
fuzzyExpected = new String[] { "ubersetzung ubersetzung~1",
"motley crue motley~1 crue~2", "renee zellweger renee~0 zellweger~2" };
+ wildcardEscapeHits.put("mö*tley", "moatley");
+
+ // need to have at least one genuine wildcard to trigger the wildcard analysis
+ // hence the * before the y
+ wildcardEscapeHits.put("mö\\*tl*y", "mo*tley");
+
+ // escaped backslash then true wildcard
+ wildcardEscapeHits.put("mö\\\\*tley", "mo\\atley");
+
+ // escaped wildcard then true wildcard
+ wildcardEscapeHits.put("mö\\??ley", "mo?tley");
+
+ // the first is an escaped * which should yield a miss
+ wildcardEscapeMisses.put("mö\\*tl*y", "moatley");
+
a = new ASCIIAnalyzer();
}
+ public void testSingleChunkExceptions() {
+ boolean ex = false;
+ String termStr = "the*tre";
+
+ Analyzer stopsAnalyzer = new MockAnalyzer
+ (random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
+ try {
+ String q = parseWithAnalyzingQueryParser(termStr, stopsAnalyzer, true);
+ } catch (ParseException e){
+ if (e.getMessage().contains("returned nothing")){
+ ex = true;
+ }
+ }
+ assertEquals("Should have returned nothing", true, ex);
+ ex = false;
+
+ AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a);
+ try{
+ qp.analyzeSingleChunk(FIELD, "", "not a single chunk");
+ } catch (ParseException e){
+ if (e.getMessage().contains("multiple terms")){
+ ex = true;
+ }
+ }
+ assertEquals("Should have produced multiple terms", true, ex);
+ }
+
+ public void testWildcardAlone() throws ParseException {
+ //seems like crazy edge case, but can be useful in concordance
+ boolean pex = false;
+ try{
+ Query q = getAnalyzedQuery("*", a, false);
+ } catch (ParseException e){
+ pex = true;
+ }
+ assertEquals("Wildcard alone with allowWildcard=false", true, pex);
+
+ pex = false;
+ try {
+ String qString = parseWithAnalyzingQueryParser("*", a, true);
+ assertEquals("Every word", "*", qString);
+ } catch (ParseException e){
+ pex = true;
+ }
+
+ assertEquals("Wildcard alone with allowWildcard=true", false, pex);
+
+ }
+ public void testWildCardEscapes() throws ParseException, IOException {
+
+ for (Map.Entry<String, String> entry : wildcardEscapeHits.entrySet()){
+ Query q = getAnalyzedQuery(entry.getKey(), a, false);
+ assertEquals("WildcardEscapeHits: " + entry.getKey(), true, isAHit(q, entry.getValue(), a));
+ }
+ for (Map.Entry<String, String> entry : wildcardEscapeMisses.entrySet()){
+ Query q = getAnalyzedQuery(entry.getKey(), a, false);
+ assertEquals("WildcardEscapeMisses: " + entry.getKey(), false, isAHit(q, entry.getValue(), a));
+ }
+
+ }
+ public void testWildCardQueryNoLeadingAllowed() {
+ boolean ex = false;
+ try{
+ String q = parseWithAnalyzingQueryParser(wildcardInput[0], a, false);
+
+ } catch (ParseException e){
+ ex = true;
+ }
+ assertEquals("Testing initial wildcard not allowed",
+ true, ex);
+ }
+
public void testWildCardQuery() throws ParseException {
for (int i = 0; i < wildcardInput.length; i++) {
assertEquals("Testing wildcards with analyzer " + a.getClass() + ", input string: "
- + wildcardInput[i], wildcardExpected[i], parseWithAnalyzingQueryParser(wildcardInput[i], a));
+ + wildcardInput[i], wildcardExpected[i], parseWithAnalyzingQueryParser(wildcardInput[i], a, true));
}
}
+
public void testPrefixQuery() throws ParseException {
for (int i = 0; i < prefixInput.length; i++) {
assertEquals("Testing prefixes with analyzer " + a.getClass() + ", input string: "
- + prefixInput[i], prefixExpected[i], parseWithAnalyzingQueryParser(prefixInput[i], a));
+ + prefixInput[i], prefixExpected[i], parseWithAnalyzingQueryParser(prefixInput[i], a, false));
}
}
public void testRangeQuery() throws ParseException {
for (int i = 0; i < rangeInput.length; i++) {
assertEquals("Testing ranges with analyzer " + a.getClass() + ", input string: "
- + rangeInput[i], rangeExpected[i], parseWithAnalyzingQueryParser(rangeInput[i], a));
+ + rangeInput[i], rangeExpected[i], parseWithAnalyzingQueryParser(rangeInput[i], a, false));
}
}
public void testFuzzyQuery() throws ParseException {
for (int i = 0; i < fuzzyInput.length; i++) {
assertEquals("Testing fuzzys with analyzer " + a.getClass() + ", input string: "
- + fuzzyInput[i], fuzzyExpected[i], parseWithAnalyzingQueryParser(fuzzyInput[i], a));
+ + fuzzyInput[i], fuzzyExpected[i], parseWithAnalyzingQueryParser(fuzzyInput[i], a, false));
}
}
- private String parseWithAnalyzingQueryParser(String s, Analyzer a) throws ParseException {
- AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, "field", a);
+
+ private String parseWithAnalyzingQueryParser(String s, Analyzer a, boolean allowLeadingWildcard) throws ParseException {
+ Query q = getAnalyzedQuery(s, a, allowLeadingWildcard);
+ return q.toString(FIELD);
+ }
+
+ private Query getAnalyzedQuery(String s, Analyzer a, boolean allowLeadingWildcard) throws ParseException {
+ AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a);
+ qp.setAllowLeadingWildcard(allowLeadingWildcard);
org.apache.lucene.search.Query q = qp.parse(s);
- return q.toString("field");
+ return q;
}
-
+
final static class FoldingFilter extends TokenFilter {
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -146,31 +254,45 @@ public class TestAnalyzingQueryParser ex
final static class ASCIIAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+ Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
return new TokenStreamComponents(result, new FoldingFilter(result));
}
}
-
+
+
// LUCENE-4176
public void testByteTerms() throws Exception {
- Directory ramDir = newDirectory();
+ String s = "เข";
Analyzer analyzer = new MockBytesAnalyzer();
+ QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, analyzer);
+ Query q = qp.parse("[เข TO เข]");
+ assertEquals(true, isAHit(q, s, analyzer));
+ }
+
+
+ private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
+ Directory ramDir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, analyzer);
Document doc = new Document();
FieldType fieldType = new FieldType();
fieldType.setIndexed(true);
fieldType.setTokenized(true);
fieldType.setStored(true);
- Field field = new Field("content","เข", fieldType);
+ Field field = new Field(FIELD, content, fieldType);
doc.add(field);
writer.addDocument(doc);
writer.close();
DirectoryReader ir = DirectoryReader.open(ramDir);
- IndexSearcher is = newSearcher(ir);
- QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, "content", analyzer);
- Query q = qp.parse("[เข TO เข]");
- assertEquals(1, is.search(q, 10).totalHits);
+ IndexSearcher is = new IndexSearcher(ir);
+
+ int hits = is.search(q, 10).totalHits;
ir.close();
ramDir.close();
+ if (hits == 1){
+ return true;
+ } else {
+ return false;
+ }
+
}
}
\ No newline at end of file