Posted to commits@lucene.apache.org by sa...@apache.org on 2013/05/07 00:52:01 UTC

svn commit: r1479710 - in /lucene/dev/trunk/lucene: CHANGES.txt queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java

Author: sarowe
Date: Mon May  6 22:52:00 2013
New Revision: 1479710

URL: http://svn.apache.org/r1479710
Log:
LUCENE-949: AnalyzingQueryParser can't work with leading wildcards.
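
A minimal sketch of the newly supported usage (names like qp and analyzer are
illustrative placeholders; see the TestAnalyzingQueryParser changes below for
the real coverage):

    AnalyzingQueryParser qp = new AnalyzingQueryParser(matchVersion, "field", analyzer);
    qp.setAllowLeadingWildcard(true);
    Query q = qp.parse("*bersetzung");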

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
    lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1479710&r1=1479709&r2=1479710&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon May  6 22:52:00 2013
@@ -103,6 +103,9 @@ Bug Fixes
 * LUCENE-4972: DirectoryTaxonomyWriter created empty commits even if no changes 
   were made. (Shai Erera, Michael McCandless)
   
+* LUCENE-949: AnalyzingQueryParser can't work with leading wildcards.
+  (Tim Allison, Robert Muir, Steve Rowe)
+  
 Optimizations
 
 * LUCENE-4938: Don't use an unnecessarily large priority queue in IndexSearcher

Modified: lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java?rev=1479710&r1=1479709&r2=1479710&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java (original)
+++ lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java Mon May  6 22:52:00 2013
@@ -19,8 +19,8 @@ package org.apache.lucene.queryparser.an
 
 import java.io.IOException;
 import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
@@ -31,36 +31,29 @@ import org.apache.lucene.util.Version;
 
 /**
  * Overrides Lucene's default QueryParser so that Fuzzy-, Prefix-, Range-, and WildcardQuerys
- * are also passed through the given analyzer, but wild card characters (like <code>*</code>) 
- * don't get removed from the search terms.
+ * are also passed through the given analyzer, but wildcard characters <code>*</code> and
+ * <code>?</code> don't get removed from the search terms.
  * 
  * <p><b>Warning:</b> This class should only be used with analyzers that do not use stopwords
  * or that add tokens. Also, several stemming analyzers are inappropriate: for example, GermanAnalyzer 
  * will turn <code>H&auml;user</code> into <code>hau</code>, but <code>H?user</code> will 
  * become <code>h?user</code> when using this parser and thus no match would be found (i.e.
  * using this parser will be no improvement over QueryParser in such cases). 
- *
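+ *
+ * <p>A minimal usage sketch (<code>matchVersion</code> and <code>analyzer</code>
+ * are placeholders; assumes an analyzer that emits exactly one token per chunk):
+ * <pre>
+ *   AnalyzingQueryParser parser = new AnalyzingQueryParser(matchVersion, "field", analyzer);
+ *   parser.setAllowLeadingWildcard(true);
+ *   Query query = parser.parse("*user"); // leading wildcards are now supported
+ * </pre>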
  */
 public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.QueryParser {
-
-  /**
-   * Constructs a query parser.
-   * @param field    the default field for query terms.
-   * @param analyzer used to find terms in the query text.
-   */
+  // gobble escaped chars or find a wildcard character 
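+  // (group 1 matches a backslash-escaped character, which is left as-is;
+  //  group 2 matches a run of unescaped '?' or '*' wildcards)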
+  private final Pattern wildcardPattern = Pattern.compile("(\\\\.)|([?*]+)");
   public AnalyzingQueryParser(Version matchVersion, String field, Analyzer analyzer) {
     super(matchVersion, field, analyzer);
     setAnalyzeRangeTerms(true);
   }
 
   /**
-   * Called when parser
-   * parses an input term token that contains one or more wildcard
-   * characters (like <code>*</code>), but is not a prefix term token (one
-   * that has just a single * character at the end).
+   * Called when parser parses an input term that contains one or more wildcard
+   * characters (like <code>*</code>), but is not a prefix term (one that has
+   * just a single <code>*</code> character at the end).
    * <p>
-   * Example: will be called for <code>H?user</code> or for <code>H*user</code> 
-   * but not for <code>*user</code>.
+   * Example: will be called for <code>H?user</code> or for <code>H*user</code>.
    * <p>
    * Depending on analyzer and settings, a wildcard term may (most probably will)
    * be lower-cased automatically. It <b>will</b> go through the default Analyzer.
@@ -68,113 +61,52 @@ public class AnalyzingQueryParser extend
    * Overrides super class, by passing terms through analyzer.
    *
    * @param  field   Name of the field query will use.
-   * @param  termStr Term token that contains one or more wild card
+   * @param  termStr Term that contains one or more wildcard
    *                 characters (? or *), but is not simple prefix term
    *
    * @return Resulting {@link Query} built for the term
    */
   @Override
   protected Query getWildcardQuery(String field, String termStr) throws ParseException {
-    List<String> tlist = new ArrayList<String>();
-    List<String> wlist = new ArrayList<String>();
-    /* somewhat a hack: find/store wildcard chars
-     * in order to put them back after analyzing */
-    boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
-    StringBuilder tmpBuffer = new StringBuilder();
-    char[] chars = termStr.toCharArray();
-    for (int i = 0; i < termStr.length(); i++) {
-      if (chars[i] == '?' || chars[i] == '*') {
-        if (isWithinToken) {
-          tlist.add(tmpBuffer.toString());
-          tmpBuffer.setLength(0);
-        }
-        isWithinToken = false;
-      } else {
-        if (!isWithinToken) {
-          wlist.add(tmpBuffer.toString());
-          tmpBuffer.setLength(0);
-        }
-        isWithinToken = true;
-      }
-      tmpBuffer.append(chars[i]);
-    }
-    if (isWithinToken) {
-      tlist.add(tmpBuffer.toString());
-    } else {
-      wlist.add(tmpBuffer.toString());
-    }
 
-    // get Analyzer from superclass and tokenize the term
-    TokenStream source;
-    
-    int countTokens = 0;
-    try {
-      source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-      source.reset();
-    } catch (IOException e1) {
-      throw new RuntimeException(e1);
+    if (termStr == null){
+      //can't imagine this would ever happen
+      throw new ParseException("Passed null value as term to getWildcardQuery");
+    }
+    if ( ! getAllowLeadingWildcard() && (termStr.startsWith("*") || termStr.startsWith("?"))) {
+      throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"
+                              + " unless getAllowLeadingWildcard() returns true");
     }
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
-    while (true) {
-      try {
-        if (!source.incrementToken()) break;
-      } catch (IOException e) {
-        break;
-      }
-      String term = termAtt.toString();
-      if (!"".equals(term)) {
-        try {
-          tlist.set(countTokens++, term);
-        } catch (IndexOutOfBoundsException ioobe) {
-          countTokens = -1;
-        }
-      }
-    }
-    try {
-      source.end();
-      source.close();
-    } catch (IOException e) {
-      // ignore
-    }
-
-    if (countTokens != tlist.size()) {
-      /* this means that the analyzer used either added or consumed 
-       * (common for a stemmer) tokens, and we can't build a WildcardQuery */
-      throw new ParseException("Cannot build WildcardQuery with analyzer "
-          + getAnalyzer().getClass() + " - tokens added or lost");
+    
+    Matcher wildcardMatcher = wildcardPattern.matcher(termStr);
+    StringBuilder sb = new StringBuilder();
+    int last = 0;
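+    // end of the most recent wildcard run; the text between 'last' and the
+    // next unescaped wildcard is analyzed as a single chunk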
+  
+    while (wildcardMatcher.find()){
+      // continue if escaped char
+      if (wildcardMatcher.group(1) != null){
+        continue;
+      }
+     
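+      // analyze and append the chunk preceding this wildcard run, if any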
+      if (wildcardMatcher.start() > 0){
+        String chunk = termStr.substring(last, wildcardMatcher.start());
+        String analyzed = analyzeSingleChunk(field, termStr, chunk);
+        sb.append(analyzed);
+      }
+      // append the wildcard character(s)
+      sb.append(wildcardMatcher.group(2));
+     
+      last = wildcardMatcher.end();
     }
-
-    if (tlist.size() == 0) {
-      return null;
-    } else if (tlist.size() == 1) {
-      if (wlist != null && wlist.size() == 1) {
-        /* if wlist contains one wildcard, it must be at the end, because:
-         * 1) wildcards are not allowed in 1st position of a term by QueryParser
-         * 2) if wildcard was *not* in end, there would be *two* or more tokens */
-        return super.getWildcardQuery(field, tlist.get(0)
-            + wlist.get(0).toString());
-      } else {
-        /* we should never get here! if so, this method was called
-         * with a termStr containing no wildcard ... */
-        throw new IllegalArgumentException("getWildcardQuery called without wildcard");
-      }
-    } else {
-      /* the term was tokenized, let's rebuild to one token
-       * with wildcards put back in postion */
-      StringBuilder sb = new StringBuilder();
-      for (int i = 0; i < tlist.size(); i++) {
-        sb.append( tlist.get(i));
-        if (wlist != null && wlist.size() > i) {
-          sb.append(wlist.get(i));
-        }
-      }
-      return super.getWildcardQuery(field, sb.toString());
+    if (last < termStr.length()){
+      sb.append(analyzeSingleChunk(field, termStr, termStr.substring(last)));
     }
+    return super.getWildcardQuery(field, sb.toString());
   }
-
+  
   /**
    * Called when parser parses an input term
-   * token that uses prefix notation; that is, contains a single '*' wildcard
+   * that uses prefix notation; that is, contains a single '*' wildcard
    * character as its last character. Since this is a special case
    * of generic wildcard term, and such a query can be optimized easily,
    * this usually results in a different query object.
@@ -185,52 +117,19 @@ public class AnalyzingQueryParser extend
    * Overrides super class, by passing terms through analyzer.
    *
    * @param  field   Name of the field query will use.
-   * @param  termStr Term token to use for building term for the query
+   * @param  termStr Term to use for building term for the query
    *                 (<b>without</b> trailing '*' character!)
    *
    * @return Resulting {@link Query} built for the term
    */
   @Override
   protected Query getPrefixQuery(String field, String termStr) throws ParseException {
-    // get Analyzer from superclass and tokenize the term
-    TokenStream source;
-    List<String> tlist = new ArrayList<String>();
-    try {
-      source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-      source.reset();
-    } catch (IOException e1) {
-      throw new RuntimeException(e1);
-    }
-    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
-    while (true) {
-      try {
-        if (!source.incrementToken()) break;
-      } catch (IOException e) {
-        break;
-      }
-      tlist.add(termAtt.toString());
-    }
-
-    try {
-      source.end();
-      source.close();
-    } catch (IOException e) {
-      // ignore
-    }
-
-    if (tlist.size() == 1) {
-      return super.getPrefixQuery(field, tlist.get(0));
-    } else {
-      /* this means that the analyzer used either added or consumed
-       * (common for a stemmer) tokens, and we can't build a PrefixQuery */
-      throw new ParseException("Cannot build PrefixQuery with analyzer "
-          + getAnalyzer().getClass()
-          + (tlist.size() > 1 ? " - token(s) added" : " - token consumed"));
-    }
+    String analyzed = analyzeSingleChunk(field, termStr, termStr);
+    return super.getPrefixQuery(field, analyzed);
   }
 
   /**
-   * Called when parser parses an input term token that has the fuzzy suffix (~) appended.
+   * Called when parser parses an input term that has the fuzzy suffix (~) appended.
    * <p>
    * Depending on analyzer and settings, a fuzzy term may (most probably will)
    * be lower-cased automatically. It <b>will</b> go through the default Analyzer.
@@ -238,42 +137,73 @@ public class AnalyzingQueryParser extend
    * Overrides super class, by passing terms through analyzer.
    *
    * @param field Name of the field query will use.
-   * @param termStr Term token to use for building term for the query
+   * @param termStr Term to use for building term for the query
    *
    * @return Resulting {@link Query} built for the term
    */
   @Override
   protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
       throws ParseException {
-    // get Analyzer from superclass and tokenize the term
-    TokenStream source = null;
-    String nextToken = null;
-    boolean multipleTokens = false;
-    
-    try {
-      source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-      CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
-      source.reset();
-      if (source.incrementToken()) {
-        nextToken = termAtt.toString();
-      }
-      multipleTokens = source.incrementToken();
-    } catch (IOException e) {
-      nextToken = null;
-    }
-
-    try {
-      source.end();
-      source.close();
-    } catch (IOException e) {
-      // ignore
-    }
+   
+    String analyzed = analyzeSingleChunk(field, termStr, termStr);
+    return super.getFuzzyQuery(field, analyzed, minSimilarity);
+  }
 
-    if (multipleTokens) {
-      throw new ParseException("Cannot build FuzzyQuery with analyzer " + getAnalyzer().getClass()
-          + " - tokens were added");
+  /**
+   * Returns the analyzed form for the given chunk
+   * 
+   * If the analyzer produces more than one output token from the given chunk,
+   * a ParseException is thrown.
+   *
+   * @param field The target field
+   * @param termStr The full term from which the given chunk is excerpted
+   * @param chunk The portion of the given termStr to be analyzed
+   * @return The result of analyzing the given chunk
+   * @throws ParseException when analysis returns other than one output token
+   */
+  protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException{
+    String analyzed = null;
+    TokenStream stream = null;
+    try{
+      stream = getAnalyzer().tokenStream(field, new StringReader(chunk));
+      stream.reset();
+      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
+      // get first and hopefully only output token
+      if (stream.incrementToken()) {
+        analyzed = termAtt.toString();
+        
+        // try to increment again, there should only be one output token
+        StringBuilder multipleOutputs = null;
+        while (stream.incrementToken()) {
+          if (null == multipleOutputs) {
+            multipleOutputs = new StringBuilder();
+            multipleOutputs.append('"');
+            multipleOutputs.append(analyzed);
+            multipleOutputs.append('"');
+          }
+          multipleOutputs.append(',');
+          multipleOutputs.append('"');
+          multipleOutputs.append(termAtt.toString());
+          multipleOutputs.append('"');
+        }
+        stream.end();
+        stream.close();
+        if (null != multipleOutputs) {
+          throw new ParseException(
+              String.format(getLocale(),
+                  "Analyzer created multiple terms for \"%s\": %s", chunk, multipleOutputs.toString()));
+        }
+      } else {
+        // nothing returned by analyzer.  Was it a stop word and the user accidentally
+        // used an analyzer with stop words?
+        stream.end();
+        stream.close();
+        throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
+      }
+    } catch (IOException e){
+      throw new ParseException(
+          String.format(getLocale(), "IO error while trying to analyze single term: \"%s\"", termStr));
     }
-
-    return (nextToken == null) ? null : super.getFuzzyQuery(field, nextToken, minSimilarity);
+    return analyzed;
   }
 }

Modified: lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java?rev=1479710&r1=1479709&r2=1479710&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (original)
+++ lucene/dev/trunk/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java Mon May  6 22:52:00 2013
@@ -19,8 +19,17 @@ package org.apache.lucene.queryparser.an
 
 import java.io.IOException;
 import java.io.Reader;
+import java.util.Map;
+import java.util.TreeMap;
 
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockBytesAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -33,11 +42,14 @@ import org.apache.lucene.search.IndexSea
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 
 /**
  */
+@SuppressCodecs("Lucene3x") // binary terms
 public class TestAnalyzingQueryParser extends LuceneTestCase {
-
+  private final static String FIELD = "field";
+   
   private Analyzer a;
 
   private String[] wildcardInput;
@@ -49,12 +61,15 @@ public class TestAnalyzingQueryParser ex
   private String[] fuzzyInput;
   private String[] fuzzyExpected;
 
+  private Map<String, String> wildcardEscapeHits = new TreeMap<String, String>();
+  private Map<String, String> wildcardEscapeMisses = new TreeMap<String, String>();
+
   @Override
   public void setUp() throws Exception {
     super.setUp();
-    wildcardInput = new String[] { "übersetzung über*ung",
+    wildcardInput = new String[] { "*bersetzung über*ung",
         "Mötley Cr\u00fce Mötl?* Crü?", "Renée Zellweger Ren?? Zellw?ger" };
-    wildcardExpected = new String[] { "ubersetzung uber*ung", "motley crue motl?* cru?",
+    wildcardExpected = new String[] { "*bersetzung uber*ung", "motley crue motl?* cru?",
         "renee zellweger ren?? zellw?ger" };
 
     prefixInput = new String[] { "übersetzung übersetz*",
@@ -71,43 +86,138 @@ public class TestAnalyzingQueryParser ex
     fuzzyExpected = new String[] { "ubersetzung ubersetzung~1",
         "motley crue motley~1 crue~2", "renee zellweger renee~0 zellweger~2" };
 
+    wildcardEscapeHits.put("mö*tley", "moatley");
+
+    // need to have at least one genuine wildcard to trigger the wildcard analysis
+    // hence the * before the y
+    wildcardEscapeHits.put("mö\\*tl*y", "mo*tley");
+
+    // escaped backslash then true wildcard
+    wildcardEscapeHits.put("mö\\\\*tley", "mo\\atley");
+    
+    // escaped wildcard then true wildcard
+    wildcardEscapeHits.put("mö\\??ley", "mo?tley");
+
+    // the first is an escaped * which should yield a miss
+    wildcardEscapeMisses.put("mö\\*tl*y", "moatley");
+      
     a = new ASCIIAnalyzer();
   }
 
+  public void testSingleChunkExceptions() {
+    boolean ex = false;
+    String termStr = "the*tre";
+      
+    Analyzer stopsAnalyzer = new MockAnalyzer
+        (random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
+    try {
+      String q = parseWithAnalyzingQueryParser(termStr, stopsAnalyzer, true);     
+    } catch (ParseException e){
+      if (e.getMessage().contains("returned nothing")){
+        ex = true;
+      }
+    }
+    assertEquals("Should have returned nothing", true, ex);
+    ex = false;
+     
+    AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a);
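+    // the whitespace-based ASCIIAnalyzer tokenizes "not a single chunk" into
+    // several terms, so analyzeSingleChunk should throw a "multiple terms" exception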
+    try{
+      qp.analyzeSingleChunk(FIELD, "", "not a single chunk");
+    } catch (ParseException e){
+      if (e.getMessage().contains("multiple terms")){
+        ex = true;
+      }
+    }
+    assertEquals("Should have produced multiple terms", true, ex);
+  }
+   
+  public void testWildcardAlone() throws ParseException {
+    //seems like crazy edge case, but can be useful in concordance 
+    boolean pex = false;
+    try{
+      Query q = getAnalyzedQuery("*", a, false);
+    } catch (ParseException e){
+      pex = true;
+    }
+    assertEquals("Wildcard alone with allowWildcard=false", true, pex);
+      
+    pex = false;
+    try {
+      String qString = parseWithAnalyzingQueryParser("*", a, true);
+      assertEquals("Every word", "*", qString);
+    } catch (ParseException e){
+      pex = true;
+    }
+      
+    assertEquals("Wildcard alone with allowWildcard=true", false, pex);
+
+  }
+  public void testWildCardEscapes() throws ParseException, IOException {
+
+    for (Map.Entry<String, String> entry : wildcardEscapeHits.entrySet()){
+      Query q = getAnalyzedQuery(entry.getKey(), a, false);
+      assertEquals("WildcardEscapeHits: " + entry.getKey(), true, isAHit(q, entry.getValue(), a));
+    }
+    for (Map.Entry<String, String> entry : wildcardEscapeMisses.entrySet()){
+      Query q = getAnalyzedQuery(entry.getKey(), a, false);
+      assertEquals("WildcardEscapeMisses: " + entry.getKey(), false, isAHit(q, entry.getValue(), a));
+    }
+
+  }
+  public void testWildCardQueryNoLeadingAllowed() {
+    boolean ex = false;
+    try{
+      String q = parseWithAnalyzingQueryParser(wildcardInput[0], a, false);
+
+    } catch (ParseException e){
+      ex = true;
+    }
+    assertEquals("Testing initial wildcard not allowed",
+        true, ex);
+  }
+
   public void testWildCardQuery() throws ParseException {
     for (int i = 0; i < wildcardInput.length; i++) {
       assertEquals("Testing wildcards with analyzer " + a.getClass() + ", input string: "
-          + wildcardInput[i], wildcardExpected[i], parseWithAnalyzingQueryParser(wildcardInput[i], a));
+          + wildcardInput[i], wildcardExpected[i], parseWithAnalyzingQueryParser(wildcardInput[i], a, true));
     }
   }
 
+
   public void testPrefixQuery() throws ParseException {
     for (int i = 0; i < prefixInput.length; i++) {
       assertEquals("Testing prefixes with analyzer " + a.getClass() + ", input string: "
-          + prefixInput[i], prefixExpected[i], parseWithAnalyzingQueryParser(prefixInput[i], a));
+          + prefixInput[i], prefixExpected[i], parseWithAnalyzingQueryParser(prefixInput[i], a, false));
     }
   }
 
   public void testRangeQuery() throws ParseException {
     for (int i = 0; i < rangeInput.length; i++) {
       assertEquals("Testing ranges with analyzer " + a.getClass() + ", input string: "
-          + rangeInput[i], rangeExpected[i], parseWithAnalyzingQueryParser(rangeInput[i], a));
+          + rangeInput[i], rangeExpected[i], parseWithAnalyzingQueryParser(rangeInput[i], a, false));
     }
   }
 
   public void testFuzzyQuery() throws ParseException {
     for (int i = 0; i < fuzzyInput.length; i++) {
       assertEquals("Testing fuzzys with analyzer " + a.getClass() + ", input string: "
-          + fuzzyInput[i], fuzzyExpected[i], parseWithAnalyzingQueryParser(fuzzyInput[i], a));
+          + fuzzyInput[i], fuzzyExpected[i], parseWithAnalyzingQueryParser(fuzzyInput[i], a, false));
     }
   }
 
-  private String parseWithAnalyzingQueryParser(String s, Analyzer a) throws ParseException {
-    AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, "field", a);
+
+  private String parseWithAnalyzingQueryParser(String s, Analyzer a, boolean allowLeadingWildcard) throws ParseException {
+    Query q = getAnalyzedQuery(s, a, allowLeadingWildcard);
+    return q.toString(FIELD);
+  }
+
+  private Query getAnalyzedQuery(String s, Analyzer a, boolean allowLeadingWildcard) throws ParseException {
+    AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a);
+    qp.setAllowLeadingWildcard(allowLeadingWildcard);
     org.apache.lucene.search.Query q = qp.parse(s);
-    return q.toString("field");
+    return q;
   }
-  
+
   final static class FoldingFilter extends TokenFilter {
     final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
@@ -144,31 +254,45 @@ public class TestAnalyzingQueryParser ex
   final static class ASCIIAnalyzer extends Analyzer {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-      Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
+      Tokenizer result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
       return new TokenStreamComponents(result, new FoldingFilter(result));
     }
   }
-  
+   
+
   // LUCENE-4176
   public void testByteTerms() throws Exception {
-    Directory ramDir = newDirectory();
+    String s = "เข";
     Analyzer analyzer = new MockBytesAnalyzer();
+    QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, analyzer);
+    Query q = qp.parse("[เข TO เข]");
+    assertEquals(true, isAHit(q, s, analyzer));
+  }
+   
+  
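+  // index the given content into a fresh directory, run the query against it,
+  // and report whether exactly one hit was found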
+  private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
+    Directory ramDir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, analyzer);
     Document doc = new Document();
     FieldType fieldType = new FieldType();
     fieldType.setIndexed(true);
     fieldType.setTokenized(true);
     fieldType.setStored(true);
-    Field field = new Field("content","เข", fieldType);
+    Field field = new Field(FIELD, content, fieldType);
     doc.add(field);
     writer.addDocument(doc);
     writer.close();
     DirectoryReader ir = DirectoryReader.open(ramDir);
-    IndexSearcher is = newSearcher(ir);
-    QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, "content", analyzer);
-    Query q = qp.parse("[เข TO เข]");
-    assertEquals(1, is.search(q, 10).totalHits);
+    IndexSearcher is = new IndexSearcher(ir);
+      
+    int hits = is.search(q, 10).totalHits;
     ir.close();
     ramDir.close();
+    if (hits == 1){
+      return true;
+    } else {
+      return false;
+    }
+
   }
 }
\ No newline at end of file