You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2010/11/05 21:07:20 UTC

svn commit: r1031765 - in /lucene/dev/trunk: lucene/ lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/ lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/ lucene/src/java/org/apache/lucene/index/c...

Author: rmuir
Date: Fri Nov  5 20:07:20 2010
New Revision: 1031765

URL: http://svn.apache.org/viewvc?rev=1031765&view=rev
Log:
LUCENE-588: Escaped wildcard character in wildcard term not handled correctly

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java
    lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java
    lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
    lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Nov  5 20:07:20 2010
@@ -113,6 +113,12 @@ Changes in backwards compatibility polic
 * LUCENE-2674: MultiTermQuery.TermCollector.collect now accepts the
   TermsEnum as well.  (Robert Muir, Mike McCandless)
 
+* LUCENE-588: WildcardQuery and QueryParser now allows escaping with 
+  the '\' character. Previously this was impossible (you could not escape */?,
+  for example).  If your code somehow depends on the old behavior, you will
+  need to change it (e.g. using "\\" to escape '\' itself).  
+  (Sunil Kamath, Terry Yang via Robert Muir)
+ 
 Changes in Runtime Behavior
 
 * LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/nodes/WildcardQueryNode.java Fri Nov  5 20:07:20 2010
@@ -49,9 +49,9 @@ public class WildcardQueryNode extends F
   @Override
   public CharSequence toQueryString(EscapeQuerySyntax escaper) {
     if (isDefaultField(this.field)) {
-      return getTermEscaped(escaper);
+      return this.text;
     } else {
-      return this.field + ":" + getTermEscaped(escaper);
+      return this.field + ":" + this.text;
     }
   }
 

Modified: lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (original)
+++ lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Fri Nov  5 20:07:20 2010
@@ -76,6 +76,7 @@ import org.apache.lucene.util.LuceneTest
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
+import org.junit.Ignore;
 
 /**
  * This test case is a copy of the core Lucene query parser test, it was adapted
@@ -945,6 +946,15 @@ public class TestQPHelper extends Lucene
     assertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&");
   }
 
+  @Ignore("contrib queryparser shouldn't escape wildcard terms")
+  public void testEscapedWildcard() throws Exception {
+    StandardQueryParser qp = new StandardQueryParser();
+    qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+
+    WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r"));
+    assertEquals(q, qp.parse("foo\\?ba?r", "field"));
+  }
+
   public void testTabNewlineCarriageReturn() throws Exception {
     assertQueryEqualsDOA("+weltbank +worlbank", null, "+weltbank +worlbank");
 

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java Fri Nov  5 20:07:20 2010
@@ -33,7 +33,11 @@ import org.apache.lucene.util.StringHelp
 import java.io.IOException;
 import java.util.Comparator;
 import java.util.Map;
+import java.util.Set;
 import java.util.HashMap;
+import java.util.TreeMap;
+import java.util.SortedMap;
+import java.util.Iterator;
 
 class SimpleTextFieldsReader extends FieldsProducer {
 
@@ -78,7 +82,7 @@ class SimpleTextFieldsReader extends Fie
   private class SimpleTextFieldsEnum extends FieldsEnum {
     private final IndexInput in;
     private final BytesRef scratch = new BytesRef(10);
-    private boolean omitTF;
+    private String current;
 
     public SimpleTextFieldsEnum() {
       this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
@@ -89,11 +93,12 @@ class SimpleTextFieldsReader extends Fie
       while(true) {
         readLine(in, scratch);
         if (scratch.equals(END)) {
+          current = null;
           return null;
         }
         if (scratch.startsWith(FIELD)) {
           String field = StringHelper.intern(new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8"));
-          omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
+          current = field;
           return field;
         }
       }
@@ -101,7 +106,7 @@ class SimpleTextFieldsReader extends Fie
 
     @Override
     public TermsEnum terms() throws IOException {
-      return new SimpleTextTermsEnum(in.getFilePointer(), omitTF);
+      return SimpleTextFieldsReader.this.terms(current).iterator();
     }
   }
 
@@ -109,21 +114,42 @@ class SimpleTextFieldsReader extends Fie
     private final IndexInput in;
     private final boolean omitTF;
     private BytesRef current;
-    private final long fieldStart;
-    private final BytesRef scratch = new BytesRef(10);
-    private final BytesRef scratch2 = new BytesRef(10);
     private int docFreq;
     private long docsStart;
     private boolean ended;
+    private final TreeMap<BytesRef,TermData> allTerms;
+    private Iterator<Map.Entry<BytesRef,TermData>> iter;
 
-    public SimpleTextTermsEnum(long offset, boolean omitTF) throws IOException {
+    public SimpleTextTermsEnum(TreeMap<BytesRef,TermData> allTerms, boolean omitTF) throws IOException {
       this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
-      this.in.seek(offset);
+      this.allTerms = allTerms;
       this.omitTF = omitTF;
-      fieldStart = offset;
+      iter = allTerms.entrySet().iterator();
     }
 
     public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
+      
+      final SortedMap<BytesRef,TermData> tailMap = allTerms.tailMap(text);
+
+      if (tailMap.isEmpty()) {
+        current = null;
+        return SeekStatus.END;
+      } else {
+        current = tailMap.firstKey();
+        final TermData td = tailMap.get(current);
+        docsStart = td.docsStart;
+        docFreq = td.docFreq;
+        iter = tailMap.entrySet().iterator();
+        assert iter.hasNext();
+        iter.next();
+        if (current.equals(text)) {
+          return SeekStatus.FOUND;
+        } else {
+          return SeekStatus.NOT_FOUND;
+        }
+      }
+
+      /*
       if (current != null) {
         final int cmp = current.compareTo(text);
         if (cmp == 0) {
@@ -153,6 +179,7 @@ class SimpleTextFieldsReader extends Fie
       current = null;
       ended = true;
       return SeekStatus.END;
+      */
     }
 
     @Override
@@ -162,6 +189,20 @@ class SimpleTextFieldsReader extends Fie
     @Override
     public BytesRef next() throws IOException {
       assert !ended;
+
+      if (iter.hasNext()) {
+        Map.Entry<BytesRef,TermData> ent = iter.next();
+        current = ent.getKey();
+        TermData td = ent.getValue();
+        docFreq = td.docFreq;
+        docsStart = td.docsStart;
+        return current;
+      } else {
+        current = null;
+        return null;
+      }
+
+      /*
       readLine(in, scratch);
       if (scratch.equals(END) || scratch.startsWith(FIELD)) {
         ended = true;
@@ -192,6 +233,7 @@ class SimpleTextFieldsReader extends Fie
         in.seek(lineStart);
         return current;
       }
+      */
     }
 
     @Override
@@ -447,20 +489,70 @@ class SimpleTextFieldsReader extends Fie
     }
   }
 
+  static class TermData {
+    public long docsStart;
+    public int docFreq;
+
+    public TermData(long docsStart, int docFreq) {
+      this.docsStart = docsStart;
+      this.docFreq = docFreq;
+    }
+  }
+
   private class SimpleTextTerms extends Terms {
     private final String field;
     private final long termsStart;
     private final boolean omitTF;
 
-    public SimpleTextTerms(String field, long termsStart) {
+    // NOTE: horribly, horribly RAM consuming, but then
+    // SimpleText should never be used in production
+    private final TreeMap<BytesRef,TermData> allTerms = new TreeMap<BytesRef,TermData>();
+
+    private final BytesRef scratch = new BytesRef(10);
+
+    public SimpleTextTerms(String field, long termsStart) throws IOException {
       this.field = StringHelper.intern(field);
       this.termsStart = termsStart;
       omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
+      loadTerms();
+    }
+
+    private void loadTerms() throws IOException {
+      IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
+      in.seek(termsStart);
+      final BytesRef lastTerm = new BytesRef(10);
+      long lastDocsStart = -1;
+      int docFreq = 0;
+      while(true) {
+        readLine(in, scratch);
+        if (scratch.equals(END) || scratch.startsWith(FIELD)) {
+          if (lastDocsStart != -1) {
+            allTerms.put(new BytesRef(lastTerm),
+                         new TermData(lastDocsStart, docFreq));
+          }
+          break;
+        } else if (scratch.startsWith(DOC)) {
+          docFreq++;
+        } else if (scratch.startsWith(TERM)) {
+          if (lastDocsStart != -1) {
+            allTerms.put(new BytesRef(lastTerm),
+                         new TermData(lastDocsStart, docFreq));
+          }
+          lastDocsStart = in.getFilePointer();
+          final int len = scratch.length - TERM.length;
+          if (len > lastTerm.length) {
+            lastTerm.grow(len);
+          }
+          System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
+          lastTerm.length = len;
+          docFreq = 0;
+        }
+      }
     }
 
     @Override
     public TermsEnum iterator() throws IOException {
-      return new SimpleTextTermsEnum(termsStart, omitTF);
+      return new SimpleTextTermsEnum(allTerms, omitTF);
     }
 
     @Override

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/queryParser/QueryParserBase.java Fri Nov  5 20:07:20 2010
@@ -1045,7 +1045,7 @@ public abstract class QueryParserBase {
 
     String termImage=discardEscapeChar(term.image);
     if (wildcard) {
-      q = getWildcardQuery(qfield, termImage);
+      q = getWildcardQuery(qfield, term.image);
     } else if (prefix) {
       q = getPrefixQuery(qfield,
           discardEscapeChar(term.image.substring

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/search/WildcardQuery.java Fri Nov  5 20:07:20 2010
@@ -45,6 +45,9 @@ public class WildcardQuery extends Autom
   /** Char equality with support for wildcards */
   public static final char WILDCARD_CHAR = '?';
 
+  /** Escape character */
+  public static final char WILDCARD_ESCAPE = '\\';
+  
   /**
    * Constructs a query for terms matching <code>term</code>. 
    */
@@ -56,6 +59,7 @@ public class WildcardQuery extends Autom
    * Convert Lucene wildcard syntax into an automaton.
    * @lucene.internal
    */
+  @SuppressWarnings("fallthrough")
   public static Automaton toAutomaton(Term wildcardquery) {
     List<Automaton> automata = new ArrayList<Automaton>();
     
@@ -63,6 +67,7 @@ public class WildcardQuery extends Autom
     
     for (int i = 0; i < wildcardText.length();) {
       final int c = wildcardText.codePointAt(i);
+      int length = Character.charCount(c);
       switch(c) {
         case WILDCARD_STRING: 
           automata.add(BasicAutomata.makeAnyString());
@@ -70,10 +75,18 @@ public class WildcardQuery extends Autom
         case WILDCARD_CHAR:
           automata.add(BasicAutomata.makeAnyChar());
           break;
+        case WILDCARD_ESCAPE:
+          // add the next codepoint instead, if it exists
+          if (i + length < wildcardText.length()) {
+            final int nextChar = wildcardText.codePointAt(i + length);
+            length += Character.charCount(nextChar);
+            automata.add(BasicAutomata.makeChar(nextChar));
+            break;
+          } // else fallthru, lenient parsing with a trailing \
         default:
           automata.add(BasicAutomata.makeChar(c));
       }
-      i += Character.charCount(c);
+      i += length;
     }
     
     return BasicOperations.concatenate(automata);

Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/automaton/SpecialOperations.java Fri Nov  5 20:07:20 2010
@@ -178,7 +178,7 @@ final public class SpecialOperations {
    * Reverses the language of the given (non-singleton) automaton while returning
    * the set of new initial states.
    */
-  static Set<State> reverse(Automaton a) {
+  public static Set<State> reverse(Automaton a) {
     a.expandSingleton();
     // reverse all edges
     HashMap<State, HashSet<Transition>> m = new HashMap<State, HashSet<Transition>>();

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java Fri Nov  5 20:07:20 2010
@@ -770,11 +770,11 @@ public class TestQueryParser extends Luc
 
     assertQueryEquals("a:b\\\\c*", a, "a:b\\c*");
 
-    assertQueryEquals("a:b\\-?c", a, "a:b-?c");
-    assertQueryEquals("a:b\\+?c", a, "a:b+?c");
-    assertQueryEquals("a:b\\:?c", a, "a:b:?c");
+    assertQueryEquals("a:b\\-?c", a, "a:b\\-?c");
+    assertQueryEquals("a:b\\+?c", a, "a:b\\+?c");
+    assertQueryEquals("a:b\\:?c", a, "a:b\\:?c");
 
-    assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
+    assertQueryEquals("a:b\\\\?c", a, "a:b\\\\?c");
 
     assertQueryEquals("a:b\\-c~", a, "a:b-c~2.0");
     assertQueryEquals("a:b\\+c~", a, "a:b+c~2.0");
@@ -1062,6 +1062,12 @@ public class TestQueryParser extends Luc
 
   }
 
+  public void testEscapedWildcard() throws Exception {
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
+    WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r"));
+    assertEquals(q, qp.parse("foo\\?ba?r"));
+  }
+  
   public void testRegexps() throws Exception {
     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
     RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));

Modified: lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java (original)
+++ lucene/dev/trunk/lucene/src/test/org/apache/lucene/search/TestWildcard.java Fri Nov  5 20:07:20 2010
@@ -205,6 +205,38 @@ public class TestWildcard
     indexStore.close();
   }
 
+  /**
+   * Tests if wildcard escaping works
+   */
+  public void testEscapes() throws Exception {
+    Directory indexStore = getIndexStore("field", 
+        new String[]{"foo*bar", "foo??bar", "fooCDbar", "fooSOMETHINGbar", "foo\\"});
+    IndexSearcher searcher = new IndexSearcher(indexStore, true);
+
+    // without escape: matches foo??bar, fooCDbar, foo*bar, and fooSOMETHINGbar
+    WildcardQuery unescaped = new WildcardQuery(new Term("field", "foo*bar"));
+    assertMatches(searcher, unescaped, 4);
+    
+    // with escape: only matches foo*bar
+    WildcardQuery escaped = new WildcardQuery(new Term("field", "foo\\*bar"));
+    assertMatches(searcher, escaped, 1);
+    
+    // without escape: matches foo??bar and fooCDbar
+    unescaped = new WildcardQuery(new Term("field", "foo??bar"));
+    assertMatches(searcher, unescaped, 2);
+    
+    // with escape: matches foo??bar only
+    escaped = new WildcardQuery(new Term("field", "foo\\?\\?bar"));
+    assertMatches(searcher, escaped, 1);
+    
+    // check escaping at end: lenient parse yields "foo\"
+    WildcardQuery atEnd = new WildcardQuery(new Term("field", "foo\\"));
+    assertMatches(searcher, atEnd, 1);
+    
+    searcher.close();
+    indexStore.close();
+  }
+  
   private Directory getIndexStore(String field, String[] contents)
       throws IOException {
     Directory indexStore = newDirectory();

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java Fri Nov  5 20:07:20 2010
@@ -30,6 +30,7 @@ import org.apache.lucene.util.Version;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.SpecialOperations;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.solr.analysis.*;
 import org.apache.solr.common.SolrException;
@@ -202,37 +203,36 @@ public class SolrQueryParser extends Que
     String type = schema.getFieldType(field).getTypeName();
     ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
     if (factory != null) {
+      Term term = new Term(field, termStr);
+      // fsa representing the query
+      Automaton automaton = WildcardQuery.toAutomaton(term);
+      // TODO: we should likely use the automaton to calculate shouldReverse, too.
       if (factory.shouldReverse(termStr)) {
-        int len = termStr.length();
-        char[] chars = new char[len+1];
-        chars[0] = factory.getMarkerChar();      
-        termStr.getChars(0, len, chars, 1);
-        ReversedWildcardFilter.reverse(chars, 1, len);
-        termStr = new String(chars);
+        automaton = BasicOperations.concatenate(automaton, BasicAutomata.makeChar(factory.getMarkerChar()));
+        SpecialOperations.reverse(automaton);
       } else { 
         // reverse wildcardfilter is active: remove false positives
-        Term term = new Term(field, termStr);
-        // fsa representing the query
-        Automaton a = WildcardQuery.toAutomaton(term);
         // fsa representing false positives (markerChar*)
         Automaton falsePositives = BasicOperations.concatenate(
             BasicAutomata.makeChar(factory.getMarkerChar()), 
             BasicAutomata.makeAnyString());
-        return new AutomatonQuery(term, BasicOperations.minus(a, falsePositives)) {
-          // override toString so its completely transparent
-          @Override
-          public String toString(String field) {
-            StringBuilder buffer = new StringBuilder();
-            if (!getField().equals(field)) {
-              buffer.append(getField());
-              buffer.append(":");
-            }
-            buffer.append(term.text());
-            buffer.append(ToStringUtils.boost(getBoost()));
-            return buffer.toString();
-          }
-        };
+        // subtract these away
+        automaton = BasicOperations.minus(automaton, falsePositives);
       }
+      return new AutomatonQuery(term, automaton) {
+        // override toString so its completely transparent
+        @Override
+        public String toString(String field) {
+          StringBuilder buffer = new StringBuilder();
+          if (!getField().equals(field)) {
+            buffer.append(getField());
+            buffer.append(":");
+          }
+          buffer.append(term.text());
+          buffer.append(ToStringUtils.boost(getBoost()));
+          return buffer.toString();
+        }
+      };
     }
     Query q = super.getWildcardQuery(field, termStr);
     if (q instanceof WildcardQuery) {

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java?rev=1031765&r1=1031764&r2=1031765&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java Fri Nov  5 20:07:20 2010
@@ -19,6 +19,7 @@ package org.apache.solr.analysis;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.lang.reflect.Field;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -26,8 +27,10 @@ import java.util.Map;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.AutomatonQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.SpecialOperations;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.search.SolrQueryParser;
@@ -51,6 +54,8 @@ public class TestReversedWildcardFilterF
   public void setUp() throws Exception {
     super.setUp();
     schema = new IndexSchema(solrConfig, getSchemaFile(), null);
+    clearIndex();
+    assertU(commit());
   }
 
   @Test
@@ -105,7 +110,7 @@ public class TestReversedWildcardFilterF
   }
   
   @Test
-  public void testQueryParsing() throws IOException, ParseException {
+  public void testQueryParsing() throws Exception {
 
     SolrQueryParser parserOne = new SolrQueryParser(schema, "one");
     assertTrue(parserOne.getAllowLeadingWildcard());
@@ -115,28 +120,53 @@ public class TestReversedWildcardFilterF
     // XXX note: this should be false, but for now we return true for any field,
     // XXX if at least one field uses the reversing
     assertTrue(parserThree.getAllowLeadingWildcard());
-    String text = "one +two *hree f*ur fiv* *si\uD834\uDD1Ex";
-    String expectedOne = "one:one +one:two one:\u0001eerh* one:\u0001ru*f one:fiv* one:\u0001x\uD834\uDD1Eis*";
-    String expectedTwo = "two:one +two:two two:\u0001eerh* two:\u0001ru*f two:fiv* two:\u0001x\uD834\uDD1Eis*";
-    String expectedThree = "three:one +three:two three:*hree three:f*ur three:fiv* three:*si\uD834\uDD1Ex";
-    Query q = parserOne.parse(text);
-    assertEquals(expectedOne, q.toString());
-    q = parserTwo.parse(text);
-    assertEquals(expectedTwo, q.toString());
-    q = parserThree.parse(text);
-    assertEquals(expectedThree, q.toString());
+    
+    // add some docs
+    assertU(adoc("id", "1", "one", "one"));
+    assertU(adoc("id", "2", "two", "two"));
+    assertU(adoc("id", "3", "three", "three"));
+    assertU(adoc("id", "4", "one", "four"));
+    assertU(adoc("id", "5", "two", "five"));
+    assertU(adoc("id", "6", "three", "si\uD834\uDD1Ex"));
+    assertU(commit());
+    
+    assertQ("should have matched",
+        req("+id:1 +one:one"),
+        "//result[@numFound=1]");
+    
+    assertQ("should have matched",
+        req("+id:4 +one:f*ur"),
+        "//result[@numFound=1]");
+        
+    assertQ("should have matched",
+        req("+id:6 +three:*si\uD834\uDD1Ex"),
+        "//result[@numFound=1]");
+    
     // test conditional reversal
-    String condText = "*hree t*ree th*ee thr*e ?hree t?ree th?ee th?*ee " + 
-        "short*token ver*longtoken";
-    String expected = "two:\u0001eerh* two:\u0001eer*t two:\u0001ee*ht " +
-        "two:thr*e " +
-        "two:\u0001eerh? two:\u0001eer?t " +
-        "two:th?ee " +
-        "two:th?*ee " +
-        "two:short*token " +
-        "two:\u0001nekotgnol*rev";
-    q = parserTwo.parse(condText);
-    assertEquals(expected, q.toString());
+    assertTrue(wasReversed(parserTwo, "*hree"));
+    assertTrue(wasReversed(parserTwo, "t*ree"));
+    assertTrue(wasReversed(parserTwo, "th*ee"));
+    assertFalse(wasReversed(parserTwo, "thr*e"));
+    assertTrue(wasReversed(parserTwo, "?hree"));
+    assertTrue(wasReversed(parserTwo, "t?ree"));
+    assertFalse(wasReversed(parserTwo, "th?ee"));
+    assertFalse(wasReversed(parserTwo, "th?*ee"));
+    assertFalse(wasReversed(parserTwo, "short*token"));
+    assertTrue(wasReversed(parserTwo, "ver*longtoken"));
+  }
+  
+  /** fragile assert: depends on our implementation, but cleanest way to check for now */ 
+  private boolean wasReversed(SolrQueryParser qp, String query) throws Exception {
+    Query q = qp.parse(query);
+    if (!(q instanceof AutomatonQuery))
+      return false;
+    // this is a hack to get the protected Automaton field in AutomatonQuery, 
+    // may break in later lucene versions - we have no getter... for good reasons.
+    final Field automatonField = AutomatonQuery.class.getDeclaredField("automaton");
+    automatonField.setAccessible(true);
+    Automaton automaton = (Automaton) automatonField.get(q);
+    String prefix = SpecialOperations.getCommonPrefix(automaton);
+    return prefix.length() > 0 && prefix.charAt(0) == '\u0001';
   }
 
   @Test