You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/06/28 15:29:14 UTC

svn commit: r1355008 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/queryparser/ lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/ lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/ lucene/test-framework/ lu...

Author: rmuir
Date: Thu Jun 28 13:29:13 2012
New Revision: 1355008

URL: http://svn.apache.org/viewvc?rev=1355008&view=rev
Log:
LUCENE-4176: fix AnalyzingQueryParser to analyze range endpoints as bytes

Added:
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
      - copied unchanged from r1355001, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAnalyzer.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAttributeFactory.java
      - copied unchanged from r1355001, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockBytesAttributeFactory.java
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/MockUTF16TermAttributeImpl.java
      - copied unchanged from r1355001, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/analysis/MockUTF16TermAttributeImpl.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
    lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1355008&r1=1355007&r2=1355008&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Jun 28 13:29:13 2012
@@ -15,6 +15,11 @@ API Changes
   has a different API (carries a list of tags instead of a compound tag). Upgrade
   of embedded morfologik dictionaries to version 1.9. (Dawid Weiss)
 
+Bug Fixes
+
+* LUCENE-4176: Fix AnalyzingQueryParser to analyze range endpoints as bytes,
+  so that it works correctly with Analyzers that produce binary non-UTF-8 terms
+  such as CollationAnalyzer. (Nattapong Sirilappanich via Robert Muir) 
 
 ======================= Lucene 4.0.0-ALPHA =======================
 More information about this release, including any errata related to the 

Modified: lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java?rev=1355008&r1=1355007&r2=1355008&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java Thu Jun 28 13:29:13 2012
@@ -50,6 +50,7 @@ public class AnalyzingQueryParser extend
    */
   public AnalyzingQueryParser(Version matchVersion, String field, Analyzer analyzer) {
     super(matchVersion, field, analyzer);
+    setAnalyzeRangeTerms(true);
   }
 
   /**
@@ -278,72 +279,4 @@ public class AnalyzingQueryParser extend
 
     return (nextToken == null) ? null : super.getFuzzyQuery(field, nextToken, minSimilarity);
   }
-
-  /**
-   * Overrides super class, by passing terms through analyzer.
-   * @exception ParseException
-   */
-  @Override
-  protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive)
-      throws ParseException {
-    // get Analyzer from superclass and tokenize the terms
-    TokenStream source = null;
-    CharTermAttribute termAtt = null;
-    boolean multipleTokens = false;
-
-    if (part1 != null) {
-      // part1
-      try {
-        source = getAnalyzer().tokenStream(field, new StringReader(part1));
-        termAtt = source.addAttribute(CharTermAttribute.class);
-        source.reset();
-        multipleTokens = false;
-
-
-        if (source.incrementToken()) {
-          part1 = termAtt.toString();
-        }
-        multipleTokens = source.incrementToken();
-      } catch (IOException e) {
-        // ignore
-      }
-      try {
-        source.end();
-        source.close();
-      } catch (IOException e) {
-        // ignore
-      }
-      if (multipleTokens) {
-        throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
-            + " - tokens were added to part1");
-      }
-    }
-
-    if (part2 != null) {
-      try {
-        // part2
-        source = getAnalyzer().tokenStream(field, new StringReader(part2));
-        termAtt = source.addAttribute(CharTermAttribute.class);
-        source.reset();
-        if (source.incrementToken()) {
-          part2 = termAtt.toString();
-        }
-        multipleTokens = source.incrementToken();
-      } catch (IOException e) {
-        // ignore
-      }
-      try {
-        source.end();
-        source.close();
-      } catch (IOException e) {
-        // ignore
-      }
-      if (multipleTokens) {
-        throw new ParseException("Cannot build RangeQuery with analyzer " + getAnalyzer().getClass()
-            + " - tokens were added to part2");
-      }
-    }
-    return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
-  }
-
 }

Modified: lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java?rev=1355008&r1=1355007&r2=1355008&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queryparser/src/test/org/apache/lucene/queryparser/analyzing/TestAnalyzingQueryParser.java Thu Jun 28 13:29:13 2012
@@ -22,11 +22,22 @@ import java.io.Reader;
 
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 
 /**
  */
+@SuppressCodecs("Lucene3x") // binary terms
 public class TestAnalyzingQueryParser extends LuceneTestCase {
 
   private Analyzer a;
@@ -138,5 +149,28 @@ public class TestAnalyzingQueryParser ex
       Tokenizer result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
       return new TokenStreamComponents(result, new FoldingFilter(result));
     }
-  }  
+  }
+  
+  // LUCENE-4176
+  public void testByteTerms() throws Exception {
+    Directory ramDir = newDirectory();
+    Analyzer analyzer = new MockBytesAnalyzer();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, analyzer);
+    Document doc = new Document();
+    FieldType fieldType = new FieldType();
+    fieldType.setIndexed(true);
+    fieldType.setTokenized(true);
+    fieldType.setStored(true);
+    Field field = new Field("content","เข", fieldType);
+    doc.add(field);
+    writer.addDocument(doc);
+    writer.close();
+    DirectoryReader ir = DirectoryReader.open(ramDir);
+    IndexSearcher is = new IndexSearcher(ir);
+    QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, "content", analyzer);
+    Query q = qp.parse("[เข TO เข]");
+    assertEquals(1, is.search(q, 10).totalHits);
+    ir.close();
+    ramDir.close();
+  }
 }
\ No newline at end of file