You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/02/14 22:39:11 UTC

svn commit: r910082 - in /lucene/java/branches/lucene_2_9: ./ contrib/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/ contrib/highlighter/src/test/ contrib/instant...

Author: uschindler
Date: Sun Feb 14 21:39:10 2010
New Revision: 910082

URL: http://svn.apache.org/viewvc?rev=910082&view=rev
Log:
LUCENE-2266: Fixed offset calculations in NGramTokenFilter and EdgeNGramTokenFilter

Modified:
    lucene/java/branches/lucene_2_9/   (props changed)
    lucene/java/branches/lucene_2_9/CHANGES.txt   (props changed)
    lucene/java/branches/lucene_2_9/build.xml   (props changed)
    lucene/java/branches/lucene_2_9/contrib/   (props changed)
    lucene/java/branches/lucene_2_9/contrib/CHANGES.txt   (contents, props changed)
    lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
    lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
    lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
    lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
    lucene/java/branches/lucene_2_9/contrib/highlighter/src/test/   (props changed)
    lucene/java/branches/lucene_2_9/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java   (props changed)
    lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/analysis/Tokenizer.java   (props changed)
    lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java   (props changed)
    lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/util/AttributeSource.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java   (props changed)

Propchange: lucene/java/branches/lucene_2_9/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,3 +1,3 @@
 /lucene/java/branches/lucene_2_4:748824
 /lucene/java/branches/lucene_3_0:886275,889688,891211,899639,900212,908479,909401
-/lucene/java/trunk:821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk:821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/CHANGES.txt:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/CHANGES.txt:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/CHANGES.txt:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/build.xml
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/build.xml:889688,891211,900212,908479,909401
-/lucene/java/trunk/build.xml:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/build.xml:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/contrib/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/contrib:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/contrib:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/contrib:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078

Modified: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/CHANGES.txt?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/contrib/CHANGES.txt Sun Feb 14 21:39:10 2010
@@ -32,6 +32,9 @@
    CJKTokenizer, ChineseTokenizer, SmartChinese SentenceTokenizer,
    and WikipediaTokenizer.  (Koji Sekiguchi, Robert Muir)
 
+ * LUCENE-2266: Fixed offset calculations in NGramTokenFilter and 
+   EdgeNGramTokenFilter.  (Joe Calderon, Robert Muir via Uwe Schindler)
+   
 API Changes
 
  * LUCENE-2108: Add SpellChecker.close, to close the underlying

Propchange: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/contrib/CHANGES.txt:889688,891211,900212,908479,909401
-/lucene/java/trunk/contrib/CHANGES.txt:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/contrib/CHANGES.txt:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034,910078

Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Sun Feb 14 21:39:10 2010
@@ -70,6 +70,7 @@
   private char[] curTermBuffer;
   private int curTermLength;
   private int curGramSize;
+  private int tokStart;
   
   private TermAttribute termAtt;
   private OffsetAttribute offsetAtt;
@@ -132,6 +133,7 @@
           curTermBuffer = (char[]) termAtt.termBuffer().clone();
           curTermLength = termAtt.termLength();
           curGramSize = minGram;
+          tokStart = offsetAtt.startOffset();
         }
       }
       if (curGramSize <= maxGram) {
@@ -141,7 +143,7 @@
           int start = side == Side.FRONT ? 0 : curTermLength - curGramSize;
           int end = start + curGramSize;
           clearAttributes();
-          offsetAtt.setOffset(start, end);
+          offsetAtt.setOffset(tokStart + start, tokStart + end);
           termAtt.setTermBuffer(curTermBuffer, start, curGramSize);
           curGramSize++;
           return true;

Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Sun Feb 14 21:39:10 2010
@@ -38,6 +38,7 @@
   private int curTermLength;
   private int curGramSize;
   private int curPos;
+  private int tokStart;
   
   private TermAttribute termAtt;
   private OffsetAttribute offsetAtt;
@@ -82,13 +83,14 @@
           curTermLength = termAtt.termLength();
           curGramSize = minGram;
           curPos = 0;
+          tokStart = offsetAtt.startOffset();
         }
       }
       while (curGramSize <= maxGram) {
         while (curPos+curGramSize <= curTermLength) {     // while there is input
           clearAttributes();
           termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize);
-          offsetAtt.setOffset(curPos, curPos+curGramSize);
+          offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
           curPos++;
           return true;
         }

Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java Sun Feb 14 21:39:10 2010
@@ -92,7 +92,7 @@
   public void testSmallTokenInStream() throws Exception {
     input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
     EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
-    assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,0}, new int[]{3,3});
+    assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
   }
   
   public void testReset() throws Exception {

Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Sun Feb 14 21:39:10 2010
@@ -82,7 +82,7 @@
     public void testSmallTokenInStream() throws Exception {
       input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
       NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
-      assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,0}, new int[]{3,3});
+      assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
     }
     
     public void testReset() throws Exception {

Propchange: lucene/java/branches/lucene_2_9/contrib/highlighter/src/test/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/contrib/highlighter/src/test:889688,891211,900212,908479,909401
-/lucene/java/trunk/contrib/highlighter/src/test:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/contrib/highlighter/src/test:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -2,4 +2,4 @@
 /lucene/java/branches/lucene_2_9/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:817269-818599
 /lucene/java/branches/lucene_2_9_back_compat_tests/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:818601-821336
 /lucene/java/branches/lucene_3_0/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:889463,889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:821888,881213,881315,881466,881819,882374,882672,882807,882888,882977,883074-883075,883554,884870,886257,886911,887347,887532,887602,888247,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:821888,881213,881315,881466,881819,882374,882672,882807,882888,882977,883074-883075,883554,884870,886257,886911,887347,887532,887602,888247,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/analysis/Tokenizer.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/analysis/Tokenizer.java:889688,891211,900212,908479,909401
-/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java:821888,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java:821888,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/util/AttributeSource.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,3 +1,3 @@
 /lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/util/AttributeSource.java:748824
 /lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/util/AttributeSource.java:886275,889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java:821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887347,887532,891189,891363,894348,897672,908975,910034
+/lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java:821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887347,887532,891189,891363,894348,897672,908975,910034,910078

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,3 +1,3 @@
 /lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:748824
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:886275,889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,887347,887532,891189,891363,897672,908975,910034
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,887347,887532,891189,891363,897672,908975,910034,910078

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/document/TestDateTools.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/document/TestDateTools.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/document/TestDateTools.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/document/TestNumberTools.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/document/TestNumberTools.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/document/TestNumberTools.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/util/TestAttributeSource.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078