You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/02/14 22:39:11 UTC
svn commit: r910082 - in /lucene/java/branches/lucene_2_9: ./ contrib/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/
contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/
contrib/highlighter/src/test/ contrib/instant...
Author: uschindler
Date: Sun Feb 14 21:39:10 2010
New Revision: 910082
URL: http://svn.apache.org/viewvc?rev=910082&view=rev
Log:
LUCENE-2266: Fixed offset calculations in NGramTokenFilter and EdgeNGramTokenFilter
Modified:
lucene/java/branches/lucene_2_9/ (props changed)
lucene/java/branches/lucene_2_9/CHANGES.txt (props changed)
lucene/java/branches/lucene_2_9/build.xml (props changed)
lucene/java/branches/lucene_2_9/contrib/ (props changed)
lucene/java/branches/lucene_2_9/contrib/CHANGES.txt (contents, props changed)
lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
lucene/java/branches/lucene_2_9/contrib/highlighter/src/test/ (props changed)
lucene/java/branches/lucene_2_9/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (props changed)
lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/analysis/Tokenizer.java (props changed)
lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java (props changed)
lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/util/AttributeSource.java (props changed)
lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (props changed)
lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java (props changed)
lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java (props changed)
lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java (props changed)
lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (props changed)
lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java (props changed)
Propchange: lucene/java/branches/lucene_2_9/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,3 +1,3 @@
/lucene/java/branches/lucene_2_4:748824
/lucene/java/branches/lucene_3_0:886275,889688,891211,899639,900212,908479,909401
-/lucene/java/trunk:821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk:821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/CHANGES.txt:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/CHANGES.txt:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/CHANGES.txt:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/build.xml
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/build.xml:889688,891211,900212,908479,909401
-/lucene/java/trunk/build.xml:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/build.xml:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/contrib/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/contrib:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/contrib:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/contrib:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078
Modified: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/CHANGES.txt?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/contrib/CHANGES.txt Sun Feb 14 21:39:10 2010
@@ -32,6 +32,9 @@
CJKTokenizer, ChineseTokenizer, SmartChinese SentenceTokenizer,
and WikipediaTokenizer. (Koji Sekiguchi, Robert Muir)
+ * LUCENE-2266: Fixed offset calculations in NGramTokenFilter and
+ EdgeNGramTokenFilter. (Joe Calderon, Robert Muir via Uwe Schindler)
+
API Changes
* LUCENE-2108: Add SpellChecker.close, to close the underlying
Propchange: lucene/java/branches/lucene_2_9/contrib/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/contrib/CHANGES.txt:889688,891211,900212,908479,909401
-/lucene/java/trunk/contrib/CHANGES.txt:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/contrib/CHANGES.txt:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034,910078
Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Sun Feb 14 21:39:10 2010
@@ -70,6 +70,7 @@
private char[] curTermBuffer;
private int curTermLength;
private int curGramSize;
+ private int tokStart;
private TermAttribute termAtt;
private OffsetAttribute offsetAtt;
@@ -132,6 +133,7 @@
curTermBuffer = (char[]) termAtt.termBuffer().clone();
curTermLength = termAtt.termLength();
curGramSize = minGram;
+ tokStart = offsetAtt.startOffset();
}
}
if (curGramSize <= maxGram) {
@@ -141,7 +143,7 @@
int start = side == Side.FRONT ? 0 : curTermLength - curGramSize;
int end = start + curGramSize;
clearAttributes();
- offsetAtt.setOffset(start, end);
+ offsetAtt.setOffset(tokStart + start, tokStart + end);
termAtt.setTermBuffer(curTermBuffer, start, curGramSize);
curGramSize++;
return true;
Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Sun Feb 14 21:39:10 2010
@@ -38,6 +38,7 @@
private int curTermLength;
private int curGramSize;
private int curPos;
+ private int tokStart;
private TermAttribute termAtt;
private OffsetAttribute offsetAtt;
@@ -82,13 +83,14 @@
curTermLength = termAtt.termLength();
curGramSize = minGram;
curPos = 0;
+ tokStart = offsetAtt.startOffset();
}
}
while (curGramSize <= maxGram) {
while (curPos+curGramSize <= curTermLength) { // while there is input
clearAttributes();
termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize);
- offsetAtt.setOffset(curPos, curPos+curGramSize);
+ offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
curPos++;
return true;
}
Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java Sun Feb 14 21:39:10 2010
@@ -92,7 +92,7 @@
public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
- assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,0}, new int[]{3,3});
+ assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
public void testReset() throws Exception {
Modified: lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=910082&r1=910081&r2=910082&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/java/branches/lucene_2_9/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Sun Feb 14 21:39:10 2010
@@ -82,7 +82,7 @@
public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(new StringReader("abc de fgh"));
NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
- assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,0}, new int[]{3,3});
+ assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
public void testReset() throws Exception {
Propchange: lucene/java/branches/lucene_2_9/contrib/highlighter/src/test/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/contrib/highlighter/src/test:889688,891211,900212,908479,909401
-/lucene/java/trunk/contrib/highlighter/src/test:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/contrib/highlighter/src/test:821888,889431-889432,891209,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -2,4 +2,4 @@
/lucene/java/branches/lucene_2_9/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:817269-818599
/lucene/java/branches/lucene_2_9_back_compat_tests/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:818601-821336
/lucene/java/branches/lucene_3_0/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:889463,889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:821888,881213,881315,881466,881819,882374,882672,882807,882888,882977,883074-883075,883554,884870,886257,886911,887347,887532,887602,888247,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java:821888,881213,881315,881466,881819,882374,882672,882807,882888,882977,883074-883075,883554,884870,886257,886911,887347,887532,887602,888247,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/analysis/Tokenizer.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/analysis/Tokenizer.java:889688,891211,900212,908479,909401
-/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java:821888,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/java/org/apache/lucene/analysis/Tokenizer.java:821888,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/util/AttributeSource.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,3 +1,3 @@
/lucene/java/branches/lucene_2_4/src/java/org/apache/lucene/util/AttributeSource.java:748824
/lucene/java/branches/lucene_3_0/src/java/org/apache/lucene/util/AttributeSource.java:886275,889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java:821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887347,887532,891189,891363,894348,897672,908975,910034
+/lucene/java/trunk/src/java/org/apache/lucene/util/AttributeSource.java:821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,886257,887347,887532,891189,891363,894348,897672,908975,910034,910078
Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,3 +1,3 @@
/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:748824
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:886275,889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,887347,887532,891189,891363,897672,908975,910034
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,821888,824125,826029,826385,830871,833095,833297,833886,881819,882672,883554,884870,887347,887532,891189,891363,897672,908975,910034,910078
Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestISOLatin1AccentFilter.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestDateTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/document/TestDateTools.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/document/TestDateTools.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/document/TestDateTools.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/document/TestNumberTools.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/document/TestNumberTools.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/document/TestNumberTools.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/document/TestNumberTools.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078
Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/util/TestAttributeSource.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Sun Feb 14 21:39:10 2010
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_3_0/src/test/org/apache/lucene/util/TestAttributeSource.java:889688,891211,899639,900212,908479,909401
-/lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034
+/lucene/java/trunk/src/test/org/apache/lucene/util/TestAttributeSource.java:821888,881819,886257,887347,887532,889431-889432,891189,891209,891363,897672,899627,900196,908477,908975,909398,910034,910078