You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/02/14 22:33:13 UTC
svn commit: r910078 - in /lucene/java/trunk/contrib: ./
analyzers/common/src/java/org/apache/lucene/analysis/ngram/
analyzers/common/src/test/org/apache/lucene/analysis/ngram/
Author: uschindler
Date: Sun Feb 14 21:33:12 2010
New Revision: 910078
URL: http://svn.apache.org/viewvc?rev=910078&view=rev
Log:
LUCENE-2266: Fixed offset calculations in NGramTokenFilter and EdgeNGramTokenFilter
Modified:
lucene/java/trunk/contrib/CHANGES.txt
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=910078&r1=910077&r2=910078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Sun Feb 14 21:33:12 2010
@@ -153,6 +153,9 @@
CJKTokenizer, ChineseTokenizer, SmartChinese SentenceTokenizer,
and WikipediaTokenizer. (Koji Sekiguchi, Robert Muir)
+ * LUCENE-2266: Fixed offset calculations in NGramTokenFilter and
+ EdgeNGramTokenFilter. (Joe Calderon, Robert Muir via Uwe Schindler)
+
API Changes
* LUCENE-2108: Add SpellChecker.close, to close the underlying
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=910078&r1=910077&r2=910078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Sun Feb 14 21:33:12 2010
@@ -70,6 +70,7 @@
private char[] curTermBuffer;
private int curTermLength;
private int curGramSize;
+ private int tokStart;
private final TermAttribute termAtt;
private final OffsetAttribute offsetAtt;
@@ -126,6 +127,7 @@
curTermBuffer = (char[]) termAtt.termBuffer().clone();
curTermLength = termAtt.termLength();
curGramSize = minGram;
+ tokStart = offsetAtt.startOffset();
}
}
if (curGramSize <= maxGram) {
@@ -135,7 +137,7 @@
int start = side == Side.FRONT ? 0 : curTermLength - curGramSize;
int end = start + curGramSize;
clearAttributes();
- offsetAtt.setOffset(start, end);
+ offsetAtt.setOffset(tokStart + start, tokStart + end);
termAtt.setTermBuffer(curTermBuffer, start, curGramSize);
curGramSize++;
return true;
Modified: lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=910078&r1=910077&r2=910078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Sun Feb 14 21:33:12 2010
@@ -37,6 +37,7 @@
private int curTermLength;
private int curGramSize;
private int curPos;
+ private int tokStart;
private TermAttribute termAtt;
private OffsetAttribute offsetAtt;
@@ -82,13 +83,14 @@
curTermLength = termAtt.termLength();
curGramSize = minGram;
curPos = 0;
+ tokStart = offsetAtt.startOffset();
}
}
while (curGramSize <= maxGram) {
while (curPos+curGramSize <= curTermLength) { // while there is input
clearAttributes();
termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize);
- offsetAtt.setOffset(curPos, curPos+curGramSize);
+ offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
curPos++;
return true;
}
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java?rev=910078&r1=910077&r2=910078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java Sun Feb 14 21:33:12 2010
@@ -94,7 +94,7 @@
public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abc de fgh"));
EdgeNGramTokenFilter tokenizer = new EdgeNGramTokenFilter(input, EdgeNGramTokenFilter.Side.FRONT, 3, 3);
- assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,0}, new int[]{3,3});
+ assertTokenStreamContents(tokenizer, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
public void testReset() throws Exception {
Modified: lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java?rev=910078&r1=910077&r2=910078&view=diff
==============================================================================
--- lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (original)
+++ lucene/java/trunk/contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java Sun Feb 14 21:33:12 2010
@@ -83,7 +83,7 @@
public void testSmallTokenInStream() throws Exception {
input = new WhitespaceTokenizer(Version.LUCENE_CURRENT, new StringReader("abc de fgh"));
NGramTokenFilter filter = new NGramTokenFilter(input, 3, 3);
- assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,0}, new int[]{3,3});
+ assertTokenStreamContents(filter, new String[]{"abc","fgh"}, new int[]{0,7}, new int[]{3,10});
}
public void testReset() throws Exception {