You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/01 11:42:30 UTC
svn commit: r1239044 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/src/test-framework/java/org/apache/lucene/analysis/
lucene/src/test/org/apache/lucene/analysis/
Author: rmuir
Date: Wed Feb 1 10:42:29 2012
New Revision: 1239044
URL: http://svn.apache.org/viewvc?rev=1239044&view=rev
Log:
LUCENE-3741: MockCharFilter offset correction is wrong
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java
lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
Modified: lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java?rev=1239044&r1=1239043&r2=1239044&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockCharFilter.java Wed Feb 1 10:42:29 2012
@@ -52,7 +52,7 @@ class MockCharFilter extends CharStream
bufferedCh = -1;
currentOffset++;
- addOffCorrectMap(currentOffset+delta, delta-1);
+ addOffCorrectMap(currentOffset, delta-1);
delta--;
return ch;
}
Modified: lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java?rev=1239044&r1=1239043&r2=1239044&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test-framework/java/org/apache/lucene/analysis/MockTokenizer.java Wed Feb 1 10:42:29 2012
@@ -69,6 +69,7 @@ public class MockTokenizer extends Token
};
private State streamState = State.CLOSE;
+ private int lastOffset = 0; // only for asserting
private boolean enableChecks = true;
public MockTokenizer(AttributeFactory factory, Reader input, int pattern, boolean lowerCase, int maxTokenLength) {
@@ -109,7 +110,15 @@ public class MockTokenizer extends Token
}
cp = readCodePoint();
} while (cp >= 0 && isTokenChar(cp));
- offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
+
+ int correctedStartOffset = correctOffset(startOffset);
+ int correctedEndOffset = correctOffset(endOffset);
+ assert correctedStartOffset >= 0;
+ assert correctedEndOffset >= 0;
+ assert correctedStartOffset >= lastOffset;
+ lastOffset = correctedStartOffset;
+ assert correctedEndOffset >= correctedStartOffset;
+ offsetAtt.setOffset(correctedStartOffset, correctedEndOffset);
streamState = State.INCREMENT;
return true;
}
@@ -157,7 +166,7 @@ public class MockTokenizer extends Token
@Override
public void reset() throws IOException {
super.reset();
- off = 0;
+ lastOffset = off = 0;
assert !enableChecks || streamState != State.RESET : "double reset()";
streamState = State.RESET;
}
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java?rev=1239044&r1=1239043&r2=1239044&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java Wed Feb 1 10:42:29 2012
@@ -2,6 +2,8 @@ package org.apache.lucene.analysis;
import java.io.StringReader;
+import org.apache.lucene.util._TestUtil;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -87,4 +89,21 @@ public class TestMockAnalyzer extends Ba
public void testRandomStrings() throws Exception {
checkRandomData(random, new MockAnalyzer(random), atLeast(1000));
}
+
+ public void testForwardOffsets() throws Exception {
+ int num = atLeast(10000);
+ for (int i = 0; i < num; i++) {
+ String s = _TestUtil.randomHtmlishString(random, 20);
+ StringReader reader = new StringReader(s);
+ MockCharFilter charfilter = new MockCharFilter(CharReader.get(reader), 2);
+ MockAnalyzer analyzer = new MockAnalyzer(random);
+ TokenStream ts = analyzer.tokenStream("bogus", charfilter);
+ ts.reset();
+ while (ts.incrementToken()) {
+ ;
+ }
+ ts.end();
+ ts.close();
+ }
+ }
}