You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/04/09 22:00:51 UTC
svn commit: r1311405 - in /lucene/dev/branches/lucene3969:
lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
Author: mikemccand
Date: Mon Apr 9 20:00:50 2012
New Revision: 1311405
URL: http://svn.apache.org/viewvc?rev=1311405&view=rev
Log:
LUCENE-3969: ValidatingTokenFilter shouldn't create new atts
Modified:
lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
Modified: lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java?rev=1311405&r1=1311404&r2=1311405&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java (original)
+++ lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java Mon Apr 9 20:00:50 2012
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.Attribute;
// nocommit better name...?
@@ -41,14 +42,22 @@ public final class ValidatingTokenFilter
private final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>();
private final Map<Integer,Integer> posToEndOffset = new HashMap<Integer,Integer>();
- // nocommit must be more careful here? check hasAttribute first...?
- private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
- private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncAtt = getAttrIfExists(PositionIncrementAttribute.class);
+ private final PositionLengthAttribute posLenAtt = getAttrIfExists(PositionLengthAttribute.class);
+ private final OffsetAttribute offsetAtt = getAttrIfExists(OffsetAttribute.class);
+ private final CharTermAttribute termAtt = getAttrIfExists(CharTermAttribute.class);
private final String name;
+ // Returns null if the attr wasn't already added
+ private <A extends Attribute> A getAttrIfExists(Class<A> att) {
+ if (hasAttribute(att)) {
+ return getAttribute(att);
+ } else {
+ return null;
+ }
+ }
+
/** The name arg is used to identify this stage when
* throwing exceptions (useful if you have more than one
* instance in your chain). */
@@ -63,49 +72,61 @@ public final class ValidatingTokenFilter
return false;
}
- pos += posIncAtt.getPositionIncrement();
- if (pos == -1) {
- throw new IllegalStateException("first posInc must be > 0");
- }
+ if (posIncAtt != null && offsetAtt != null) {
+
+ pos += posIncAtt.getPositionIncrement();
+ if (pos == -1) {
+ throw new IllegalStateException("first posInc must be > 0");
+ }
- final int startOffset = offsetAtt.startOffset();
- final int endOffset = offsetAtt.endOffset();
+ final int startOffset = offsetAtt.startOffset();
+ final int endOffset = offsetAtt.endOffset();
- final int posLen = posLenAtt.getPositionLength();
- if (!posToStartOffset.containsKey(pos)) {
- // First time we've seen a token leaving from this position:
- posToStartOffset.put(pos, startOffset);
- System.out.println(" + s " + pos + " -> " + startOffset);
- } else {
- // We've seen a token leaving from this position
- // before; verify the startOffset is the same:
- System.out.println(" + vs " + pos + " -> " + startOffset);
- final int oldStartOffset = posToStartOffset.get(pos);
- if (oldStartOffset != startOffset) {
- throw new IllegalStateException(name + ": inconsistent startOffset as pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
+ final int posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
+
+ if (!posToStartOffset.containsKey(pos)) {
+ // First time we've seen a token leaving from this position:
+ posToStartOffset.put(pos, startOffset);
+ System.out.println(" + s " + pos + " -> " + startOffset);
+ } else {
+ // We've seen a token leaving from this position
+ // before; verify the startOffset is the same:
+ System.out.println(" + vs " + pos + " -> " + startOffset);
+ final int oldStartOffset = posToStartOffset.get(pos);
+ if (oldStartOffset != startOffset) {
+ throw new IllegalStateException(name + ": inconsistent startOffset as pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
+ }
}
- }
- final int endPos = pos + posLen;
+ final int endPos = pos + posLen;
- if (!posToEndOffset.containsKey(endPos)) {
- // First time we've seen a token arriving to this position:
- posToEndOffset.put(endPos, endOffset);
- System.out.println(" + e " + endPos + " -> " + endOffset);
- } else {
- // We've seen a token arriving to this position
- // before; verify the endOffset is the same:
- System.out.println(" + ve " + endPos + " -> " + endOffset);
- final int oldEndOffset = posToEndOffset.get(endPos);
- if (oldEndOffset != endOffset) {
- throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
+ if (!posToEndOffset.containsKey(endPos)) {
+ // First time we've seen a token arriving to this position:
+ posToEndOffset.put(endPos, endOffset);
+ System.out.println(" + e " + endPos + " -> " + endOffset);
+ } else {
+ // We've seen a token arriving to this position
+ // before; verify the endOffset is the same:
+ System.out.println(" + ve " + endPos + " -> " + endOffset);
+ final int oldEndOffset = posToEndOffset.get(endPos);
+ if (oldEndOffset != endOffset) {
+ throw new IllegalStateException(name + ": inconsistent endOffset as pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
+ }
}
}
return true;
}
- // TODO: end? (what to validate?)
+ @Override
+ public void end() throws IOException {
+ super.end();
+
+ // TODO: what else to validate
+
+ // nocommit check that endOffset is >= max(endOffset)
+ // we've seen
+ }
@Override
public void reset() throws IOException {
Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1311405&r1=1311404&r2=1311405&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Mon Apr 9 20:00:50 2012
@@ -111,7 +111,10 @@ public class TestRandomChains extends Ba
// broken!
EdgeNGramTokenizer.class,
// broken!
- EdgeNGramTokenFilter.class
+ EdgeNGramTokenFilter.class,
+ // Not broken: we forcefully add this, so we shouldn't
+ // also randomly pick it:
+ ValidatingTokenFilter.class
);
}
@@ -135,11 +138,6 @@ public class TestRandomChains extends Ba
continue;
}
- if (c == ValidatingTokenFilter.class) {
- // We insert this one ourselves after each stage...
- continue;
- }
-
for (final Constructor<?> ctor : c.getConstructors()) {
// don't test deprecated ctors, they likely have known bugs:
if (ctor.isAnnotationPresent(Deprecated.class) || ctor.isSynthetic()) {