You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/04/11 18:01:08 UTC
svn commit: r1324834 - in /lucene/dev/branches/lucene3969:
lucene/test-framework/src/java/org/apache/lucene/analysis/
modules/analysis/common/src/java/org/apache/lucene/analysis/compound/
modules/analysis/common/src/java/org/apache/lucene/analysis/path...
Author: rmuir
Date: Wed Apr 11 16:01:07 2012
New Revision: 1324834
URL: http://svn.apache.org/viewvc?rev=1324834&view=rev
Log:
LUCENE-3969: clean up nocommits
Modified:
lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
Modified: lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java?rev=1324834&r1=1324833&r2=1324834&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java (original)
+++ lucene/dev/branches/lucene3969/lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java Wed Apr 11 16:01:07 2012
@@ -27,13 +27,13 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.Attribute;
-// nocommit rename to OffsetsXXXTF? ie we only validate
+// TODO: rename to OffsetsXXXTF? ie we only validate
// offsets (now anyway...)
// TODO: also make a DebuggingTokenFilter, that just prints
// all att values that come through it...
-// nocommit BTSTC should just append this to the chain
+// TODO: BTSTC should just append this to the chain
// instead of checking itself:
/** A TokenFilter that checks consistency of the tokens (eg
@@ -155,7 +155,7 @@ public final class ValidatingTokenFilter
// TODO: what else to validate
- // nocommit check that endOffset is >= max(endOffset)
+ // TODO: check that endOffset is >= max(endOffset)
// we've seen
}
Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java?rev=1324834&r1=1324833&r2=1324834&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java Wed Apr 11 16:01:07 2012
@@ -191,7 +191,7 @@ public class HyphenationCompoundWordToke
// we only put subwords to the token stream
// that are longer than minPartSize
if (partLength < this.minSubwordSize) {
- // nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
+ // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
// calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
continue;
}
Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java?rev=1324834&r1=1324833&r2=1324834&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java Wed Apr 11 16:01:07 2012
@@ -81,7 +81,6 @@ public class ReversePathHierarchyTokeniz
throw new IllegalArgumentException("bufferSize cannot be negative");
}
if (skip < 0) {
- // nocommit: not quite right right here: see line 84... if skip > numTokensFound we always get a NegativeArrayException? needs fixing!
throw new IllegalArgumentException("skip cannot be negative");
}
termAtt.resizeBuffer(bufferSize);
Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java?rev=1324834&r1=1324833&r2=1324834&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java Wed Apr 11 16:01:07 2012
@@ -195,8 +195,7 @@ public class TestMappingCharFilter exten
checkRandomData(random, analyzer, numRounds);
}
- // nocommit: wrong final offset, fix this!
- @Ignore
+ @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
public void testFinalOffsetSpecialCase() throws Exception {
final NormalizeCharMap map = new NormalizeCharMap();
map.add("t", "");
@@ -220,8 +219,7 @@ public class TestMappingCharFilter exten
checkAnalysisConsistency(random, analyzer, false, text);
}
- // nocommit: this is intended to fail until we fix bugs
- @Ignore
+ @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
public void testRandomMaps() throws Exception {
for (int i = 0; i < 100; i++) {
final NormalizeCharMap map = randomMap();
Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1324834&r1=1324833&r2=1324834&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Wed Apr 11 16:01:07 2012
@@ -81,6 +81,7 @@ import org.apache.lucene.analysis.positi
import org.apache.lucene.analysis.snowball.TestSnowball;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.th.ThaiWordFilter;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
@@ -105,7 +106,7 @@ public class TestRandomChains extends Ba
// TODO: fix those and remove
private static final Set<Class<?>> brokenComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
static {
- // nocommit can we promote some of these to be only
+ // TODO: can we promote some of these to be only
// offsets offenders?
Collections.<Class<?>>addAll(brokenComponents,
// TODO: fix basetokenstreamtestcase not to trip because this one has no CharTermAtt
@@ -132,7 +133,11 @@ public class TestRandomChains extends Ba
EdgeNGramTokenizer.class,
// broken!
EdgeNGramTokenFilter.class,
- // nocommit: remove this class after we fix its finalOffset bug
+ // broken!
+ WordDelimiterFilter.class,
+ // broken!
+ TrimFilter.class,
+ // TODO: remove this class after we fix its finalOffset bug
MappingCharFilter.class
);
}
@@ -142,16 +147,16 @@ public class TestRandomChains extends Ba
private static final Set<Class<?>> brokenOffsetsComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
static {
Collections.<Class<?>>addAll(brokenOffsetsComponents,
- WordDelimiterFilter.class,
- TrimFilter.class,
ReversePathHierarchyTokenizer.class,
PathHierarchyTokenizer.class,
HyphenationCompoundWordTokenFilter.class,
DictionaryCompoundWordTokenFilter.class,
- // nocommit: corrupts graphs (offset consistency check):
+ // TODO: corrupts graphs (offset consistency check):
PositionFilter.class,
- // nocommit it seems to mess up offsets!?
- WikipediaTokenizer.class
+ // TODO: it seems to mess up offsets!?
+ WikipediaTokenizer.class,
+ // TODO: doesn't handle graph inputs
+ ThaiWordFilter.class
);
}
@@ -271,7 +276,8 @@ public class TestRandomChains extends Ba
});
put(char.class, new ArgProducer() {
@Override public Object create(Random random) {
- // nocommit: fix any filters that care to throw IAE instead.
+ // TODO: fix any filters that care to throw IAE instead.
+ // also add a unicode validating filter to validate termAtt?
// return Character.valueOf((char)random.nextInt(65536));
while(true) {
char c = (char)random.nextInt(65536);
@@ -534,7 +540,7 @@ public class TestRandomChains extends Ba
// TODO: maybe the collator one...???
args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
} else if (paramType == AttributeSource.class) {
- // nocommit: args[i] = new AttributeSource();
+ // TODO: args[i] = new AttributeSource();
// this is currently too scary to deal with!
args[i] = null; // force IAE
} else {
@@ -583,7 +589,7 @@ public class TestRandomChains extends Ba
}
public boolean offsetsAreCorrect() {
- // nocommit: can we not do the full chain here!?
+ // TODO: can we not do the full chain here!?
Random random = new Random(seed);
TokenizerSpec tokenizerSpec = newTokenizer(random, new StringReader(""));
TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
@@ -717,7 +723,7 @@ public class TestRandomChains extends Ba
while (true) {
final Constructor<? extends TokenFilter> ctor = tokenfilters.get(random.nextInt(tokenfilters.size()));
- // nocommit/hack: MockGraph/MockLookahead has assertions that will trip if they follow
+ // hack: MockGraph/MockLookahead has assertions that will trip if they follow
// an offsets violator. so we cant use them after e.g. wikipediatokenizer
if (!spec.offsetsAreCorrect &&
(ctor.getDeclaringClass().equals(MockGraphTokenFilter.class)