You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/04/10 20:36:35 UTC
svn commit: r1311915 -
/lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
Author: rmuir
Date: Tue Apr 10 18:36:34 2012
New Revision: 1311915
URL: http://svn.apache.org/viewvc?rev=1311915&view=rev
Log:
LUCENE-3969: demote the n-grams again (with explanation)
Modified:
lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1311915&r1=1311914&r2=1311915&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Tue Apr 10 18:36:34 2012
@@ -113,7 +113,22 @@ public class TestRandomChains extends Ba
LimitTokenCountFilter.class,
// Not broken: we forcefully add this, so we shouldn't
// also randomly pick it:
- ValidatingTokenFilter.class
+ ValidatingTokenFilter.class,
+ // NOTE: these by themselves won't cause any 'basic assertions' to fail,
+ // but see https://issues.apache.org/jira/browse/LUCENE-3920: if any
+ // tokenfilter that combines words (e.g. shingles) comes after them,
+ // this will create bogus offsets because their 'offsets go backwards',
+ // causing the shingle filter (or whatever combines the tokens) to make
+ // a single token with a startOffset that's > its endOffset
+ // (see LUCENE-3738 for a list of other offenders here)
+ // broken!
+ NGramTokenizer.class,
+ // broken!
+ NGramTokenFilter.class,
+ // broken!
+ EdgeNGramTokenizer.class,
+ // broken!
+ EdgeNGramTokenFilter.class
);
}
@@ -130,14 +145,6 @@ public class TestRandomChains extends Ba
DictionaryCompoundWordTokenFilter.class,
// nocommit: corrupts graphs (offset consistency check):
PositionFilter.class,
- // broken!
- NGramTokenizer.class,
- // broken!
- NGramTokenFilter.class,
- // broken!
- EdgeNGramTokenizer.class,
- // broken!
- EdgeNGramTokenFilter.class,
// nocommit it seems to mess up offsets!?
WikipediaTokenizer.class
);