Posted to commits@lucene.apache.org by rm...@apache.org on 2012/04/10 20:36:35 UTC

svn commit: r1311915 - /lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java

Author: rmuir
Date: Tue Apr 10 18:36:34 2012
New Revision: 1311915

URL: http://svn.apache.org/viewvc?rev=1311915&view=rev
Log:
LUCENE-3969: demote the n-grams again (with explanation)

Modified:
    lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java

Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1311915&r1=1311914&r2=1311915&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Tue Apr 10 18:36:34 2012
@@ -113,7 +113,22 @@ public class TestRandomChains extends Ba
                                  LimitTokenCountFilter.class,
                                  // Not broken: we forcefully add this, so we shouldn't
                                  // also randomly pick it:
-                                 ValidatingTokenFilter.class
+                                 ValidatingTokenFilter.class,
+                                 // NOTE: these by themselves won't cause any 'basic assertions' to fail.
+                                 // But see https://issues.apache.org/jira/browse/LUCENE-3920: if any
+                                 // tokenfilter that combines words (e.g. shingles) comes after them,
+                                 // this will create bogus offsets because their 'offsets go backwards',
+                                 // causing shingle or whatever to make a single token with a 
+                                 // startOffset that's > its endOffset
+                                 // (see LUCENE-3738 for a list of other offenders here)
+                                 // broken!
+                                 NGramTokenizer.class,
+                                 // broken!
+                                 NGramTokenFilter.class,
+                                 // broken!
+                                 EdgeNGramTokenizer.class,
+                                 // broken!
+                                 EdgeNGramTokenFilter.class
     );
   }
 
@@ -130,14 +145,6 @@ public class TestRandomChains extends Ba
                                  DictionaryCompoundWordTokenFilter.class,
                                 // nocommit: corrupts graphs (offset consistency check):
                                  PositionFilter.class,
-                                 // broken!
-                                 NGramTokenizer.class,
-                                 // broken!
-                                 NGramTokenFilter.class,
-                                 // broken!
-                                 EdgeNGramTokenizer.class,
-                                 // broken!
-                                 EdgeNGramTokenFilter.class,
                                  // nocommit it seems to mess up offsets!?
                                  WikipediaTokenizer.class
                                  );
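
To make the comment in the first hunk concrete, here is a minimal, self-contained sketch of the problem (plain Java, not the actual Lucene tokenizer/filter classes; every name below is illustrative only). It assumes the old gram emission order (all grams of one size before the next), which is what makes the offsets go backwards mid-stream, and then combines adjacent tokens the way a shingle does (startOffset of the first token, endOffset of the last), producing a token whose startOffset is greater than its endOffset.

import java.util.ArrayList;
import java.util.List;

// Illustrative stand-in for the behaviour described above; not Lucene code.
public class BackwardsOffsetsSketch {

  // (term, startOffset, endOffset) triple standing in for a token.
  static final class Tok {
    final String term;
    final int start;
    final int end;
    Tok(String term, int start, int end) {
      this.term = term;
      this.start = start;
      this.end = end;
    }
    @Override
    public String toString() {
      return term + "(" + start + "," + end + ")";
    }
  }

  // Emit grams by size: every gram of minGram length first, then minGram+1, etc.
  // Offsets restart at 0 for each gram size, so they go backwards mid-stream.
  static List<Tok> ngrams(String text, int minGram, int maxGram) {
    List<Tok> out = new ArrayList<Tok>();
    for (int len = minGram; len <= maxGram; len++) {
      for (int i = 0; i + len <= text.length(); i++) {
        out.add(new Tok(text.substring(i, i + len), i, i + len));
      }
    }
    return out;
  }

  // Combine each pair of adjacent tokens the way a shingle does:
  // startOffset of the first token, endOffset of the last.
  static List<Tok> shingles(List<Tok> in) {
    List<Tok> out = new ArrayList<Tok>();
    for (int i = 0; i + 1 < in.size(); i++) {
      Tok a = in.get(i);
      Tok b = in.get(i + 1);
      out.add(new Tok(a.term + " " + b.term, a.start, b.end));
    }
    return out;
  }

  public static void main(String[] args) {
    List<Tok> grams = ngrams("abcd", 1, 2);
    System.out.println("grams: " + grams);
    for (Tok t : shingles(grams)) {
      System.out.println(t + (t.start > t.end ? "   <-- startOffset > endOffset" : ""));
    }
  }
}

With that emission order the unigram d(3,4) is followed by the bigram ab(0,2), so the combined token "d ab" ends up with startOffset 3 and endOffset 2, i.e. exactly the bogus startOffset > endOffset state the comment warns about.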