You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/04/09 16:31:25 UTC

svn commit: r1311257 - in /lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound: CompoundWordTokenFilterBase.java HyphenationCompoundWordTokenFilter.java

Author: rmuir
Date: Mon Apr  9 14:31:25 2012
New Revision: 1311257

URL: http://svn.apache.org/viewvc?rev=1311257&view=rev
Log:
LUCENE-3969: don't allow negative subword params, Hyphenation relies upon this to filter out what appear to be bogus hyphenation points

Modified:
    lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
    lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java

Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java?rev=1311257&r1=1311256&r2=1311257&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java Mon Apr  9 14:31:25 2012
@@ -82,8 +82,17 @@ public abstract class CompoundWordTokenF
     super(input);
     
     this.tokens=new LinkedList<CompoundToken>();
+    if (minWordSize < 0) {
+      throw new IllegalArgumentException("minWordSize cannot be negative");
+    }
     this.minWordSize=minWordSize;
+    if (minSubwordSize < 0) {
+      throw new IllegalArgumentException("minSubwordSize cannot be negative");
+    }
     this.minSubwordSize=minSubwordSize;
+    if (maxSubwordSize < 0) {
+      throw new IllegalArgumentException("maxSubwordSize cannot be negative");
+    }
     this.maxSubwordSize=maxSubwordSize;
     this.onlyLongestMatch=onlyLongestMatch;
     this.dictionary = dictionary;

Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java?rev=1311257&r1=1311256&r2=1311257&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java Mon Apr  9 14:31:25 2012
@@ -191,6 +191,8 @@ public class HyphenationCompoundWordToke
         // we only put subwords to the token stream
         // that are longer than minPartSize
         if (partLength < this.minSubwordSize) {
+          // nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the 
+          // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
           continue;
         }