You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/04/09 16:31:25 UTC
svn commit: r1311257 - in
/lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound:
CompoundWordTokenFilterBase.java HyphenationCompoundWordTokenFilter.java
Author: rmuir
Date: Mon Apr 9 14:31:25 2012
New Revision: 1311257
URL: http://svn.apache.org/viewvc?rev=1311257&view=rev
Log:
LUCENE-3969: don't allow negative subword params; HyphenationCompoundWordTokenFilter relies upon this to filter out what appear to be bogus hyphenation points
Modified:
lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java?rev=1311257&r1=1311256&r2=1311257&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java Mon Apr 9 14:31:25 2012
@@ -82,8 +82,17 @@ public abstract class CompoundWordTokenF
super(input);
this.tokens=new LinkedList<CompoundToken>();
+ if (minWordSize < 0) {
+ throw new IllegalArgumentException("minWordSize cannot be negative");
+ }
this.minWordSize=minWordSize;
+ if (minSubwordSize < 0) {
+ throw new IllegalArgumentException("minSubwordSize cannot be negative");
+ }
this.minSubwordSize=minSubwordSize;
+ if (maxSubwordSize < 0) {
+ throw new IllegalArgumentException("maxSubwordSize cannot be negative");
+ }
this.maxSubwordSize=maxSubwordSize;
this.onlyLongestMatch=onlyLongestMatch;
this.dictionary = dictionary;
Modified: lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java?rev=1311257&r1=1311256&r2=1311257&view=diff
==============================================================================
--- lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/lucene3969/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java Mon Apr 9 14:31:25 2012
@@ -191,6 +191,8 @@ public class HyphenationCompoundWordToke
// we only put subwords to the token stream
// that are longer than minPartSize
if (partLength < this.minSubwordSize) {
+ // nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
+ // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
continue;
}