You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/10/23 17:09:06 UTC

svn commit: r1187902 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/src/test/ lucene/contrib/ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/...

Author: rmuir
Date: Sun Oct 23 15:09:06 2011
New Revision: 1187902

URL: http://svn.apache.org/viewvc?rev=1187902&view=rev
Log:
LUCENE-3301: add workaround for JRE BreakIterator bugs

Added:
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java
      - copied unchanged from r1187901, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/util/
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/util/TestCharArrayIterator.java
      - copied unchanged from r1187900, lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayIterator.java
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/backwards/src/test/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1187902&r1=1187901&r2=1187902&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Sun Oct 23 15:09:06 2011
@@ -71,6 +71,10 @@ Bug Fixes
    to retrieve top groups for any BlockJoinQuery after the first (Mark
    Harwood, Mike McCandless)
 
+ * LUCENE-3301: Added a workaround for buggy BreakIterator implementations in
+   Java that crash on certain inputs containing supplementary characters.
+   (Robert Muir)
+
 API Changes
  
  * LUCENE-3436: Add SuggestMode to the spellchecker, so you can specify the strategy

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java?rev=1187902&r1=1187901&r2=1187902&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java Sun Oct 23 15:09:06 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.th;
 import java.io.IOException;
 import java.util.Locale;
 import java.lang.Character.UnicodeBlock;
-import javax.swing.text.Segment;
 import java.text.BreakIterator;
 
 import org.apache.lucene.analysis.TokenFilter;
@@ -28,6 +27,7 @@ import org.apache.lucene.analysis.LowerC
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.CharArrayIterator;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;
 
@@ -56,7 +56,7 @@ public final class ThaiWordFilter extend
     DBBI_AVAILABLE = proto.isBoundary(4);
   }
   private final BreakIterator breaker = (BreakIterator) proto.clone();
-  private final Segment charIterator = new Segment();
+  private final CharArrayIterator charIterator = CharArrayIterator.newWordInstance();
   
   private final boolean handlePosIncr;
   
@@ -121,9 +121,7 @@ public final class ThaiWordFilter extend
     }
     
     // reinit CharacterIterator
-    charIterator.array = clonedTermAtt.buffer();
-    charIterator.offset = 0;
-    charIterator.count = clonedTermAtt.length();
+    charIterator.setText(clonedTermAtt.buffer(), 0, clonedTermAtt.length());
     breaker.setText(charIterator);
     int end = breaker.next();
     if (end != BreakIterator.DONE) {