You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2014/04/10 23:49:08 UTC
svn commit: r1586474 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/analysis/
lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java
Author: rmuir
Date: Thu Apr 10 21:49:08 2014
New Revision: 1586474
URL: http://svn.apache.org/r1586474
Log:
fix bug in buffering logic of this charfilter
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java
Modified: lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java?rev=1586474&r1=1586473&r2=1586474&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java Thu Apr 10 21:49:08 2014
@@ -104,7 +104,9 @@ public final class ICUNormalizer2CharFil
// if checkedInputBoundary was at the end of a buffer, we need to check that char again
checkedInputBoundary = Math.max(checkedInputBoundary - 1, 0);
- if (normalizer.isInert(tmpBuffer[len - 1]) && !Character.isHighSurrogate(tmpBuffer[len-1])) {
+ // this loop depends on 'isInert' (changes under normalization) but looks only at characters.
+ // so we treat all surrogates as non-inert for simplicity
+ if (normalizer.isInert(tmpBuffer[len - 1]) && !Character.isSurrogate(tmpBuffer[len-1])) {
return len;
} else return len + readInputToBuffer();
}