You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/08/17 19:59:39 UTC
svn commit: r1374379 - in /lucene/dev/trunk/lucene: ./
analysis/common/src/java/org/apache/lucene/analysis/charfilter/
analysis/common/src/test/org/apache/lucene/analysis/charfilter/
core/src/java/org/apache/lucene/util/fst/
Author: mikemccand
Date: Fri Aug 17 17:59:38 2012
New Revision: 1374379
URL: http://svn.apache.org/viewvc?rev=1374379&view=rev
Log:
LUCENE-4310: revert ... new test is failing
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1374379&r1=1374378&r2=1374379&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Fri Aug 17 17:59:38 2012
@@ -79,10 +79,6 @@ Bug Fixes
Also, ensure immutability and use only one instance of this table in RAM (lazy
loaded) since it's quite large. (sausarkar, Steven Rowe, Robert Muir)
-* LUCENE-4310: MappingCharFilter was failing to match input strings
- containing non-BMP Unicode characters. (Dawid Weiss, Robert Muir,
- Mike McCandless)
-
Build
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java?rev=1374379&r1=1374378&r2=1374379&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/NormalizeCharMap.java Fri Aug 17 17:59:38 2012
@@ -111,8 +111,9 @@ public class NormalizeCharMap {
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
final IntsRef scratch = new IntsRef();
for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
- builder.add(Util.toUTF16(ent.getKey(), scratch),
+ builder.add(Util.toUTF32(ent.getKey(), scratch),
new CharsRef(ent.getValue()));
+
}
map = builder.finish();
pendingPairs.clear();
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java?rev=1374379&r1=1374378&r2=1374379&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java Fri Aug 17 17:59:38 2012
@@ -33,7 +33,6 @@ import org.apache.lucene.analysis.CharFi
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
public class TestMappingCharFilter extends BaseTokenStreamTestCase {
@@ -56,11 +55,6 @@ public class TestMappingCharFilter exten
builder.add( "empty", "" );
- // Non-BMP (surrogate pair):
- builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef");
-
- builder.add("\uff01", "full width exclamation");
-
normMap = builder.build();
}
@@ -134,18 +128,6 @@ public class TestMappingCharFilter exten
assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5);
}
- public void testNonBMPChar() throws Exception {
- CharFilter cs = new MappingCharFilter( normMap, new StringReader( UnicodeUtil.newString(new int[] {0x1D122}, 0, 1) ) );
- TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
- assertTokenStreamContents(ts, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2);
- }
-
- public void testFullWidthChar() throws Exception {
- CharFilter cs = new MappingCharFilter( normMap, new StringReader( "\uff01") );
- TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
- assertTokenStreamContents(ts, new String[]{"full width exclamation"}, new int[]{0}, new int[]{1}, 1);
- }
-
//
// 1111111111222
// 01234567890123456789012
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/Util.java?rev=1374379&r1=1374378&r2=1374379&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/Util.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/Util.java Fri Aug 17 17:59:38 2012
@@ -767,21 +767,6 @@ public final class Util {
}
}
- /** Just maps each UTF16 unit (char) to the ints in an
- * IntsRef. */
- public static IntsRef toUTF16(CharSequence s, IntsRef scratch) {
- final int charLimit = s.length();
- scratch.grow(charLimit);
- int idx = 0;
- while(idx < charLimit) {
- scratch.ints[idx] = (int) s.charAt(idx);
- idx++;
- }
- scratch.offset = 0;
- scratch.length = idx;
- return scratch;
- }
-
/** Decodes the Unicode codepoints from the provided
* CharSequence and places them in the provided scratch
* IntsRef, which must not be null, returning it. */