You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@harmony.apache.org by te...@apache.org on 2006/10/10 00:50:42 UTC
svn commit: r454541 [3/3] - in
/incubator/harmony/enhanced/classlib/trunk/modules/regex/src:
main/java/java/util/regex/
test/java/org/apache/harmony/tests/java/util/regex/
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java?view=diff&rev=454541&r1=454540&r2=454541
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java Mon Oct 9 15:50:40 2006
@@ -1257,6 +1257,349 @@
assertEquals(mat.end(), 13);
}
+
+ public void testCanonEqFlagWithSupplementaryCharacters() {
+
+ /*
+ * U+1D1BF -> U+1D1BB U+1D16F -> U+1D1B9 U+1D165 U+1D16F in UTF-32
+ * \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F
+ * ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16
+ */
+ String patString = "abc\uD834\uDDBFef";
+ String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
+ Pattern pat = Pattern.compile(patString, Pattern.CANON_EQ);
+ Matcher mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ patString = "abc\uD834\uDDBB\uD834\uDD6Fef";
+ testString = "abc\uD834\uDDBFef";
+ pat = Pattern.compile(patString, Pattern.CANON_EQ);
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
+ testString = "abc\uD834\uDDBFef";
+ pat = Pattern.compile(patString, Pattern.CANON_EQ);
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ /*
+ * Test supplementary characters with no decomposition
+ */
+ patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef";
+ testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef";
+ pat = Pattern.compile(patString, Pattern.CANON_EQ);
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+ }
+
+ public void testRangesWithSurrogatesSupplementary() {
+ String patString = "[abc\uD8D2]";
+ String testString = "\uD8D2";
+ Pattern pat = Pattern.compile(patString);
+ Matcher mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "a";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "ef\uD8D2\uDD71gh";
+ mat = pat.matcher(testString);
+ assertFalse(mat.find());
+
+ testString = "ef\uD8D2gh";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ patString = "[abc\uD8D3&&[c\uD8D3]]";
+ testString = "c";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "a";
+ mat = pat.matcher(testString);
+ assertFalse(mat.matches());
+
+ testString = "ef\uD8D3\uDD71gh";
+ mat = pat.matcher(testString);
+ assertFalse(mat.find());
+
+ testString = "ef\uD8D3gh";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]";
+ testString = "c";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "\uDBEE\uDF0C";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "ef\uD8D3\uDD71gh";
+ mat = pat.matcher(testString);
+ assertFalse(mat.find());
+
+ testString = "ef\uD8D3gh";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ patString = "[abc\uDBFC]\uDDC2cd";
+ testString = "\uDBFC\uDDC2cd";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertFalse(mat.matches());
+
+ testString = "a\uDDC2cd";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+ }
+
+ public void testSequencesWithSurrogatesSupplementary() {
+ String patString = "abcd\uD8D3";
+ String testString = "abcd\uD8D3\uDFFC";
+ Pattern pat = Pattern.compile(patString);
+ Matcher mat = pat.matcher(testString);
+ assertFalse(mat.find());
+
+ testString = "abcd\uD8D3abc";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ patString = "ab\uDBEFcd";
+ testString = "ab\uDBEFcd";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ patString = "\uDFFCabcd";
+ testString = "\uD8D3\uDFFCabcd";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertFalse(mat.find());
+
+ testString = "abc\uDFFCabcdecd";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ patString = "\uD8D3\uDFFCabcd";
+ testString = "abc\uD8D3\uD8D3\uDFFCabcd";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+ }
+
+ public void testPredefinedClassesWithSurrogatesSupplementary() {
+ String patString = "[123\\D]";
+ String testString = "a";
+ Pattern pat = Pattern.compile(patString);
+ Matcher mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ testString = "5";
+ mat = pat.matcher(testString);
+ assertFalse(mat.find());
+
+ testString = "3";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ //low surrogate
+ testString = "\uDFC4";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ //high surrogate
+ testString = "\uDADA";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ testString = "\uDADA\uDFC4";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ patString = "[123[^\\p{javaDigit}]]";
+ testString = "a";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ testString = "5";
+ mat = pat.matcher(testString);
+ assertFalse(mat.find());
+
+ testString = "3";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ //low surrogate
+ testString = "\uDFC4";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ //high surrogate
+ testString = "\uDADA";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ testString = "\uDADA\uDFC4";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ //surrogate characters
+ patString = "\\p{Cs}";
+ testString = "\uD916\uDE27";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+
+ /*
+ * see http://www.unicode.org/reports/tr18/#Supplementary_Characters
+ * we have to treat text as code points not code units.
+ * \\p{Cs} matches any surrogate character but here testString
+ * is one code point consisting of two code units (two surrogate
+ * characters) so we find nothing
+ */
+ assertFalse(mat.find());
+
+ //swap low and high surrogates
+ testString = "\uDE27\uD916";
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]";
+ testString = "1";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ testString = "\uD916";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertFalse(mat.find());
+
+ testString = "\uD916\uDE27";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertTrue(mat.find());
+
+ //\uD9A0\uDE8E = U+7828E
+ //\uD9A0\uDE81 = U+78281
+ patString = "[a-\uD9A0\uDE8E]";
+ testString = "\uD9A0\uDE81";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+ }
+
+ public void testDotConstructionWithSurrogatesSupplementary() {
+ String patString = ".";
+ String testString = "\uD9A0\uDE81";
+ Pattern pat = Pattern.compile(patString);
+ Matcher mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "\uDE81";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "\uD9A0";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "\n";
+ mat = pat.matcher(testString);
+ assertFalse(mat.matches());
+
+ patString = ".*\uDE81";
+ testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81";
+ pat = Pattern.compile(patString);
+ mat = pat.matcher(testString);
+ assertFalse(mat.matches());
+
+ testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ patString = ".*";
+ testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81";
+ pat = Pattern.compile(patString, Pattern.DOTALL);
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+ }
+
+ public void testQuantifiersWithSurrogatesSupplementary() {
+ String patString = "\uD9A0\uDE81*abc";
+ String testString = "\uD9A0\uDE81\uD9A0\uDE81abc";
+ Pattern pat = Pattern.compile(patString);
+ Matcher mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "abc";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+ }
+
+ public void testAlternationsWithSurrogatesSupplementary() {
+ String patString = "\uDE81|\uD9A0\uDE81|\uD9A0";
+ String testString = "\uD9A0";
+ Pattern pat = Pattern.compile(patString);
+ Matcher mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "\uDE81";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "\uD9A0\uDE81";
+ mat = pat.matcher(testString);
+ assertTrue(mat.matches());
+
+ testString = "\uDE81\uD9A0";
+ mat = pat.matcher(testString);
+ assertFalse(mat.matches());
+ }
+
+ public void testGroupsWithSurrogatesSupplementary() {
+
+ //this pattern matches nothing
+ String patString = "(\uD9A0)\uDE81";
+ String testString = "\uD9A0\uDE81";
+ Pattern pat = Pattern.compile(patString);
+ Matcher mat = pat.matcher(testString);
+ assertFalse(mat.matches());
+
+ patString = "(\uD9A0)";
+ testString = "\uD9A0\uDE81";
+ pat = Pattern.compile(patString, Pattern.DOTALL);
+ mat = pat.matcher(testString);
+ assertFalse(mat.find());
+ }
+
+ /*
+ * Regression test for HARMONY-688
+ */
+ public void testUnicodeCategoryWithSurrogatesSupplementary() {
+ Pattern p = Pattern.compile("\\p{javaLowerCase}");
+ Matcher matcher = p.matcher("\uD801\uDC28");
+ assertTrue(matcher.find());
+ }
+
public static void main(String[] args) {
junit.textui.TestRunner.run(PatternTest.class);
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java?view=diff&rev=454541&r1=454540&r2=454541
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java Mon Oct 9 15:50:40 2006
@@ -132,17 +132,22 @@
assertEquals("c", s[3]);
assertEquals("d", s[4]);
assertEquals("", s[5]);
+ }
- // Match with a surrogate pair .. strangely splits the surrogate pair. I
- // would have expected
- // the third matched string to be "\ud869\uded6" (aka \u2a6d6)
+ public void testSplitSupplementaryWithEmptyString() {
+
+ /*
+ * See http://www.unicode.org/reports/tr18/#Supplementary_Characters
+ * We have to treat text as code points not code units.
+ */
+ Pattern p = Pattern.compile("");
+ String s[];
s = p.split("a\ud869\uded6b", -1);
- assertEquals(6, s.length);
+ assertEquals(5, s.length);
assertEquals("", s[0]);
assertEquals("a", s[1]);
- assertEquals("\ud869", s[2]);
- assertEquals("\uded6", s[3]);
- assertEquals("b", s[4]);
- assertEquals("", s[5]);
+ assertEquals("\ud869\uded6", s[2]);
+ assertEquals("b", s[3]);
+ assertEquals("", s[4]);
}
}