You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@harmony.apache.org by te...@apache.org on 2006/10/10 02:44:46 UTC
svn commit: r454575 [2/2] - in
/incubator/harmony/enhanced/classlib/trunk/modules/regex/src:
main/java/java/util/regex/
test/java/org/apache/harmony/tests/java/util/regex/
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java Mon Oct 9 17:44:44 2006
@@ -304,10 +304,12 @@
if (lexemes.peek() == Lexer.CHAR_VERTICAL_BAR)
lexemes.next();
}
- AbstractSet rangeSet = processRangeSet(auxRange);
- rangeSet.setNext(last);
-
- return rangeSet;
+
+ if (!auxRange.hasUCI()) {
+ return new RangeSet(auxRange, last);
+ } else {
+ return new UCIRangeSet(auxRange, last);
+ }
}
/**
@@ -435,11 +437,8 @@
*/
private AbstractSet processSequence(AbstractSet last) {
StringBuffer substring = new StringBuffer();
-
while (!lexemes.isEmpty()
&& lexemes.isLetter()
- && !lexemes.isHighSurrogate()
- && !lexemes.isLowSurrogate()
&& ((!lexemes.isNextSpecial() && lexemes.lookAhead() == 0) // end
// of
// pattern
@@ -449,13 +448,7 @@
|| (lexemes.lookAhead() & 0x8000ffff) == Lexer.CHAR_LEFT_PARENTHESIS
|| lexemes.lookAhead() == Lexer.CHAR_VERTICAL_BAR || lexemes
.lookAhead() == Lexer.CHAR_DOLLAR)) {
- int ch = lexemes.next();
-
- if (Character.isSupplementaryCodePoint(ch)) {
- substring.append(Character.toChars(ch));
- } else {
- substring.append((char) ch);
- }
+ substring.append((char) lexemes.next());
}
if (!hasFlag(Pattern.CASE_INSENSITIVE)) {
return new SequenceSet(substring);
@@ -477,7 +470,7 @@
int curSymbIndex = -1;
if (!lexemes.isEmpty() && lexemes.isLetter()) {
- curSymb = lexemes.next();
+ curSymb = lexemes.nextChar();
codePoints [readCodePoints] = curSymb;
curSymbIndex = curSymb - Lexer.LBase;
}
@@ -493,12 +486,12 @@
codePointsHangul[readCodePoints++] = (char) curSymb;
curSymb = lexemes.peek();
- curSymbIndex = curSymb - Lexer.VBase;
- if ((curSymbIndex >= 0) && (curSymbIndex < Lexer.VCount)) {
- codePointsHangul [readCodePoints++] = (char) curSymb;
- lexemes.next();
- curSymb = lexemes.peek();
- curSymbIndex = curSymb - Lexer.TBase;
+ curSymbIndex = curSymb - Lexer.VBase;
+ if ((curSymbIndex >= 0) && (curSymbIndex < Lexer.VCount)) {
+ codePointsHangul [readCodePoints++] = (char) curSymb;
+ lexemes.next();
+ curSymb = lexemes.peek();
+ curSymbIndex = curSymb - Lexer.TBase;
if ((curSymbIndex >= 0) && (curSymbIndex < Lexer.TCount)) {
codePointsHangul [readCodePoints++] = (char) curSymb;
lexemes.next();
@@ -510,18 +503,18 @@
//LV syllable
return new HangulDecomposedCharSet(codePointsHangul, 2);
}
- } else {
+ } else {
//L jamo
if (!hasFlag(Pattern.CASE_INSENSITIVE)) {
- return new CharSet(codePointsHangul[0]);
- } else if (!hasFlag(Pattern.UNICODE_CASE)) {
- return new CICharSet(codePointsHangul[0]);
- } else {
- return new UCICharSet(codePointsHangul[0]);
- }
- }
-
+ return new CharSet(codePointsHangul[0]);
+ } else if (!hasFlag(Pattern.UNICODE_CASE)) {
+ return new CICharSet(codePointsHangul[0]);
+ } else {
+ return new UCICharSet(codePointsHangul[0]);
+ }
+ }
+
/*
* We process single codepoint or decomposed codepoint.
* We collect decomposed codepoint and obtain
@@ -533,15 +526,31 @@
while((readCodePoints < Lexer.MAX_DECOMPOSITION_LENGTH)
&& !lexemes.isEmpty() && lexemes.isLetter()
&& !Lexer.isDecomposedCharBoundary(lexemes.peek())) {
- codePoints [readCodePoints++] = lexemes.next();
+ codePoints [readCodePoints++] = lexemes.nextChar();
}
-
+
+ if (readCodePoints == 0) {
+ return null;
+ }
+
/*
- * We have read an ordinary symbol.
+ * We have read an ordinary Basic Multilingual Plane symbol.
*/
- if (readCodePoints == 1
+ if (readCodePoints == 1
+
+ /*
+ * We compile supplementary codepoint into
+ * DecomposedCharSet for convenience.
+ */
+ && curSymb <= Lexer.MAX_CODEPOINT_BASIC_MULTILINGUAL_PANE
&& !Lexer.hasSingleCodepointDecomposition(codePoints[0])) {
- return processCharSet(codePoints[0]);
+ if (!hasFlag(Pattern.CASE_INSENSITIVE)) {
+ return new CharSet((char) codePoints[0]);
+ } else if (!hasFlag(Pattern.UNICODE_CASE)) {
+ return new CICharSet((char) codePoints[0]);
+ } else {
+ return new UCICharSet((char) codePoints[0]);
+ }
} else {
if (!hasFlag(Pattern.CASE_INSENSITIVE)) {
return new DecomposedCharSet(codePoints, readCodePoints);
@@ -572,9 +581,6 @@
&& !lexemes.isLetter()) {
cur = processQuantifier(last, cur);
}
- } else if (lexemes.isHighSurrogate() || lexemes.isLowSurrogate()) {
- AbstractSet term = processTerminal(last);
- cur = processQuantifier(last, term);
} else {
cur = processSequence(last);
}
@@ -636,19 +642,8 @@
switch (quant) {
case Lexer.QUANT_STAR:
case Lexer.QUANT_PLUS: {
- QuantifierSet q;
-
lexemes.next();
- if (term.getType() == AbstractSet.TYPE_DOTSET) {
- if (!hasFlag(Pattern.DOTALL)) {
- q = new DotQuantifierSet(term, last, quant,
- AbstractLineTerminator.getInstance(flags));
- } else {
- q = new DotAllQuantifierSet(term, last, quant);
- }
- } else {
- q = new GroupQuantifierSet(term, last, quant);
- }
+ GroupQuantifierSet q = new GroupQuantifierSet(term, last, quant);
term.setNext(q);
return q;
}
@@ -731,8 +726,17 @@
case Lexer.QUANT_STAR:
case Lexer.QUANT_PLUS: {
lexemes.next();
- LeafQuantifierSet q = new LeafQuantifierSet(leaf,
- last, quant);
+ LeafQuantifierSet q;
+ if (term.getType() == AbstractSet.TYPE_DOTSET) {
+ if (!hasFlag(Pattern.DOTALL)) {
+ q = new DotQuantifierSet(leaf, last, quant,
+ AbstractLineTerminator.getInstance(flags));
+ } else {
+ q = new DotAllQuantifierSet(leaf, last, quant);
+ }
+ } else {
+ q = new LeafQuantifierSet(leaf, last, quant);
+ }
leaf.setNext(q);
return q;
}
@@ -955,10 +959,8 @@
case 0: {
AbstractCharClass cc = null;
if ((cc = (AbstractCharClass) lexemes.peekSpecial()) != null) {
- term = processRangeSet(cc);
+ term = new RangeSet(cc);
} else if (!lexemes.isEmpty()) {
-
- //ch == 0
term = new CharSet((char) ch);
} else {
term = new EmptySet(last);
@@ -970,7 +972,19 @@
default: {
if (ch >= 0 && !lexemes.isSpecial()) {
- term = processCharSet(ch);
+ if (hasFlag(Pattern.CASE_INSENSITIVE)) {
+ if ((ch >= 'a' && ch <= 'z')
+ || (ch >= 'A' && ch <= 'Z')) {
+ term = new CICharSet((char) ch);
+ } else if (hasFlag(Pattern.UNICODE_CASE)
+ && ch > 128) {
+ term = new UCICharSet((char) ch);
+ } else {
+ term = new CharSet((char) ch);
+ }
+ } else {
+ term = new CharSet((char) ch);
+ }
lexemes.next();
} else if (ch == Lexer.CHAR_VERTICAL_BAR) {
term = new EmptySet(last);
@@ -998,16 +1012,17 @@
private AbstractSet processRange(boolean negative, AbstractSet last) {
AbstractCharClass res = processRangeExpression(negative);
- AbstractSet rangeSet = processRangeSet(res);
- rangeSet.setNext(last);
-
- return rangeSet;
+ if (!res.hasUCI()) {
+ return new RangeSet(res, last);
+ } else {
+ return new UCIRangeSet(res, last);
+ }
}
/**
* proceess [...] ranges
*/
- private CharClass processRangeExpression(boolean alt) {
+ private AbstractCharClass processRangeExpression(boolean alt) {
CharClass res = new CharClass(alt, hasFlag(Pattern.CASE_INSENSITIVE),
hasFlag(Pattern.UNICODE_CASE));
int buffer = -1;
@@ -1028,10 +1043,6 @@
break;
}
case Lexer.CHAR_LEFT_SQUARE_BRACKET: {
- if (buffer >= 0) {
- res.add(buffer);
- buffer = -1;
- }
lexemes.next();
boolean negative = false;
if (lexemes.peek() == Lexer.CHAR_CARET) {
@@ -1052,37 +1063,13 @@
if (buffer >= 0)
res.add(buffer);
buffer = lexemes.next();
-
- /*
- * if there is a start for subrange we will do an intersection
- * otherwise treat '&' as a normal character
- */
- if (lexemes.peek() == Lexer.CHAR_AMPERSAND) {
- if (lexemes.lookAhead()
- == Lexer.CHAR_LEFT_SQUARE_BRACKET) {
- lexemes.next();
- intersection = true;
- buffer = -1;
- } else {
- lexemes.next();
- if (firstInClass) {
-
- //skip "&&" at "[&&...]" or "[^&&...]"
- res = processRangeExpression(false);
- } else {
-
- //ignore "&&" at "[X&&]" ending where X != empty string
- if (!(lexemes.peek()
- == Lexer.CHAR_RIGHT_SQUARE_BRACKET)) {
- res.intersection(processRangeExpression(false));
- }
- }
-
- }
- } else {
-
- //treat '&' as a normal character
- buffer = '&';
+ // if there is a start for subrange we will do an intersection
+ // otherwise treat '&' as normal character
+ if (lexemes.peek() == Lexer.CHAR_AMPERSAND
+ && lexemes.lookAhead() == Lexer.CHAR_LEFT_SQUARE_BRACKET) {
+ lexemes.next();
+ intersection = true;
+ buffer = -1;
}
break;
@@ -1109,10 +1096,7 @@
|| lexemes.lookAhead() == Lexer.CHAR_LEFT_SQUARE_BRACKET || buffer < 0)) {
try {
- if (!Lexer.isLetter(cur)) {
- cur = cur & 0xFFFF;
- }
- res.add(buffer, cur);
+ res.add(buffer, (char) lexemes.peek());
} catch (Exception e) {
throw new PatternSyntaxException(
Messages.getString("regex.0E"), //$NON-NLS-1$
@@ -1130,14 +1114,6 @@
break;
}
- case Lexer.CHAR_CARET: {
- if (buffer >= 0)
- res.add(buffer);
- buffer = '^';
- lexemes.next();
- break;
- }
-
case 0: {
if (buffer >= 0)
res.add(buffer);
@@ -1173,88 +1149,6 @@
return res;
}
- private AbstractSet processCharSet(int ch) {
- boolean isSupplCodePoint = Character
- .isSupplementaryCodePoint(ch);
-
- if (hasFlag(Pattern.CASE_INSENSITIVE)) {
-
- if ((ch >= 'a' && ch <= 'z')
- || (ch >= 'A' && ch <= 'Z')) {
- return new CICharSet((char) ch);
- } else if (hasFlag(Pattern.UNICODE_CASE)
- && ch > 128) {
- if (isSupplCodePoint) {
- return new UCISupplCharSet(ch);
- } else if (Lexer.isLowSurrogate(ch)) {
-
- //we need no UCILowSurrogateCharSet
- return new LowSurrogateCharSet((char) ch);
- } else if (Lexer.isHighSurrogate(ch)) {
-
- //we need no UCIHighSurrogateCharSet
- return new HighSurrogateCharSet((char) ch);
- } else {
- return new UCICharSet((char) ch);
- }
- }
- }
-
- if (isSupplCodePoint) {
- return new SupplCharSet(ch);
- } else if (Lexer.isLowSurrogate(ch)) {
- return new LowSurrogateCharSet((char) ch);
- } else if (Lexer.isHighSurrogate(ch)) {
- return new HighSurrogateCharSet((char) ch);
- } else {
- return new CharSet((char) ch);
- }
- }
-
- private AbstractSet processRangeSet(AbstractCharClass charClass) {
- if (charClass.hasLowHighSurrogates()) {
- AbstractCharClass surrogates = charClass.getSurrogates();
- LowHighSurrogateRangeSet lowHighSurrRangeSet
- = new LowHighSurrogateRangeSet(surrogates);
-
- if (charClass.mayContainSupplCodepoints()) {
- if (!charClass.hasUCI()) {
- return new CompositeRangeSet(
- new SupplRangeSet(charClass.getWithoutSurrogates()),
- lowHighSurrRangeSet);
- } else {
- return new CompositeRangeSet(
- new UCISupplRangeSet(charClass.getWithoutSurrogates()),
- lowHighSurrRangeSet);
- }
- }
-
- if (!charClass.hasUCI()) {
- return new CompositeRangeSet(
- new RangeSet(charClass.getWithoutSurrogates()),
- lowHighSurrRangeSet);
- } else {
- return new CompositeRangeSet(
- new UCIRangeSet(charClass.getWithoutSurrogates()),
- lowHighSurrRangeSet);
- }
- }
-
- if (charClass.mayContainSupplCodepoints()) {
- if (!charClass.hasUCI()) {
- return new SupplRangeSet(charClass);
- } else {
- return new UCISupplRangeSet(charClass);
- }
- }
-
- if (!charClass.hasUCI()) {
- return new RangeSet(charClass);
- } else {
- return new UCIRangeSet(charClass);
- }
- }
-
/**
* @com.intel.drl.spec_ref
*/
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PosPlusGroupQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PosPlusGroupQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PosPlusGroupQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PosPlusGroupQuantifierSet.java Mon Oct 9 17:44:44 2006
@@ -32,7 +32,7 @@
public PosPlusGroupQuantifierSet(AbstractSet innerSet, AbstractSet next,
int type) {
super(innerSet, next, type);
- ((JointSet) innerSet).setNext(FSet.posFSet);
+ ((JointSet) innerSet).fSet.setNext(FSet.posFSet);
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/RangeSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/RangeSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/RangeSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/RangeSet.java Mon Oct 9 17:44:44 2006
@@ -58,18 +58,8 @@
return AbstractCharClass.intersects(chars, ((CharSet) set)
.getChar());
} else if (set instanceof RangeSet) {
- return AbstractCharClass.intersects(chars, ((RangeSet) set)
- .chars);
- } else if (set instanceof SupplRangeSet) {
- return AbstractCharClass.intersects(chars, ((SupplRangeSet) set)
- .getChars());
- } else if (set instanceof SupplCharSet) {
- return false;
+ return AbstractCharClass.intersects(chars, ((RangeSet) set).chars);
}
return true;
- }
-
- protected AbstractCharClass getChars() {
- return chars;
}
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/ReluctantQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/ReluctantQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/ReluctantQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/ReluctantQuantifierSet.java Mon Oct 9 17:44:44 2006
@@ -36,6 +36,7 @@
public int matches(int stringIndex, CharSequence testString,
MatchResultImpl matchResult) {
+ int i = 0;
int shift = 0;
do {
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SequenceSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SequenceSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SequenceSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SequenceSet.java Mon Oct 9 17:44:44 2006
@@ -107,16 +107,6 @@
return ((CharSet) set).getChar() == string.charAt(0);
} else if (set instanceof RangeSet) {
return ((RangeSet) set).accepts(0, string.substring(0, 1)) > 0;
- } else if (set instanceof SupplRangeSet) {
- return ((SupplRangeSet) set).contains(string.charAt(0))
- || ((string.length() > 1) && ((SupplRangeSet) set).contains(Character
- .toCodePoint(string.charAt(0), string.charAt(1))));
- } else if ((set instanceof SupplCharSet)) {
- return (string.length() > 1)
- ? ((SupplCharSet) set).getCodePoint()
- == Character.toCodePoint(string.charAt(0),
- string.charAt(1))
- : false;
}
return true;
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SingleDecompositions.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SingleDecompositions.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SingleDecompositions.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SingleDecompositions.java Mon Oct 9 17:44:44 2006
@@ -19,7 +19,7 @@
/**
* This class gives us a hashtable that contains information about
- * symbols that are one symbol decompositions that is
+ * symbols that have decomposition and canonical class 0 that is
* generated from
* http://www.unicode.org/Public/4.0-Update/UnicodeData-4.0.0.txt.
*/
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCICharSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCICharSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCICharSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCICharSet.java Mon Oct 9 17:44:44 2006
@@ -44,4 +44,8 @@
protected String getName() {
return "UCI " + ch; //$NON-NLS-1$
}
+
+ protected char getChar() {
+ return ch;
+ }
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCIRangeSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCIRangeSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCIRangeSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCIRangeSet.java Mon Oct 9 17:44:44 2006
@@ -41,11 +41,6 @@
this.alt = cs.alt;
}
- public UCIRangeSet(AbstractCharClass cc) {
- this.chars = cc.getInstance();
- this.alt = cc.alt;
- }
-
public int accepts(int strIndex, CharSequence testString) {
return (chars.contains(Character.toLowerCase(Character
.toUpperCase(testString.charAt(strIndex))))) ? 1 : -1;
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UnifiedQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UnifiedQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UnifiedQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UnifiedQuantifierSet.java Mon Oct 9 17:44:44 2006
@@ -44,7 +44,7 @@
MatchResultImpl matchResult) {
while (stringIndex + leaf.charCount() <= matchResult.getRightBound()
&& leaf.accepts(stringIndex, testString) > 0)
- stringIndex += leaf.charCount();
+ stringIndex++;
return next.matches(stringIndex, testString, matchResult);
}
@@ -54,11 +54,11 @@
int startSearch = next.find(stringIndex, testString, matchResult);
if (startSearch < 0)
return -1;
- int newSearch = startSearch - leaf.charCount();
+ int newSearch = startSearch - 1;
while (newSearch >= stringIndex
&& leaf.accepts(newSearch, testString) > 0) {
startSearch = newSearch;
- newSearch -= leaf.charCount();
+ newSearch--;
}
return startSearch;
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java Mon Oct 9 17:44:44 2006
@@ -582,7 +582,7 @@
assertFalse(Pattern.matches("[\\p{Lu}a-d]", "k"));
assertTrue(Pattern.matches("[a-d\\p{Lu}]", "K"));
-// assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Lu}&&[^K]]]", "K"));
+ assertTrue(Pattern.matches("[\\p{L}&&[^\\p{Lu}&&[^K]]]", "K"));
assertFalse(Pattern.matches("[\\p{L}&&[^\\p{Lu}&&[^G]]]", "K"));
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java Mon Oct 9 17:44:44 2006
@@ -1257,349 +1257,6 @@
assertEquals(mat.end(), 13);
}
-
- public void testCanonEqFlagWithSupplementaryCharacters() {
-
- /*
- * \u1D1BF->\u1D1BB\u1D16F->\u1D1B9\u1D165\u1D16F in UTF32
- * \uD834\uDDBF->\uD834\uDDBB\uD834\uDD6F
- * ->\uD834\uDDB9\uD834\uDD65\uD834\uDD6F in UTF16
- */
- String patString = "abc\uD834\uDDBFef";
- String testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
- Pattern pat = Pattern.compile(patString, Pattern.CANON_EQ);
- Matcher mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- patString = "abc\uD834\uDDBB\uD834\uDD6Fef";
- testString = "abc\uD834\uDDBFef";
- pat = Pattern.compile(patString, Pattern.CANON_EQ);
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- patString = "abc\uD834\uDDB9\uD834\uDD65\uD834\uDD6Fef";
- testString = "abc\uD834\uDDBFef";
- pat = Pattern.compile(patString, Pattern.CANON_EQ);
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "abc\uD834\uDDBB\uD834\uDD6Fef";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- /*
- * testSupplementary characters with no decomposition
- */
- patString = "a\uD9A0\uDE8Ebc\uD834\uDDBB\uD834\uDD6Fe\uDE8Ef";
- testString = "a\uD9A0\uDE8Ebc\uD834\uDDBFe\uDE8Ef";
- pat = Pattern.compile(patString, Pattern.CANON_EQ);
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
- }
-
- public void testRangesWithSurrogatesSupplementary() {
- String patString = "[abc\uD8D2]";
- String testString = "\uD8D2";
- Pattern pat = Pattern.compile(patString);
- Matcher mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "a";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "ef\uD8D2\uDD71gh";
- mat = pat.matcher(testString);
- assertFalse(mat.find());
-
- testString = "ef\uD8D2gh";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- patString = "[abc\uD8D3&&[c\uD8D3]]";
- testString = "c";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "a";
- mat = pat.matcher(testString);
- assertFalse(mat.matches());
-
- testString = "ef\uD8D3\uDD71gh";
- mat = pat.matcher(testString);
- assertFalse(mat.find());
-
- testString = "ef\uD8D3gh";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- patString = "[abc\uD8D3\uDBEE\uDF0C&&[c\uD8D3\uDBEE\uDF0C]]";
- testString = "c";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "\uDBEE\uDF0C";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "ef\uD8D3\uDD71gh";
- mat = pat.matcher(testString);
- assertFalse(mat.find());
-
- testString = "ef\uD8D3gh";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- patString = "[abc\uDBFC]\uDDC2cd";
- testString = "\uDBFC\uDDC2cd";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertFalse(mat.matches());
-
- testString = "a\uDDC2cd";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
- }
-
- public void testSequencesWithSurrogatesSupplementary() {
- String patString = "abcd\uD8D3";
- String testString = "abcd\uD8D3\uDFFC";
- Pattern pat = Pattern.compile(patString);
- Matcher mat = pat.matcher(testString);
- assertFalse(mat.find());
-
- testString = "abcd\uD8D3abc";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- patString = "ab\uDBEFcd";
- testString = "ab\uDBEFcd";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- patString = "\uDFFCabcd";
- testString = "\uD8D3\uDFFCabcd";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertFalse(mat.find());
-
- testString = "abc\uDFFCabcdecd";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- patString = "\uD8D3\uDFFCabcd";
- testString = "abc\uD8D3\uD8D3\uDFFCabcd";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertTrue(mat.find());
- }
-
- public void testPredefinedClassesWithSurrogatesSupplementary() {
- String patString = "[123\\D]";
- String testString = "a";
- Pattern pat = Pattern.compile(patString);
- Matcher mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- testString = "5";
- mat = pat.matcher(testString);
- assertFalse(mat.find());
-
- testString = "3";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- //low surrogate
- testString = "\uDFC4";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- //high surrogate
- testString = "\uDADA";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- testString = "\uDADA\uDFC4";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- patString = "[123[^\\p{javaDigit}]]";
- testString = "a";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- testString = "5";
- mat = pat.matcher(testString);
- assertFalse(mat.find());
-
- testString = "3";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- //low surrogate
- testString = "\uDFC4";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- //high surrogate
- testString = "\uDADA";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- testString = "\uDADA\uDFC4";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- //surrogate characters
- patString = "\\p{Cs}";
- testString = "\uD916\uDE27";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
-
- /*
- * see http://www.unicode.org/reports/tr18/#Supplementary_Characters
- * we have to treat text as code points not code units.
- * \\p{Cs} matches any surrogate character but here testString
- * is a one code point consisting of two code units (two surrogate
- * characters) so we find nothing
- */
- assertFalse(mat.find());
-
- //swap low and high surrogates
- testString = "\uDE27\uD916";
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- patString = "[\uD916\uDE271\uD91623&&[^\\p{Cs}]]";
- testString = "1";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- testString = "\uD916";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertFalse(mat.find());
-
- testString = "\uD916\uDE27";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertTrue(mat.find());
-
- //\uD9A0\uDE8E=\u7828E
- //\u78281=\uD9A0\uDE81
- patString = "[a-\uD9A0\uDE8E]";
- testString = "\uD9A0\uDE81";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
- }
-
- public void testDotConstructionWithSurrogatesSupplementary() {
- String patString = ".";
- String testString = "\uD9A0\uDE81";
- Pattern pat = Pattern.compile(patString);
- Matcher mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "\uDE81";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "\uD9A0";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "\n";
- mat = pat.matcher(testString);
- assertFalse(mat.matches());
-
- patString = ".*\uDE81";
- testString = "\uD9A0\uDE81\uD9A0\uDE81\uD9A0\uDE81";
- pat = Pattern.compile(patString);
- mat = pat.matcher(testString);
- assertFalse(mat.matches());
-
- testString = "\uD9A0\uDE81\uD9A0\uDE81\uDE81";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- patString = ".*";
- testString = "\uD9A0\uDE81\n\uD9A0\uDE81\uD9A0\n\uDE81";
- pat = Pattern.compile(patString, Pattern.DOTALL);
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
- }
-
- public void testQuantifiersWithSurrogatesSupplementary() {
- String patString = "\uD9A0\uDE81*abc";
- String testString = "\uD9A0\uDE81\uD9A0\uDE81abc";
- Pattern pat = Pattern.compile(patString);
- Matcher mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "abc";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
- }
-
- public void testAlternationsWithSurrogatesSupplementary() {
- String patString = "\uDE81|\uD9A0\uDE81|\uD9A0";
- String testString = "\uD9A0";
- Pattern pat = Pattern.compile(patString);
- Matcher mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "\uDE81";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "\uD9A0\uDE81";
- mat = pat.matcher(testString);
- assertTrue(mat.matches());
-
- testString = "\uDE81\uD9A0";
- mat = pat.matcher(testString);
- assertFalse(mat.matches());
- }
-
- public void testGroupsWithSurrogatesSupplementary() {
-
- //this pattern matches nothing
- String patString = "(\uD9A0)\uDE81";
- String testString = "\uD9A0\uDE81";
- Pattern pat = Pattern.compile(patString);
- Matcher mat = pat.matcher(testString);
- assertFalse(mat.matches());
-
- patString = "(\uD9A0)";
- testString = "\uD9A0\uDE81";
- pat = Pattern.compile(patString, Pattern.DOTALL);
- mat = pat.matcher(testString);
- assertFalse(mat.find());
- }
-
- /*
- * Regression test for HARMONY-688
- */
- public void testUnicodeCategoryWithSurrogatesSupplementary() {
- Pattern p = Pattern.compile("\\p{javaLowerCase}");
- Matcher matcher = p.matcher("\uD801\uDC28");
- assertTrue(matcher.find());
- }
-
public static void main(String[] args) {
junit.textui.TestRunner.run(PatternTest.class);
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java Mon Oct 9 17:44:44 2006
@@ -132,22 +132,17 @@
assertEquals("c", s[3]);
assertEquals("d", s[4]);
assertEquals("", s[5]);
- }
- public void testSplitSupplementaryWithEmptyString() {
-
- /*
- * See http://www.unicode.org/reports/tr18/#Supplementary_Characters
- * We have to treat text as code points not code units.
- */
- Pattern p = Pattern.compile("");
- String s[];
+ // Matching against a surrogate pair: the empty pattern splits between
+ // the two code units. One might have expected the third string to be
+ // the whole pair "\ud869\uded6" (code point U+2A6D6).
s = p.split("a\ud869\uded6b", -1);
- assertEquals(5, s.length);
+ assertEquals(6, s.length);
assertEquals("", s[0]);
assertEquals("a", s[1]);
- assertEquals("\ud869\uded6", s[2]);
- assertEquals("b", s[3]);
- assertEquals("", s[4]);
+ assertEquals("\ud869", s[2]);
+ assertEquals("\uded6", s[3]);
+ assertEquals("b", s[4]);
+ assertEquals("", s[5]);
}
}