You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@harmony.apache.org by te...@apache.org on 2006/10/10 02:44:46 UTC
svn commit: r454575 [1/2] - in
/incubator/harmony/enhanced/classlib/trunk/modules/regex/src:
main/java/java/util/regex/
test/java/org/apache/harmony/tests/java/util/regex/
Author: tellison
Date: Mon Oct 9 17:44:44 2006
New Revision: 454575
URL: http://svn.apache.org/viewvc?view=rev&rev=454575
Log:
Backing out HARMONY-688 (java.util.regex.Matcher does not support Unicode supplementary characters).
Causes (or exposes) failure in java.util.Scanner -- under investigation.
Removed:
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeRangeSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/HighSurrogateCharSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LowHighSurrogateRangeSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LowSurrogateCharSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SupplCharSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SupplRangeSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCISupplCharSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCISupplRangeSet.java
Modified:
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Pattern.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/PosPlusGroupQuantifierSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/RangeSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/ReluctantQuantifierSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SequenceSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/SingleDecompositions.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCICharSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UCIRangeSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/UnifiedQuantifierSet.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/MatcherTest.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/PatternTest.java
incubator/harmony/enhanced/classlib/trunk/modules/regex/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AbstractCharClass.java Mon Oct 9 17:44:44 2006
@@ -37,26 +37,8 @@
abstract class AbstractCharClass extends SpecialToken {
protected boolean alt;
- protected boolean altSurrogates;
-
- //Character.MAX_SURROGATE - Character.MIN_SURROGATE + 1
- static int SURROGATE_CARDINALITY = 2048;
-
- BitSet lowHighSurrogates = new BitSet(SURROGATE_CARDINALITY);
-
- AbstractCharClass charClassWithoutSurrogates = null;
-
- AbstractCharClass charClassWithSurrogates = null;
-
static PredefinedCharacterClasses charClasses = new PredefinedCharacterClasses();
- /*
- * Indicates if this class may contain supplementary Unicode codepoints.
- * If this flag is specified it doesn't mean that this class contains
- * supplementary characters but may contain.
- */
- protected boolean mayContainSupplCodepoints = false;
-
/**
* Returns true if this char class contains character specified;
*
@@ -74,21 +56,7 @@
protected BitSet getBits() {
return null;
}
-
- protected BitSet getLowHighSurrogates() {
- return lowHighSurrogates;
- }
- public boolean hasLowHighSurrogates() {
- return altSurrogates
- ? lowHighSurrogates.nextClearBit(0) < SURROGATE_CARDINALITY
- : lowHighSurrogates.nextSetBit(0) < SURROGATE_CARDINALITY;
- }
-
- public boolean mayContainSupplCodepoints() {
- return mayContainSupplCodepoints;
- }
-
public int getType() {
return SpecialToken.TOK_CHARCLASS;
}
@@ -96,55 +64,7 @@
public AbstractCharClass getInstance() {
return this;
}
-
- public AbstractCharClass getSurrogates() {
-
- if (charClassWithSurrogates == null) {
- final BitSet lHS = getLowHighSurrogates();
-
- charClassWithSurrogates = new AbstractCharClass() {
- public boolean contains(int ch) {
- int index = ch - Character.MIN_SURROGATE;
- return ((index >= 0)
- && (index < AbstractCharClass.SURROGATE_CARDINALITY))
- ? this.altSurrogates ^ lHS.get(index)
- : false;
- }
- };
- charClassWithSurrogates.setNegative(this.altSurrogates);
- }
-
- return charClassWithSurrogates;
- }
-
- public AbstractCharClass getWithoutSurrogates() {
- if (charClassWithoutSurrogates == null) {
- final BitSet lHS = getLowHighSurrogates();
- final AbstractCharClass thisClass = this;
-
- charClassWithoutSurrogates = new AbstractCharClass() {
- public boolean contains(int ch) {
- int index = ch - Character.MIN_SURROGATE;
-
- boolean containslHS = ((index >= 0)
- && (index < AbstractCharClass.SURROGATE_CARDINALITY))
- ? this.altSurrogates ^ lHS.get(index)
- : false;
-
-
- return thisClass.contains(ch)
- && !containslHS;
- }
- };
- charClassWithoutSurrogates.setNegative(isNegative());
- charClassWithoutSurrogates.mayContainSupplCodepoints
- = mayContainSupplCodepoints;
- }
-
- return charClassWithoutSurrogates;
- }
-
public boolean hasUCI() {
return false;
}
@@ -162,13 +82,8 @@
* @see #union(CharClass)
*/
public AbstractCharClass setNegative(boolean value) {
- if (alt ^ value) {
+ if (alt ^ value)
alt = !alt;
- altSurrogates = !altSurrogates;
- }
- if (!mayContainSupplCodepoints) {
- mayContainSupplCodepoints = true;
- }
return this;
}
@@ -180,11 +95,11 @@
// Static methods and predefined classes
// -----------------------------------------------------------------
- public static boolean intersects(int ch1, int ch2) {
+ public static boolean intersects(char ch1, char ch2) {
return ch1 == ch2;
}
- public static boolean intersects(AbstractCharClass cc, int ch) {
+ public static boolean intersects(AbstractCharClass cc, char ch) {
return cc.contains(ch);
}
@@ -227,10 +142,7 @@
static class LazyNonDigit extends LazyDigit {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = super.computeValue().setNegative(true);
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
+ return super.computeValue().setNegative(true);
}
}
@@ -243,10 +155,7 @@
static class LazyNonSpace extends LazySpace {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = super.computeValue().setNegative(true);
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
+ return super.computeValue().setNegative(true);
}
}
@@ -259,10 +168,7 @@
static class LazyNonWord extends LazyWord {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = super.computeValue().setNegative(true);
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
+ return super.computeValue().setNegative(true);
}
}
@@ -345,8 +251,7 @@
}
public AbstractCharClass computeValue() {
- AbstractCharClass chCl = new CharClass().add(start, end);
- return chCl;
+ return new CharClass().add(start, end);
}
}
@@ -358,85 +263,45 @@
static class LazyCategoryScope extends LazyCharClass {
int category;
-
- boolean mayContainSupplCodepoints;
- boolean containsAllSurrogates;
-
- public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints) {
- this.mayContainSupplCodepoints = mayContainSupplCodepoints;
+ public LazyCategoryScope(int cat) {
this.category = cat;
}
- public LazyCategoryScope(int cat, boolean mayContainSupplCodepoints,
- boolean containsAllSurrogates) {
- this.containsAllSurrogates = containsAllSurrogates;
- this.mayContainSupplCodepoints = mayContainSupplCodepoints;
- this.category = cat;
- }
-
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new UnicodeCategoryScope(category);
- if (containsAllSurrogates) {
- chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
- }
-
- chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;;
- return chCl;
+ return new UnicodeCategoryScope(category);
}
}
static class LazyCategory extends LazyCharClass {
int category;
- boolean mayContainSupplCodepoints;
-
- boolean containsAllSurrogates;
-
- public LazyCategory(int cat, boolean mayContainSupplCodepoints) {
- this.mayContainSupplCodepoints = mayContainSupplCodepoints;
+ public LazyCategory(int cat) {
this.category = cat;
}
- public LazyCategory(int cat, boolean mayContainSupplCodepoints,
- boolean containsAllSurrogates) {
- this.containsAllSurrogates = containsAllSurrogates;
- this.mayContainSupplCodepoints = mayContainSupplCodepoints;
- this.category = cat;
- }
-
+
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new UnicodeCategory(category);
- if (containsAllSurrogates) {
- chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
- }
- chCl.mayContainSupplCodepoints = mayContainSupplCodepoints;;
- return chCl;
+ return new UnicodeCategory(category);
}
}
static class LazyJavaLowerCase extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isLowerCase(ch);
+ return Character.isLowerCase((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
static class LazyJavaUpperCase extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isUpperCase(ch);
+ return Character.isUpperCase((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
@@ -444,7 +309,7 @@
protected AbstractCharClass computeValue() {
return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isWhitespace(ch);
+ return Character.isWhitespace((char) ch);
}
};
}
@@ -454,7 +319,7 @@
protected AbstractCharClass computeValue() {
return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isMirrored(ch);
+ return Character.isMirrored((char) ch);
}
};
}
@@ -462,41 +327,31 @@
static class LazyJavaDefined extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isDefined(ch);
+ return Character.isDefined((char) ch);
}
};
- chCl.lowHighSurrogates.set(0, SURROGATE_CARDINALITY);
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
static class LazyJavaDigit extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isDigit(ch);
+ return Character.isDigit((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
static class LazyJavaIdentifierIgnorable extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isIdentifierIgnorable(ch);
+ return Character.isIdentifierIgnorable((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
@@ -504,7 +359,7 @@
protected AbstractCharClass computeValue() {
return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isISOControl(ch);
+ return Character.isISOControl((char) ch);
}
};
}
@@ -512,53 +367,41 @@
static class LazyJavaJavaIdentifierPart extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isJavaIdentifierPart(ch);
+ return Character.isJavaIdentifierPart((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
static class LazyJavaJavaIdentifierStart extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isJavaIdentifierStart(ch);
+ return Character.isJavaIdentifierStart((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
static class LazyJavaLetter extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isLetter(ch);
+ return Character.isLetter((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
static class LazyJavaLetterOrDigit extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isLetterOrDigit(ch);
+ return Character.isLetterOrDigit((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
@@ -566,7 +409,7 @@
protected AbstractCharClass computeValue() {
return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isSpaceChar(ch);
+ return Character.isSpaceChar((char) ch);
}
};
}
@@ -576,7 +419,7 @@
protected AbstractCharClass computeValue() {
return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isTitleCase(ch);
+ return Character.isTitleCase((char) ch);
}
};
}
@@ -584,30 +427,24 @@
static class LazyJavaUnicodeIdentifierPart extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isUnicodeIdentifierPart(ch);
+ return Character.isUnicodeIdentifierPart((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
static class LazyJavaUnicodeIdentifierStart extends LazyCharClass {
protected AbstractCharClass computeValue() {
- AbstractCharClass chCl = new AbstractCharClass() {
+ return new AbstractCharClass() {
public boolean contains(int ch) {
- return Character.isUnicodeIdentifierStart(ch);
+ return Character.isUnicodeIdentifierStart((char) ch);
}
};
-
- chCl.mayContainSupplCodepoints = true;
- return chCl;
}
}
-
+
/**
* character classes generated from
* http://www.unicode.org/reports/tr18/
@@ -783,43 +620,44 @@
{ "ArabicPresentationForms-B", new LazyRange(0xFE70, 0xFEFF) }, //$NON-NLS-1$
{ "HalfwidthandFullwidthForms", new LazyRange(0xFF00, 0xFFEF) }, //$NON-NLS-1$
{ "Specials", new LazySpecialsBlock() }, //$NON-NLS-1$
- { "Cn", new LazyCategory(Character.UNASSIGNED, true) },
- { "IsL", new LazyCategoryScope(0x3E, true) },
- { "Lu", new LazyCategory(Character.UPPERCASE_LETTER, true) },
- { "Ll", new LazyCategory(Character.LOWERCASE_LETTER, true) },
- { "Lt", new LazyCategory(Character.TITLECASE_LETTER, false) },
- { "Lm", new LazyCategory(Character.MODIFIER_LETTER, false) },
- { "Lo", new LazyCategory(Character.OTHER_LETTER, true) },
- { "IsM", new LazyCategoryScope(0x1C0, true) },
- { "Mn", new LazyCategory(Character.NON_SPACING_MARK, true) },
- { "Me", new LazyCategory(Character.ENCLOSING_MARK, false) },
- { "Mc", new LazyCategory(Character.COMBINING_SPACING_MARK, true) },
- { "N", new LazyCategoryScope(0xE00, true) },
- { "Nd", new LazyCategory(Character.DECIMAL_DIGIT_NUMBER, true) },
- { "Nl", new LazyCategory(Character.LETTER_NUMBER, true) },
- { "No", new LazyCategory(Character.OTHER_NUMBER, true) },
- { "IsZ", new LazyCategoryScope(0x7000, false) },
- { "Zs", new LazyCategory(Character.SPACE_SEPARATOR, false) },
- { "Zl", new LazyCategory(Character.LINE_SEPARATOR, false) },
- { "Zp", new LazyCategory(Character.PARAGRAPH_SEPARATOR, false) },
- { "IsC", new LazyCategoryScope(0xF0000, true, true) },
- { "Cc", new LazyCategory(Character.CONTROL, false) },
- { "Cf", new LazyCategory(Character.FORMAT, true) },
- { "Co", new LazyCategory(Character.PRIVATE_USE, true) },
- { "Cs", new LazyCategory(Character.SURROGATE, false, true) },
- { "IsP", new LazyCategoryScope(0xF8000, true) },
- { "Pd", new LazyCategory(Character.DASH_PUNCTUATION, false) },
- { "Ps", new LazyCategory(Character.START_PUNCTUATION, false) },
- { "Pe", new LazyCategory(Character.END_PUNCTUATION, false) },
- { "Pc", new LazyCategory(Character.CONNECTOR_PUNCTUATION, false) },
- { "Po", new LazyCategory(Character.OTHER_PUNCTUATION, true) },
- { "IsS", new LazyCategoryScope(0x7E000000, true) },
- { "Sm", new LazyCategory(Character.MATH_SYMBOL, true) },
- { "Sc", new LazyCategory(Character.CURRENCY_SYMBOL, false) },
- { "Sk", new LazyCategory(Character.MODIFIER_SYMBOL, false) },
- { "So", new LazyCategory(Character.OTHER_SYMBOL, true) },
- { "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION, false) },
- { "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION, false) } };
+ { "Cn", new LazyCategory(Character.UNASSIGNED) }, //$NON-NLS-1$
+ { "IsL", new LazyCategoryScope(0x3E) }, //$NON-NLS-1$
+ { "Lu", new LazyCategory(Character.UPPERCASE_LETTER) }, //$NON-NLS-1$
+ { "Ll", new LazyCategory(Character.LOWERCASE_LETTER) }, //$NON-NLS-1$
+ { "Lt", new LazyCategory(Character.TITLECASE_LETTER) }, //$NON-NLS-1$
+ { "Lm", new LazyCategory(Character.MODIFIER_LETTER) }, //$NON-NLS-1$
+ { "Lo", new LazyCategory(Character.OTHER_LETTER) }, //$NON-NLS-1$
+ { "IsM", new LazyCategoryScope(0x1C0) }, //$NON-NLS-1$
+ { "Mn", new LazyCategory(Character.NON_SPACING_MARK) }, //$NON-NLS-1$
+ { "Me", new LazyCategory(Character.ENCLOSING_MARK) }, //$NON-NLS-1$
+ { "Mc", new LazyCategory(Character.COMBINING_SPACING_MARK) }, //$NON-NLS-1$
+ { "N", new LazyCategoryScope(0xE00) }, //$NON-NLS-1$
+ { "Nd", new LazyCategory(Character.DECIMAL_DIGIT_NUMBER) }, //$NON-NLS-1$
+ { "Nl", new LazyCategory(Character.LETTER_NUMBER) }, //$NON-NLS-1$
+ { "No", new LazyCategory(Character.OTHER_NUMBER) }, //$NON-NLS-1$
+ { "IsZ", new LazyCategoryScope(0x7000) }, //$NON-NLS-1$
+ { "Zs", new LazyCategory(Character.SPACE_SEPARATOR) }, //$NON-NLS-1$
+ { "Zl", new LazyCategory(Character.LINE_SEPARATOR) }, //$NON-NLS-1$
+ { "Zp", new LazyCategory(Character.PARAGRAPH_SEPARATOR) }, //$NON-NLS-1$
+ { "IsC", new LazyCategoryScope(0xF0000) }, //$NON-NLS-1$
+ { "Cc", new LazyCategory(Character.CONTROL) }, //$NON-NLS-1$
+ { "Cf", new LazyCategory(Character.FORMAT) }, //$NON-NLS-1$
+ { "Co", new LazyCategory(Character.PRIVATE_USE) }, //$NON-NLS-1$
+ { "Cs", new LazyCategory(Character.SURROGATE) }, //$NON-NLS-1$
+ { "IsP", new LazyCategoryScope(0xF8000) }, //$NON-NLS-1$
+ { "Pd", new LazyCategory(Character.DASH_PUNCTUATION) }, //$NON-NLS-1$
+ { "Ps", new LazyCategory(Character.START_PUNCTUATION) }, //$NON-NLS-1$
+ { "Pe", new LazyCategory(Character.END_PUNCTUATION) }, //$NON-NLS-1$
+ { "Pc", new LazyCategory(Character.CONNECTOR_PUNCTUATION) }, //$NON-NLS-1$
+ { "Po", new LazyCategory(Character.OTHER_PUNCTUATION) }, //$NON-NLS-1$
+ { "IsS", new LazyCategoryScope(0x7E000000) }, //$NON-NLS-1$
+ { "Sm", new LazyCategory(Character.MATH_SYMBOL) }, //$NON-NLS-1$
+ { "Sc", new LazyCategory(Character.CURRENCY_SYMBOL) }, //$NON-NLS-1$
+ { "Sk", new LazyCategory(Character.MODIFIER_SYMBOL) }, //$NON-NLS-1$
+ { "So", new LazyCategory(Character.OTHER_SYMBOL) }, //$NON-NLS-1$
+ { "Pi", new LazyCategory(Character.INITIAL_QUOTE_PUNCTUATION) }, //$NON-NLS-1$
+ { "Pf", new LazyCategory(Character.FINAL_QUOTE_PUNCTUATION) } }; //$NON-NLS-1$
+
public Object[][] getContents() {
return contents;
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/AltQuantifierSet.java Mon Oct 9 17:44:44 2006
@@ -35,6 +35,7 @@
public int matches(int stringIndex, CharSequence testString,
MatchResultImpl matchResult) {
+ int i = 0;
int shift = 0;
if ((shift = innerSet.matches(stringIndex, testString, matchResult)) >= 0) {
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharClass.java Mon Oct 9 17:44:44 2006
@@ -40,8 +40,6 @@
// Flag indicates if there are unicode supplements
boolean hasUCI = false;
- boolean invertedSurrogates = false;
-
boolean inverted = false;
boolean hideBits = false;
@@ -63,10 +61,6 @@
setNegative(negative);
}
- /*
- * We can use this method safely even if nonBitSet != null
- * due to specific of range constrcutions in regular expressions.
- */
public CharClass add(int ch) {
if (ci) {
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
@@ -77,151 +71,52 @@
}
} else if (uci && ch > 128) {
hasUCI = true;
- ch = Character.toLowerCase(Character.toUpperCase(ch));
+ ch = Character.toLowerCase(Character.toUpperCase((char) ch));
// return this;
}
}
-
- if (Lexer.isHighSurrogate(ch) || Lexer.isLowSurrogate(ch)) {
- if (!invertedSurrogates) {
- lowHighSurrogates.set(ch - Character.MIN_SURROGATE);
- } else {
- lowHighSurrogates.clear(ch - Character.MIN_SURROGATE);
- }
- }
-
if (!inverted) {
bits.set(ch);
} else
- bits.clear(ch);
+ bits.clear();
- if (!mayContainSupplCodepoints && Character.isSupplementaryCodePoint(ch)) {
- mayContainSupplCodepoints = true;
- }
-
return this;
}
- /*
- * The difference between add(AbstarctCharClass) and union(AbstractCharClass)
- * is that add() is used for constructions like "[^abc\\d]"
- * (this pattern doesn't match "1")
- * while union is used for constructions like "[^abc[\\d]]"
- * (this pattern matches "1").
- */
public CharClass add(final AbstractCharClass cc) {
-
- if (!mayContainSupplCodepoints && cc.mayContainSupplCodepoints) {
- mayContainSupplCodepoints = true;
- }
-
- if (!invertedSurrogates) {
-
- //A | !B = ! ((A ^ B) & B)
- if (cc.altSurrogates) {
- lowHighSurrogates.xor(cc.getLowHighSurrogates());
- lowHighSurrogates.and(cc.getLowHighSurrogates());
- altSurrogates = !altSurrogates;
- invertedSurrogates = true;
-
- //A | B
- } else {
- lowHighSurrogates.or(cc.getLowHighSurrogates());
- }
- } else {
-
- //!A | !B = !(A & B)
- if (cc.altSurrogates) {
- lowHighSurrogates.and(cc.getLowHighSurrogates());
-
- //!A | B = !(A & !B)
- } else {
- lowHighSurrogates.andNot(cc.getLowHighSurrogates());
- }
- }
-
- if (!hideBits && cc.getBits() != null) {
+ if (cc.getBits() != null) {
if (!inverted) {
-
- //A | !B = ! ((A ^ B) & B)
if (cc.isNegative()) {
bits.xor(cc.getBits());
bits.and(cc.getBits());
alt = !alt;
inverted = true;
-
- //A | B
} else {
bits.or(cc.getBits());
}
} else {
-
- //!A | !B = !(A & B)
if (cc.isNegative()) {
bits.and(cc.getBits());
-
- //!A | B = !(A & !B)
} else {
bits.andNot(cc.getBits());
}
}
- } else {
- final boolean curAlt = alt;
-
+ } else {
if (nonBitSet == null) {
-
- if (curAlt && !inverted && bits.isEmpty()) {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return cc.contains(ch);
- }
- };
- //alt = true;
- } else {
-
- /*
- * We keep the value of alt unchanged for
- * constructions like [^[abc]fgb] by using
- * the formula a ^ b == !a ^ !b.
- */
- if (curAlt) {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return !((curAlt ^ bits.get(ch))
- || ((curAlt ^ inverted) ^ cc.contains(ch)));
- }
- };
- //alt = true
- } else {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return (curAlt ^ bits.get(ch))
- || ((curAlt ^ inverted) ^ cc.contains(ch));
- }
- };
- //alt = false
+ // hide bits true at the moment
+ nonBitSet = new AbstractCharClass() {
+ public boolean contains(int ch) {
+ return cc.contains(ch) || bits.get(ch);
}
- }
-
- hideBits = true;
+ };
+ hideBits = true;
} else {
final AbstractCharClass nb = nonBitSet;
-
- if (curAlt) {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return !(curAlt ^ (nb.contains(ch) || cc.contains(ch)));
- }
- };
- //alt = true
- } else {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return curAlt ^ (nb.contains(ch) || cc.contains(ch));
- }
- };
- //alt = false
- }
+ nonBitSet = new AbstractCharClass() {
+ public boolean contains(int ch) {
+ return nb.contains(ch) || cc.contains(ch);
+ }
+ };
}
}
@@ -231,11 +126,7 @@
public CharClass add(int st, int end) {
if (st > end)
throw new IllegalArgumentException();
- if (!ci
-
- //no intersection with surrogate characters
- && (end < Character.MIN_SURROGATE
- || st > Character.MAX_SURROGATE)) {
+ if (!ci) {
if (!inverted) {
bits.set(st, end + 1);
} else {
@@ -248,247 +139,81 @@
}
return this;
}
-
+
// OR operation
public void union(final AbstractCharClass clazz) {
- if (!mayContainSupplCodepoints
- && clazz.mayContainSupplCodepoints) {
- mayContainSupplCodepoints = true;
- }
-
if (clazz.hasUCI())
this.hasUCI = true;
-
-
- if (altSurrogates ^ clazz.altSurrogates) {
-
- //!A | B = !(A & !B)
- if (altSurrogates) {
- lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
-
- //A | !B = !((A ^ B) & B)
- } else {
- lowHighSurrogates.xor(clazz.getLowHighSurrogates());
- lowHighSurrogates.and(clazz.getLowHighSurrogates());
- altSurrogates = true;
- }
-
- } else {
-
- //!A | !B = !(A & B)
- if (altSurrogates) {
- lowHighSurrogates.and(clazz.getLowHighSurrogates());
-
- //A | B
- } else {
- lowHighSurrogates.or(clazz.getLowHighSurrogates());
- }
- }
-
if (!hideBits && clazz.getBits() != null) {
if (alt ^ clazz.isNegative()) {
-
- //!A | B = !(A & !B)
if (alt) {
bits.andNot(clazz.getBits());
-
- //A | !B = !((A ^ B) & B)
} else {
bits.xor(clazz.getBits());
bits.and(clazz.getBits());
- alt = true;
}
-
+ alt = true;
} else {
-
- //!A | !B = !(A & B)
- if (alt) {
+ if (alt) {
bits.and(clazz.getBits());
-
- //A | B
- } else {
+ } else {
bits.or(clazz.getBits());
}
}
} else {
- final boolean curAlt = alt;
-
if (nonBitSet == null) {
-
- if (!inverted && bits.isEmpty()) {
- if (curAlt) {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return !clazz.contains(ch);
- }
- };
- //alt = true
- } else {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return clazz.contains(ch);
- }
- };
- //alt = false
- }
- } else {
-
- if (curAlt) {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return !(clazz.contains(ch) || (curAlt ^ bits.get(ch)));
- }
- };
- //alt = true
- } else {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return clazz.contains(ch) || (curAlt ^ bits.get(ch));
- }
- };
- //alt = false
+ nonBitSet = new AbstractCharClass() {
+ public boolean contains(int ch) {
+ return clazz.contains(ch) || bits.get(ch);
}
- }
+ };
hideBits = true;
} else {
final AbstractCharClass nb = nonBitSet;
-
- if (curAlt) {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return !((curAlt ^ nb.contains(ch)) || clazz.contains(ch));
- }
- };
- //alt = true
- } else {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return (curAlt ^ nb.contains(ch)) || clazz.contains(ch);
- }
- };
- //alt = false
- }
+ nonBitSet = new AbstractCharClass() {
+ public boolean contains(int ch) {
+ return nb.contains(ch) || clazz.contains(ch);
+ }
+ };
}
}
}
// AND operation
public void intersection(final AbstractCharClass clazz) {
- if (!mayContainSupplCodepoints
- && clazz.mayContainSupplCodepoints) {
- mayContainSupplCodepoints = true;
- }
-
if (clazz.hasUCI())
this.hasUCI = true;
-
- if (altSurrogates ^ clazz.altSurrogates) {
-
- //!A & B = ((A ^ B) & B)
- if (altSurrogates) {
- lowHighSurrogates.xor(clazz.getLowHighSurrogates());
- lowHighSurrogates.and(clazz.getLowHighSurrogates());
- altSurrogates = false;
-
- //A & !B
- } else {
- lowHighSurrogates.andNot(clazz.getLowHighSurrogates());
- }
- } else {
-
- //!A & !B = !(A | B)
- if (altSurrogates) {
- lowHighSurrogates.or(clazz.getLowHighSurrogates());
-
- //A & B
- } else {
- lowHighSurrogates.and(clazz.getLowHighSurrogates());
- }
- }
-
if (!hideBits && clazz.getBits() != null) {
-
if (alt ^ clazz.isNegative()) {
-
- //!A & B = ((A ^ B) & B)
if (alt) {
bits.xor(clazz.getBits());
bits.and(clazz.getBits());
- alt = false;
-
- //A & !B
+ setNegative(false);
} else {
bits.andNot(clazz.getBits());
}
} else {
-
- //!A & !B = !(A | B)
if (alt) {
bits.or(clazz.getBits());
-
- //A & B
} else {
bits.and(clazz.getBits());
}
}
} else {
- final boolean curAlt = alt;
-
- if (nonBitSet == null) {
-
- if (!inverted && bits.isEmpty()) {
- if (curAlt) {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return !clazz.contains(ch);
- }
- };
- //alt = true
- } else {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return clazz.contains(ch);
- }
- };
- //alt = false
- }
- } else {
-
- if (curAlt) {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return !(clazz.contains(ch) && (curAlt ^ bits.get(ch)));
- }
- };
- //alt = true
- } else {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return clazz.contains(ch) && (curAlt ^ bits.get(ch));
- }
- };
- //alt = false
+ if (nonBitSet == null) {
+ nonBitSet = new AbstractCharClass() {
+ public boolean contains(int ch) {
+ return bits.get(ch) && clazz.contains(ch);
}
- }
+ };
hideBits = true;
} else {
final AbstractCharClass nb = nonBitSet;
-
- if (curAlt) {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return !((curAlt ^ nb.contains(ch)) && clazz.contains(ch));
- }
- };
- //alt = true
- } else {
- nonBitSet = new AbstractCharClass() {
- public boolean contains(int ch) {
- return (curAlt ^ nb.contains(ch)) && clazz.contains(ch);
- }
- };
- //alt = false
- }
+ nonBitSet = new AbstractCharClass() {
+ public boolean contains(int ch) {
+ return nb.contains(ch) && clazz.contains(ch);
+ }
+ };
}
}
}
@@ -519,15 +244,9 @@
return bits;
}
- protected BitSet getLowHighSurrogates() {
- return lowHighSurrogates;
- }
-
public AbstractCharClass getInstance() {
-
if (nonBitSet == null) {
final BitSet bs = getBits();
-
AbstractCharClass res = new AbstractCharClass() {
public boolean contains(int ch) {
return this.alt ^ bs.get(ch);
@@ -537,7 +256,7 @@
StringBuffer temp = new StringBuffer();
for (int i = bs.nextSetBit(0); i >= 0; i = bs
.nextSetBit(i + 1)) {
- temp.append(Character.toChars(i));
+ temp.append((char) i);
temp.append('|');
}
@@ -554,11 +273,10 @@
}
}
- //for debugging purposes only
public String toString() {
StringBuffer temp = new StringBuffer();
for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
- temp.append(Character.toChars(i));
+ temp.append((char) i);
temp.append('|');
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CharSet.java Mon Oct 9 17:44:44 2006
@@ -45,48 +45,41 @@
public int find(int strIndex, CharSequence testString,
MatchResultImpl matchResult) {
- if (testString instanceof String) {
- String testStr = (String) testString;
- int strLength = matchResult.getRightBound();
-
- while (strIndex < strLength) {
- strIndex = testStr.indexOf(ch, strIndex);
- if (strIndex < 0)
- return -1;
- if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
- return strIndex;
- }
- strIndex++;
+ boolean res = false;
+ String testStr = testString.toString();
+ int strLength = matchResult.getRightBound();
+
+ while (strIndex < strLength) {
+ strIndex = testStr.indexOf(ch, strIndex);
+ if (strIndex < 0)
+ return -1;
+ if (next.matches(strIndex + 1, testString, matchResult) >= 0) {
+ return strIndex;
}
-
- return -1;
+ strIndex++;
}
-
- return super.find(strIndex, testString, matchResult);
+
+ return -1;
}
public int findBack(int strIndex, int lastIndex, CharSequence testString,
MatchResultImpl matchResult) {
- if (testString instanceof String) {
- String testStr = (String) testString;
+ String testStr = testString.toString();
- while (lastIndex >= strIndex) {
- lastIndex = testStr.lastIndexOf(ch, lastIndex);
- if (lastIndex < 0 || lastIndex < strIndex) {
- return -1;
- }
-
- if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
- return lastIndex;
- }
+ while (lastIndex >= strIndex) {
+ lastIndex = testStr.lastIndexOf(ch, lastIndex);
+ if (lastIndex < 0 || lastIndex < strIndex) {
+ return -1;
+ }
- lastIndex--;
+ if (next.matches(lastIndex + 1, testString, matchResult) >= 0) {
+ return lastIndex;
}
- return -1;
+ lastIndex--;
}
-
- return super.findBack(strIndex, lastIndex, testString, matchResult);
+
+ return -1;
}
protected String getName() {
@@ -102,10 +95,6 @@
return ((CharSet) set).getChar() == ch;
} else if (set instanceof RangeSet) {
return ((RangeSet) set).accepts(0, Character.toString(ch)) > 0;
- } else if (set instanceof SupplRangeSet) {
- return ((SupplRangeSet) set).contains(ch);
- } else if (set instanceof SupplCharSet) {
- return false;
}
return true;
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/CompositeQuantifierSet.java Mon Oct 9 17:44:44 2006
@@ -71,7 +71,7 @@
if (shift >= 0) {
return shift;
}
- stringIndex -= leaf.charCount();
+ stringIndex--;
}
return -1;
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DecomposedCharSet.java Mon Oct 9 17:44:44 2006
@@ -198,7 +198,7 @@
StringBuffer strBuff = new StringBuffer();
for (int i = 0; i < decomposedCharLength; i++) {
- strBuff.append(Character.toChars(decomposedChar[i]));
+ strBuff.append(Lexer.toChars(decomposedChar[i]));
}
decomposedCharUTF16 = strBuff.toString();
}
@@ -231,9 +231,9 @@
char high = testString.charAt(strIndex++);
char low = testString.charAt(strIndex);
- if (Character.isSurrogatePair(high, low)) {
+ if (Lexer.isSurrogatePair(high, low)) {
char [] curCodePointUTF16 = new char [] {high, low};
- curChar = Character.codePointAt(curCodePointUTF16, 0);
+ curChar = Lexer.codePointAt(curCodePointUTF16, 0);
readCharsForCodePoint = 2;
} else {
curChar = high;
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllQuantifierSet.java Mon Oct 9 17:44:44 2006
@@ -28,9 +28,9 @@
* @author Nikolay A. Kuznetsov
* @version $Revision: 1.8.2.2 $
*/
-class DotAllQuantifierSet extends QuantifierSet {
+class DotAllQuantifierSet extends LeafQuantifierSet {
- public DotAllQuantifierSet(AbstractSet innerSet, AbstractSet next, int type) {
+ public DotAllQuantifierSet(LeafSet innerSet, AbstractSet next, int type) {
super(innerSet, next, type);
}
@@ -53,9 +53,5 @@
} else {
return -1;
}
- }
-
- protected String getName() {
- return "<DotAllQuant>";
}
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotAllSet.java Mon Oct 9 17:44:44 2006
@@ -27,48 +27,17 @@
* @author Nikolay A. Kuznetsov
* @version $Revision: 1.6.2.2 $
*/
-class DotAllSet extends JointSet {
+class DotAllSet extends LeafSet {
- public int matches(int stringIndex, CharSequence testString,
- MatchResultImpl matchResult) {
- int strLength = matchResult.getRightBound();
-
- if (stringIndex + 1 > strLength) {
- matchResult.hitEnd = true;
- return -1;
- }
-
- char high = testString.charAt(stringIndex);
-
- if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) {
- char low = testString.charAt(stringIndex + 1);
-
- if (Character.isSurrogatePair(high, low)) {
- return next.matches(stringIndex + 2, testString, matchResult);
- }
- }
- return next.matches(stringIndex + 1, testString, matchResult);
- }
+ public int accepts(int strIndex, CharSequence testString) {
+ return 1;
+ }
protected String getName() {
return "DotAll"; //$NON-NLS-1$
}
-
- public AbstractSet getNext() {
- return this.next;
- }
-
- public void setNext(AbstractSet next) {
- this.next = next;
- }
-
public int getType() {
return AbstractSet.TYPE_DOTSET;
- }
-
-
- public boolean hasConsumed(MatchResultImpl matchResult) {
- return true;
}
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotQuantifierSet.java Mon Oct 9 17:44:44 2006
@@ -29,11 +29,11 @@
* @author Nikolay A. Kuznetsov
* @version $Revision: 1.11.2.2 $
*/
-class DotQuantifierSet extends QuantifierSet {
+class DotQuantifierSet extends LeafQuantifierSet {
AbstractLineTerminator lt;
- public DotQuantifierSet(AbstractSet innerSet, AbstractSet next, int type,
+ public DotQuantifierSet(LeafSet innerSet, AbstractSet next, int type,
AbstractLineTerminator lt) {
super(innerSet, next, type);
this.lt = lt;
@@ -48,7 +48,7 @@
findLineTerminator(stringIndex, strLength, testString);
if (startSearch < 0) {
- startSearch = strLength;
+ startSearch = matchResult.getRightBound();
}
if (startSearch <= stringIndex) {
@@ -97,9 +97,6 @@
return res;
}
- /*
- * All line terminators are from Basic Multilingual Pane
- */
private int findLineTerminator(int from, int to, CharSequence testString) {
for (int i = from; i < to; i++) {
if (lt.isLineTerminator(testString.charAt(i))) {
@@ -118,7 +115,4 @@
return -1;
}
- protected String getName() {
- return "<DotQuant>";
- }
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/DotSet.java Mon Oct 9 17:44:44 2006
@@ -27,7 +27,7 @@
* @author Nikolay A. Kuznetsov
* @version $Revision: 1.12.2.2 $
*/
-final class DotSet extends JointSet {
+final class DotSet extends LeafSet {
AbstractLineTerminator lt;
@@ -36,47 +36,21 @@
this.lt = lt;
}
- public int matches(int stringIndex, CharSequence testString,
- MatchResultImpl matchResult) {
- int strLength = matchResult.getRightBound();
-
- if (stringIndex + 1 > strLength) {
- matchResult.hitEnd = true;
- return -1;
- }
- char high = testString.charAt(stringIndex);
-
- if (Character.isHighSurrogate(high) && (stringIndex + 2 <= strLength)) {
- char low = testString.charAt(stringIndex + 1);
-
- if (Character.isSurrogatePair(high, low)) {
- return lt.isLineTerminator(Character.toCodePoint(high, low))? -1
- : next.matches(stringIndex + 2, testString, matchResult);
- }
- }
-
- return lt.isLineTerminator(high)? -1
- : next.matches(stringIndex + 1, testString, matchResult);
+ public int accepts(int strIndex, CharSequence testString) {
+ char ch = testString.charAt(strIndex);
+ return lt.isLineTerminator(ch) ? -1 : 1;
+
+ /*
+ * return (strIndex<testString.length() && testString.charAt(strIndex) !=
+ * '\n') ? 1 : -1;
+ */
}
protected String getName() {
return "."; //$NON-NLS-1$
}
-
- public AbstractSet getNext() {
- return this.next;
- }
-
- public void setNext(AbstractSet next) {
- this.next = next;
- }
-
public int getType() {
return AbstractSet.TYPE_DOTSET;
}
-
- public boolean hasConsumed(MatchResultImpl matchResult) {
- return true;
- }
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/EmptySet.java Mon Oct 9 17:44:44 2006
@@ -41,70 +41,6 @@
return 0;
}
- public int find(int stringIndex, CharSequence testString,
- MatchResultImpl matchResult) {
- int strLength = matchResult.getRightBound();
- int startStr = matchResult.getLeftBound();
-
- while (stringIndex <= strLength) {
-
- //check for supplementary codepoints
- if (stringIndex < strLength) {
- char low = testString.charAt(stringIndex);
-
- if (Character.isLowSurrogate(low)) {
-
- if (stringIndex > startStr) {
- char high = testString.charAt(stringIndex - 1);
- if (Character.isHighSurrogate(high)) {
- stringIndex++;
- continue;
- }
- }
- }
- }
-
- if (next.matches(stringIndex, testString, matchResult) >= 0) {
- return stringIndex;
- }
- stringIndex++;
- }
-
- return -1;
- }
-
- public int findBack(int stringIndex, int startSearch,
- CharSequence testString, MatchResultImpl matchResult) {
- int strLength = matchResult.getRightBound();
- int startStr = matchResult.getLeftBound();
-
- while (startSearch >= stringIndex) {
-
- //check for supplementary codepoints
- if (startSearch < strLength) {
- char low = testString.charAt(startSearch);
-
- if (Character.isLowSurrogate(low)) {
-
- if (startSearch > startStr) {
- char high = testString.charAt(startSearch - 1);
- if (Character.isHighSurrogate(high)) {
- startSearch--;
- continue;
- }
- }
- }
- }
-
- if (next.matches(startSearch, testString, matchResult) >= 0) {
- return startSearch;
- }
- startSearch--;
- }
-
- return -1;
- }
-
/*
* @see java.util.regex.AbstractSet#getName()
*/
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/LeafQuantifierSet.java Mon Oct 9 17:44:44 2006
@@ -53,7 +53,7 @@
return shift;
}
- stringIndex -= leaf.charCount();
+ stringIndex--;
}
return -1;
}
Modified: incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java
URL: http://svn.apache.org/viewvc/incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java?view=diff&rev=454575&r1=454574&r2=454575
==============================================================================
--- incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java (original)
+++ incubator/harmony/enhanced/classlib/trunk/modules/regex/src/main/java/java/util/regex/Lexer.java Mon Oct 9 17:44:44 2006
@@ -124,6 +124,9 @@
*/
static final int MAX_HANGUL_DECOMPOSITION_LENGTH = 3;
+ //maximum value of a codepoint in the Basic Multilingual Plane (BMP) of Unicode
+ static final int MAX_CODEPOINT_BASIC_MULTILINGUAL_PANE = 0xFFFF;
+
/*
* Following constants are needed for Hangul canonical decomposition.
* Hangul decomposition algorithm and constants are taken according
@@ -361,8 +364,8 @@
singleDecompTable = SingleDecompositions.getHashSingleDecompositions();
singleDecompTableSize = singleDecompTable.size;
- for (int i = 0; i < inputLength; i += Character.charCount(ch)) {
- ch = Character.codePointAt(inputChars, i);
+ for (int i = 0; i < inputLength; i += Lexer.charCount(ch)) {
+ ch = Lexer.codePointAt(inputChars, i);
inputCodePoints[inputCodePointsIndex++] = ch;
}
@@ -423,7 +426,7 @@
* Translating into UTF-16 encoding
*/
for (int i = 0; i < decompHangulIndex; i++) {
- result.append(Character.toChars(decompHangul[i]));
+ result.append(Lexer.toChars(decompHangul[i]));
}
return result.toString();
@@ -441,7 +444,7 @@
static int [] getCanonicalOrder(int [] inputInts, int length) {
int inputLength = (length < inputInts.length)
? length
- : inputInts.length;
+ : inputInts.length;
/*
* Simple bubble-sort algorithm.
@@ -507,23 +510,19 @@
reread = false;
// read next character analize it and construct token:
// //
-
- lookAhead = (index < pattern.length) ? nextCodePoint() : 0;
+ lookAhead = (index < pattern.length) ? pattern[nextIndex()] : 0;
lookAheadST = null;
if (mode == Lexer.MODE_ESCAPE) {
if (lookAhead == '\\') {
-
- //need not care about supplementary codepoints here
lookAhead = (index < pattern.length) ? pattern[nextIndex()]
: 0;
switch (lookAhead) {
case 'E': {
mode = saved_mode;
-
lookAhead = (index <= pattern.length - 2)
- ? nextCodePoint()
+ ? pattern[nextIndex()]
: 0;
break;
}
@@ -540,8 +539,7 @@
}
if (lookAhead == '\\') {
-
- lookAhead = (index < pattern.length - 2) ? nextCodePoint()
+ lookAhead = (index < pattern.length - 2) ? pattern[nextIndex()]
: -1;
switch (lookAhead) {
case -1:
@@ -650,8 +648,6 @@
break;
case 'c': {
if (index < pattern.length - 2) {
-
- //need not care about supplementary codepoints here
lookAhead = (pattern[nextIndex()] & 0x1f);
break;
} else {
@@ -966,8 +962,6 @@
* Returns true if current character is plain token.
*/
public static boolean isLetter(int ch) {
-
- //all supplementary codepoints have integer value that is >= 0;
return ch >= 0;
}
@@ -981,28 +975,6 @@
return !isEmpty() && !isSpecial() && isLetter(ch);
}
- /*
- * Note that Character class methods
- * isHighSurrogate(), isLowSurrogate()
- * take char parameter while we need an int
- * parameter without truncation to char value
- */
- public boolean isHighSurrogate() {
- return (ch <= 0xDBFF) && (ch >= 0xD800);
- }
-
- public boolean isLowSurrogate() {
- return (ch <= 0xDFFF) && (ch >= 0xDC00);
- }
-
- public static boolean isHighSurrogate(int ch) {
- return (ch <= 0xDBFF) && (ch >= 0xD800);
- }
-
- public static boolean isLowSurrogate(int ch) {
- return (ch <= 0xDFFF) && (ch >= 0xDC00);
- }
-
/**
* Process hexadecimal integer.
*/
@@ -1058,7 +1030,7 @@
}
/**
- * Process expression flags given with (?idmsux-idmsux)
+ * Process expression flags givent with (?idmsux-idmsux)
*/
private int readFlags() {
char ch;
@@ -1191,7 +1163,7 @@
* "3.12 Conjoining Jamo Behavior".
*
* @param ch - given Hangul syllable
- * @return canonical decomposition of ch.
+ * @return canonical decoposition of ch.
*/
static int [] getHangulDecomposition(int ch) {
int SIndex = ch - SBase;
@@ -1229,6 +1201,59 @@
? 0
: canClass;
}
+
+ /**
+ * Simple stub to Character.charCount().
+ *
+ * @param ch Unicode codepoint
+ * @return number of chars that are occupied by Unicode
+ * codepoint ch in UTF-16 encoding.
+ */
+ final static int charCount(int ch) {
+
+ //return Character.charCount(ch);
+ return 1;
+ }
+
+ /**
+ * Simple stub to Character.codePointAt().
+ *
+ * @param source char array to read from
+ * @param index position in source of the char to read
+ * @return Unicode codepoint at given index at source.
+ * Note that codepoint can reside in two adjacent chars.
+ */
+ final static int codePointAt(char [] source, int index) {
+
+ //return Character.codePointAt(source, index);
+ return source[index];
+ }
+
+ /**
+ * Simple stub to Character.toChars().
+ *
+ * @param ch Unicode codepoint
+ * @return UTF-16 encoding of given code point.
+ */
+ final static char [] toChars(int ch) {
+
+ //return Character.toChars(ch);
+ return new char [] {(char) ch};
+ }
+
+ /**
+ * Simple stub to Character.isSurrogatePair().
+ *
+ * @param high high-surrogate char
+ * @param low low-surrogate char
+ * @return true if high and low compose a UTF-16 encoding
+ * of some Unicode codepoint (we call such codepoint "surrogate")
+ */
+ final static boolean isSurrogatePair(char high, char low) {
+
+ //return Character.isSurrogatePair(high, low);
+ return false;
+ }
/**
* Tests if given codepoint is a canonical decomposition of another
@@ -1259,25 +1284,38 @@
static boolean hasDecompositionNonNullCanClass(int ch) {
return ch == 0x0340 | ch == 0x0341 | ch == 0x0343 | ch == 0x0344;
}
+
+ /**
+ * Reads next Unicode codepoint.
+ *
+ * @return current Unicode codepoint and moves string
+ * index to the next one.
+ */
+ int nextChar() {
+ int ch = 0;
- private int nextCodePoint() {
- char high = pattern[nextIndex()];
-
- if (Character.isHighSurrogate(high)) {
-
- //low and high char may be delimetered by spaces
- int lowExpectedIndex = prevNW + 1;
-
- if (lowExpectedIndex < pattern.length) {
- char low = pattern[lowExpectedIndex];
- if (Character.isLowSurrogate(low)) {
- nextIndex();
- return Character.toCodePoint(high, low);
- }
+ if (!this.isEmpty()) {
+ char nextChar = (char) lookAhead;
+ char curChar = (char) ch;
+
+ if (Lexer.isSurrogatePair(curChar, nextChar)){
+
+ /*
+ * Note that it is slow to create a new array on each call
+ * to nextChar(). This should be optimized later, once we
+ * actively use surrogate codepoints. For now, consider
+ * this a simple stub.
+ */
+ char [] curCodePointUTF16 = new char [] {curChar, nextChar};
+ ch = Lexer.codePointAt(curCodePointUTF16, 0);
+ next();
+ next();
+ } else {
+ ch = next();
}
- }
+ }
- return (int) high;
+ return ch;
}
/**
@@ -1293,7 +1331,7 @@
//Lexer.getCanonicalClass(ch) == 0
boolean isBoundary = (canClass == canonClassesTableSize);
- return isBoundary;
+ return isBoundary;
}
/**