You are viewing a plain text version of this content. The canonical link for it is here.
Posted to j-dev@xerces.apache.org by bu...@apache.org on 2001/09/12 09:01:52 UTC
[DO NOT REPLY: Bug 3560] New:
Five bugs in regex package
PLEASE DO NOT REPLY TO THIS MESSAGE. TO FURTHER COMMENT
ON THE STATUS OF THIS BUG PLEASE FOLLOW THE LINK BELOW
AND USE THE ON-LINE APPLICATION. REPLYING TO THIS MESSAGE
DOES NOT UPDATE THE DATABASE, AND SO YOUR COMMENT WILL
BE LOST SOMEWHERE.
http://nagoya.apache.org/bugzilla/show_bug.cgi?id=3560
*** shadow/3560 Wed Sep 12 00:01:52 2001
--- shadow/3560.tmp.9635 Wed Sep 12 00:01:52 2001
***************
*** 0 ****
--- 1,116 ----
+ +============================================================================+
+ | Five bugs in regex package |
+ +----------------------------------------------------------------------------+
+ | Bug #: 3560 Product: Xerces-J |
+ | Status: NEW Version: CVS extract |
+ | Resolution: Platform: All |
+ | Severity: Normal OS/Version: All |
+ | Priority: Other Component: Schema-Datatypes |
+ +----------------------------------------------------------------------------+
+ | Assigned To: xerces-j-dev@xml.apache.org |
+ | Reported By: kent@hauN.org |
+ | CC list: Cc: |
+ +----------------------------------------------------------------------------+
+ | URL: |
+ +============================================================================+
+ | DESCRIPTION |
+ - REUtil.quoteMeta() removes meta characters
+ quoteMeta("aa+") returns aa\
+ This does not occur when the package is used in the Xerces parser.
+
+ - Shorthands in negative character classes cause IllegalArgumentException
+ for example, [^\w]
+
+ - Union operator terminates matching in some cases
+ The pattern "fo|foo" should match "foobar" in non-XML-Schema mode.
+ This does not occur in the Xerces parser.
+
+ - Wrong surrogate processing
+ The pattern "a\ud800\udc00+" should match "a\ud800\udc00\ud800\udc00",
+ but it actually does not match.
+
+ - REUtil.createRegex() crashes once it has been called 21 times.
+ This does not occur in the Xerces parser.
+
+
+ The following code checks the behavior of the regex package.
+ When every check passes, it should print one '.' per check and `Passed/total: 26/26'.
+
+ /**
+  * Self-checking driver for the regex package.
+  *
+  * <p>Each check prints '.' to standard error when the observed result equals
+  * the expected one and '*' (plus diagnostic details) when it does not. After
+  * all checks have run, a {@code Passed/total: p/t} summary line is printed.
+  */
+ public class RegexTest {
+     /** Number of checks executed so far. */
+     int total = 0;
+     /** Number of checks whose observed result equalled the expected result. */
+     int passed = 0;
+
+     /**
+      * Compiles {@code pattern} with {@code options} and compares the outcome of
+      * {@code matches(target)} with {@code expected}.
+      *
+      * @param pattern  regular expression source
+      * @param options  option string handed to the RegularExpression constructor
+      *                 ("" or "X" in this driver; "X" presumably selects
+      *                 XML Schema mode -- confirm against the regex package)
+      * @param target   text to match against
+      * @param expected result that {@code matches(target)} is expected to return
+      */
+     void test(String pattern, String options, String target, boolean expected) {
+         RegularExpression re = new RegularExpression(pattern, options);
+         this.total++;
+         boolean actual = re.matches(target);
+         if (expected == actual) {
+             this.passed++;
+             System.err.print(".");
+         } else {
+             // Failure: show the observed result, the compiled regex and the target.
+             System.err.print("*");
+             System.err.print(" actual="+actual);
+             System.err.print(" '"+re+"' '"+target+"' ");
+         }
+         System.err.flush();
+     }
+
+     /**
+      * Checks that {@code REUtil.quoteMeta(source)} returns {@code expected}.
+      *
+      * @param source   text passed to {@code REUtil.quoteMeta}
+      * @param expected quoted text that is expected back
+      */
+     void testQuote(String source, String expected) {
+         this.total++;
+         String result = REUtil.quoteMeta(source);
+         if (result.equals(expected)) {
+             this.passed++;
+             System.err.print(".");
+         } else {
+             // Failure: include the string that was actually returned.
+             System.err.print("*("+result+")");
+         }
+         System.err.flush();
+     }
+
+     /**
+      * Prints every element of {@code array} to standard error, each wrapped in
+      * single quotes and followed by a space. Not called by any check in this class.
+      *
+      * @param array strings to print
+      */
+     private void dumpResult(String[] array) {
+         System.err.print(" ");
+         for (int i = 0; i < array.length; i++) {
+             System.err.print("'");
+             System.err.print(array[i]);
+             System.err.print("' ");
+         }
+     }
+
+     /**
+      * Runs all checks and prints the pass/total summary to standard error.
+      *
+      * @param argv ignored
+      * @throws Exception if any check throws
+      */
+     public static void main(String[] argv) throws Exception {
+         RegexTest stat = new RegexTest();
+         // testQuote arguments: source, expected quoted result
+         stat.testQuote("aaaa", "aaaa");
+         stat.testQuote("aa+a", "aa\\+a");
+         stat.testQuote("*bbbb", "\\*bbbb");
+         stat.testQuote("bbbb.", "bbbb\\.");
+         stat.testQuote("bbbb*.", "bbbb\\*\\.");
+         System.err.println("");
+
+         // test arguments: pattern, options, target, expected match result
+         // Surrogate pairs.
+         stat.test("a\ud800\udc00", "", "a\ud800\udc00", true);
+         stat.test("^a\ud800\udc00+$", "", "a\ud800\udc00\udc00", false);
+         stat.test("^a\ud800\udc00+$", "", "a\ud800\udc00\ud800\udc00", true);
+         stat.test("a.\ud800\udc00", "", "a\ud800\udc00\ud800\udc00", true);
+         // Lookahead and lookbehind.
+         stat.test("foo(?=bar)", "", "foobar", true);
+         stat.test("foo(?=bar)", "", "foobab", false);
+         stat.test("foo(?!bar)", "", "foobar", false);
+         stat.test("foo(?!bar)", "", "foobab", true);
+         stat.test("(?<=bar)foo", "", "barfoo", true);
+         stat.test("(?<=bar)foo", "", "babfoo", false);
+         stat.test("(?<!bar)foo", "", "barfoo", false);
+         stat.test("(?<!bar)foo", "", "babfoo", true);
+
+         // Union operator.
+         stat.test("fo|foo", "X", "foo", true);
+         stat.test("fo|foo", "", "foabc", true);
+         stat.test("fo|foo", "", "fooabc", true);
+
+         // Shorthands inside a negated character class.
+         stat.test("[^\\d\\w]", "", "+", true);
+         stat.test("[^\\d\\w]", "X", "+", true);
+         stat.test("[^\\d\\w]", "", "a", false);
+         stat.test("[^\\d\\w]", "X", "a", false);
+         stat.test("[^\\d\\w]", "", "0", false);
+         stat.test("[^\\d\\w]", "X", "0", false);
+         // Same stream as the progress marks so the line break stays in order
+         // with them even when stdout and stderr are redirected separately.
+         System.err.println("");
+
+         System.err.println("Passed/total: "+stat.passed+"/"+stat.total);
+     }
+ }
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-j-dev-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-j-dev-help@xml.apache.org