You are viewing a plain text version of this content. The canonical link for it is here.
Posted to j-dev@xerces.apache.org by bu...@apache.org on 2001/09/12 09:01:52 UTC

[DO NOT REPLY: Bug 3560] New: Five bugs in regex package

PLEASE DO NOT REPLY TO THIS MESSAGE. TO FURTHER COMMENT
ON THE STATUS OF THIS BUG PLEASE FOLLOW THE LINK BELOW
AND USE THE ON-LINE APPLICATION. REPLYING TO THIS MESSAGE
DOES NOT UPDATE THE DATABASE, AND SO YOUR COMMENT WILL
BE LOST SOMEWHERE.

http://nagoya.apache.org/bugzilla/show_bug.cgi?id=3560

*** shadow/3560	Wed Sep 12 00:01:52 2001
--- shadow/3560.tmp.9635	Wed Sep 12 00:01:52 2001
***************
*** 0 ****
--- 1,116 ----
+ +============================================================================+
+ | Five bugs in regex package                                                 |
+ +----------------------------------------------------------------------------+
+ |        Bug #: 3560                        Product: Xerces-J                |
+ |       Status: NEW                         Version: CVS extract             |
+ |   Resolution:                            Platform: All                     |
+ |     Severity: Normal                   OS/Version: All                     |
+ |     Priority: Other                     Component: Schema-Datatypes        |
+ +----------------------------------------------------------------------------+
+ |  Assigned To: xerces-j-dev@xml.apache.org                                  |
+ |  Reported By: kent@hauN.org                                                |
+ |      CC list: Cc:                                                          |
+ +----------------------------------------------------------------------------+
+ |          URL:                                                              |
+ +============================================================================+
+ |                              DESCRIPTION                                   |
+ - REUtil.quoteMeta() removes meta characters
+     quoteMeta("aa+") returns aa\
+     This does not occur when the package is used in the Xerces parser.
+ 
+ - Shorthands in negative character classes cause an IllegalArgumentException
+     For example: [^\w]
+ 
+ - Union operator terminates matching in some cases
+     The pattern "fo|foo" should match "foobar" in non-XML-Schema mode.
+     This does not occur in the Xerces parser.
+ 
+ - Wrong surrogate processing
+     The pattern "a\ud800\udc00+" should match "a\ud800\udc00\ud800\udc00",
+     but it currently does not.
+ 
+ - REUtil.createRegex() crashes when it is called 21 times.
+     This does not occur in the Xerces parser.
+ 
+ 
+ The following code checks the behavior of the regex package.
+ It should print a '.' for each passing check, followed by `Passed/total: 26/26'.
+ 
+ /**
+  * Stand-alone regression driver for the regex package problems listed in
+  * this report.
+  *
+  * Every check prints '.' to System.err when it passes, or '*' followed by
+  * diagnostics when it fails; main() ends with a "Passed/total" summary line.
+  * RegularExpression and REUtil are the classes under test and must be
+  * imported from (or compiled alongside) the regex package.
+  */
+ public class RegexTest {
+     /** Number of checks executed so far. */
+     int total = 0;
+     /** Number of checks whose result equalled the expectation. */
+     int passed = 0;
+ 
+     /**
+      * Builds a RegularExpression from pattern and options, calls
+      * matches(target) on it, and records whether the outcome equals
+      * expected.
+      *
+      * A RuntimeException raised while compiling or matching is recorded as
+      * a failed check instead of being propagated, so that one broken
+      * pattern (such as the negated shorthand class described in the report)
+      * cannot abort the run before the summary is printed.
+      *
+      * @param pattern  regular expression source text
+      * @param options  option string passed to the RegularExpression constructor
+      * @param target   text handed to matches()
+      * @param expected result that matches(target) is expected to return
+      */
+     void test(String pattern, String options, String target, boolean expected) {
+         // Count the check up front so a throwing pattern still shows in the total.
+         this.total++;
+         try {
+             RegularExpression re = new RegularExpression(pattern, options);
+             boolean actual = re.matches(target);
+             if (expected == actual) {
+                 this.passed++;
+                 System.err.print(".");
+             } else {
+                 System.err.print("*");
+                 System.err.print(" actual="+actual);
+                 System.err.print(" '"+re+"' '"+target+"' ");
+             }
+         } catch (RuntimeException e) {
+             // Treat a crash in the regex package as a failed check.
+             System.err.print("*");
+             System.err.print(" exception="+e);
+             System.err.print(" '"+pattern+"' '"+target+"' ");
+         }
+         System.err.flush();
+     }
+ 
+     /**
+      * Checks that REUtil.quoteMeta(source) returns exactly expected.
+      * On mismatch the actual result is printed in parentheses after '*'.
+      *
+      * @param source   text to be quoted
+      * @param expected the quoted form that quoteMeta should produce
+      */
+     void testQuote(String source, String expected) {
+         this.total ++;
+         String result = REUtil.quoteMeta(source);
+         if (result.equals(expected)) {
+             this.passed ++;
+             System.err.print(".");
+         } else {
+             System.err.print("*("+result+")");
+         }
+         System.err.flush();
+     }
+ 
+     /**
+      * Runs all 26 checks and prints the summary line to System.err.
+      *
+      * @param argv ignored
+      */
+     public static void main(String[] argv) throws Exception {
+         RegexTest stat = new RegexTest();
+                      // source,  expected
+         stat.testQuote("aaaa", "aaaa");
+         stat.testQuote("aa+a", "aa\\+a");
+         stat.testQuote("*bbbb", "\\*bbbb");
+         stat.testQuote("bbbb.", "bbbb\\.");
+         stat.testQuote("bbbb*.", "bbbb\\*\\.");
+         System.err.println("");
+ 
+                 // pattern,  options,  target,  expected
+         stat.test("a\ud800\udc00", "", "a\ud800\udc00", true);
+         stat.test("^a\ud800\udc00+$", "", "a\ud800\udc00\udc00", false);
+         stat.test("^a\ud800\udc00+$", "", "a\ud800\udc00\ud800\udc00", true);
+         stat.test("a.\ud800\udc00", "", "a\ud800\udc00\ud800\udc00", true);
+         stat.test("foo(?=bar)", "", "foobar", true);
+         stat.test("foo(?=bar)", "", "foobab", false);
+         stat.test("foo(?!bar)", "", "foobar", false);
+         stat.test("foo(?!bar)", "", "foobab", true);
+         stat.test("(?<=bar)foo", "", "barfoo", true);
+         stat.test("(?<=bar)foo", "", "babfoo", false);
+         stat.test("(?<!bar)foo", "", "barfoo", false);
+         stat.test("(?<!bar)foo", "", "babfoo", true);
+ 
+         stat.test("fo|foo", "X", "foo", true);
+         stat.test("fo|foo", "", "foabc", true);
+         stat.test("fo|foo", "", "fooabc", true);
+ 
+         stat.test("[^\\d\\w]", "", "+", true);
+         stat.test("[^\\d\\w]", "X", "+", true);
+         stat.test("[^\\d\\w]", "", "a", false);
+         stat.test("[^\\d\\w]", "X", "a", false);
+         stat.test("[^\\d\\w]", "", "0", false);
+         stat.test("[^\\d\\w]", "X", "0", false);
+         // Keep all progress output on one stream so the line break stays in order.
+         System.err.println("");
+ 
+         System.err.println("Passed/total: "+stat.passed+"/"+stat.total);
+     }
+ }

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-j-dev-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-j-dev-help@xml.apache.org