You are viewing a plain text version of this content. The canonical link for it is here.
Posted to regexp-dev@jakarta.apache.org by vg...@apache.org on 2005/08/12 21:47:13 UTC

svn commit: r232365 - in /jakarta/regexp/trunk: docs/RETest.txt docs/changes.html docs/jakarta-regexp.jar src/java/org/apache/regexp/RECompiler.java xdocs/RETest.txt xdocs/changes.xml

Author: vgritsenko
Date: Fri Aug 12 12:47:04 2005
New Revision: 232365

URL: http://svn.apache.org/viewcvs?rev=232365&view=rev
Log:
Fixed Bug:
RE creates the wrong character class when overlapping character ranges are
used (e.g. [a-h0f-z]) (VG)

Fixed Bug
<a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=30126">30126</a>:
Support negated character classes (\W, \D, \S) in custom character classes (VG)


Modified:
    jakarta/regexp/trunk/docs/RETest.txt
    jakarta/regexp/trunk/docs/changes.html
    jakarta/regexp/trunk/docs/jakarta-regexp.jar
    jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
    jakarta/regexp/trunk/xdocs/RETest.txt
    jakarta/regexp/trunk/xdocs/changes.xml

Modified: jakarta/regexp/trunk/docs/RETest.txt
URL: http://svn.apache.org/viewcvs/jakarta/regexp/trunk/docs/RETest.txt?rev=232365&r1=232364&r2=232365&view=diff
==============================================================================
--- jakarta/regexp/trunk/docs/RETest.txt (original)
+++ jakarta/regexp/trunk/docs/RETest.txt Fri Aug 12 12:47:04 2005
@@ -1473,3 +1473,46 @@
 2004-01-01
 -01
 -01
+
+#218
+[\W]
+a
+NO
+
+#219
+[\W]
+1
+NO
+
+#220
+[\W]
+!
+YES
+1
+!
+
+#221
+[^\W]
+a
+YES
+1
+a
+
+#222
+[^\W]
+_
+YES
+1
+_
+
+#223
+[\D\S]+
+@0 1	_1AByz
+YES
+1
+@0 1	_1AByz
+
+#224
+[^\D\S]
+@0 1	_1AByz
+NO

Modified: jakarta/regexp/trunk/docs/changes.html
URL: http://svn.apache.org/viewcvs/jakarta/regexp/trunk/docs/changes.html?rev=232365&r1=232364&r2=232365&view=diff
==============================================================================
--- jakarta/regexp/trunk/docs/changes.html (original)
+++ jakarta/regexp/trunk/docs/changes.html Fri Aug 12 12:47:04 2005
@@ -92,6 +92,12 @@
 
 <h3>Version 1.4-dev</h3>
 <ul>
+<li>Fixed Bug:
+    RE creates wrong character class when overlapping character ranges are
+    used (ex: [a-h0f-z]) (VG)</li>
+<li>Fixed Bug
+    <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=30126">30126</a>:
+    Support negated character classes (\W, \D, \S) in custom character class (VG)</li>
 <li>Applied patches for Bug
     <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27795">27795</a>:
     Add optimization for regexps which start with ^ (BOL) (VG)</li>

Modified: jakarta/regexp/trunk/docs/jakarta-regexp.jar
URL: http://svn.apache.org/viewcvs/jakarta/regexp/trunk/docs/jakarta-regexp.jar?rev=232365&r1=232364&r2=232365&view=diff
==============================================================================
Binary files - no diff available.

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
URL: http://svn.apache.org/viewcvs/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?rev=232365&r1=232364&r2=232365&view=diff
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Fri Aug 12 12:47:04 2005
@@ -612,9 +612,24 @@
                             switch (pattern.charAt(idx - 1))
                             {
                                 case RE.E_NSPACE:
-                                case RE.E_NDIGIT:
+                                    range.include(Character.MIN_VALUE, 7, include);   // [Min - \b )
+                                    range.include((char) 11, include);                // ( \n - \f )
+                                    range.include(14, 31, include);                   // ( \r - ' ')
+                                    range.include(33, Character.MAX_VALUE, include);  // (' ' - Max]
+                                    break;
+
                                 case RE.E_NALNUM:
-                                    syntaxError("Bad character class");
+                                    range.include(Character.MIN_VALUE, '/', include); // [Min - '0')
+                                    range.include(':', '@', include);                 // ('9' - 'A')
+                                    range.include('[', '^', include);                 // ('Z' - '_')
+                                    range.include('`', include);                      // ('_' - 'a')
+                                    range.include('{', Character.MAX_VALUE, include); // ('z' - Max]
+                                    break;
+
+                                case RE.E_NDIGIT:
+                                    range.include(Character.MIN_VALUE, '/', include); // [Min - '0')
+                                    range.include(':', Character.MAX_VALUE, include); // ('9' - Max]
+                                    break;
 
                                 case RE.E_SPACE:
                                     range.include('\t', include);
@@ -1403,8 +1418,8 @@
                 // Min is in the range, but max is outside
                 else if (min >= minRange[i] && min <= maxRange[i])
                 {
-                    delete(i);
                     min = minRange[i];
+                    delete(i);
                     merge(min, max);
                     return;
                 }
@@ -1412,8 +1427,8 @@
                 // Max is in the range, but min is outside
                 else if (max >= minRange[i] && max <= maxRange[i])
                 {
-                    delete(i);
                     max = maxRange[i];
+                    delete(i);
                     merge(min, max);
                     return;
                 }

Modified: jakarta/regexp/trunk/xdocs/RETest.txt
URL: http://svn.apache.org/viewcvs/jakarta/regexp/trunk/xdocs/RETest.txt?rev=232365&r1=232364&r2=232365&view=diff
==============================================================================
--- jakarta/regexp/trunk/xdocs/RETest.txt (original)
+++ jakarta/regexp/trunk/xdocs/RETest.txt Fri Aug 12 12:47:04 2005
@@ -1473,3 +1473,46 @@
 2004-01-01
 -01
 -01
+
+#218
+[\W]
+a
+NO
+
+#219
+[\W]
+1
+NO
+
+#220
+[\W]
+!
+YES
+1
+!
+
+#221
+[^\W]
+a
+YES
+1
+a
+
+#222
+[^\W]
+_
+YES
+1
+_
+
+#223
+[\D\S]+
+@0 1	_1AByz
+YES
+1
+@0 1	_1AByz
+
+#224
+[^\D\S]
+@0 1	_1AByz
+NO

Modified: jakarta/regexp/trunk/xdocs/changes.xml
URL: http://svn.apache.org/viewcvs/jakarta/regexp/trunk/xdocs/changes.xml?rev=232365&r1=232364&r2=232365&view=diff
==============================================================================
--- jakarta/regexp/trunk/xdocs/changes.xml (original)
+++ jakarta/regexp/trunk/xdocs/changes.xml Fri Aug 12 12:47:04 2005
@@ -34,6 +34,12 @@
 
 <h3>Version 1.4-dev</h3>
 <ul>
+<li>Fixed Bug:
+    RE creates wrong character class when overlapping character ranges are
+    used (ex: [a-h0f-z]) (VG)</li>
+<li>Fixed Bug
+    <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=30126">30126</a>:
+    Support negated character classes (\W, \D, \S) in custom character class (VG)</li>
 <li>Applied patches for Bug
     <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27795">27795</a>:
     Add optimization for regexps which start with ^ (BOL) (VG)</li>



---------------------------------------------------------------------
To unsubscribe, e-mail: regexp-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: regexp-dev-help@jakarta.apache.org