You are viewing a plain text version of this content. The canonical link for it is here.
Posted to regexp-dev@jakarta.apache.org by vg...@apache.org on 2004/01/30 15:36:21 UTC
cvs commit: jakarta-regexp/src/java/org/apache/regexp RE.java RETest.java
vgritsenko 2004/01/30 06:36:21
Modified: src/java/org/apache/regexp RE.java RETest.java
Log:
Enhanced match multiline (patch from bug #4137), thanks to Oleg Sukhodolsky
Revision Changes Path
1.15 +23 -17 jakarta-regexp/src/java/org/apache/regexp/RE.java
Index: RE.java
===================================================================
RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RE.java,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- RE.java 6 Sep 2003 01:45:51 -0000 1.14
+++ RE.java 30 Jan 2004 14:36:21 -0000 1.15
@@ -295,6 +295,21 @@
*
* <p>
*
+ * <b><font face="times roman">Line terminators</font></b>
+ * <br>
+ * A line terminator is a one- or two-character sequence that marks
+ * the end of a line of the input character sequence. The following
+ * are recognized as line terminators:
+ * <ul>
+ * <li>A newline (line feed) character ('\n'),</li>
+ * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li>
+ * <li>A standalone carriage-return character ('\r'),</li>
+ * <li>A next-line character ('\u0085'),</li>
+ * <li>A line-separator character ('\u2028'), or</li>
+ * <li>A paragraph-separator character ('\u2029').</li>
+ * </ul>
+ *
+ * <p>
* RE runs programs compiled by the RECompiler class. But the RE
* matcher class does not include the actual regular expression compiler
* for reasons of efficiency. In fact, if you want to pre-compile one
@@ -462,9 +477,6 @@
static final int offsetNext = 2; // Next index offset (third char)
static final int nodeSize = 3; // Node size (in chars)
- /** Line Separator */
- static final String NEWLINE = System.getProperty("line.separator");
-
// State of current program
REProgram program; // Compiled regular expression 'program'
transient CharacterIterator search; // The string being matched against
@@ -1138,9 +1150,9 @@
case OP_ANY:
- if((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
+ if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
// Match anything
- if(search.isEnd(idx))
+ if (search.isEnd(idx))
{
return -1;
}
@@ -1840,20 +1852,14 @@
/** @return true if at the i-th position in the 'search' a newline ends */
private boolean isNewline(int i) {
+ char nextChar = search.charAt(i);
- if (i < NEWLINE.length() - 1) {
- return false;
- }
-
- if (search.charAt(i) == '\n') {
+ if (nextChar == '\n' || nextChar == '\r' || nextChar == '\u0085'
+ || nextChar == '\u2028' || nextChar == '\u2029')
+ {
return true;
}
- for (int j = NEWLINE.length() - 1; j >= 0; j--, i--) {
- if (NEWLINE.charAt(j) != search.charAt(i)) {
- return false;
- }
- }
- return true;
+ return false;
}
}
1.9 +23 -2 jakarta-regexp/src/java/org/apache/regexp/RETest.java
Index: RETest.java
===================================================================
RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RETest.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- RETest.java 20 Dec 2003 17:21:44 -0000 1.8
+++ RETest.java 30 Jan 2004 14:36:21 -0000 1.9
@@ -411,6 +411,27 @@
s = r.subst("variable=value",
"$1_test_$212", RE.REPLACE_BACKREFERENCES);
assertEquals("Wrong subst() result", "variable_test_value12", s);
+
+ // Test MATCH_MULTILINE. Test for eol/bol symbols.
+ r = new RE("^abc$", RE.MATCH_MULTILINE);
+ if (!r.match("\nabc")) {
+ fail("\"\\nabc\" doesn't match \"^abc$\"");
+ }
+ if (!r.match("\rabc")) {
+ fail("\"\\rabc\" doesn't match \"^abc$\"");
+ }
+ if (!r.match("\r\nabc")) {
+ fail("\"\\r\\nabc\" doesn't match \"^abc$\"");
+ }
+ if (!r.match("\u0085abc")) {
+ fail("\"\\u0085abc\" doesn't match \"^abc$\"");
+ }
+ if (!r.match("\u2028abc")) {
+ fail("\"\\u2028abc\" doesn't match \"^abc$\"");
+ }
+ if (!r.match("\u2029abc")) {
+ fail("\"\\u2029abc\" doesn't match \"^abc$\"");
+ }
}
private void testPrecompiledRE()
@@ -763,7 +784,7 @@
}
log.append(" Paren count: " + regexp.getParenCount() + "\n");
- if(!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount()))
+ if (!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount()))
{
return false;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: regexp-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: regexp-dev-help@jakarta.apache.org