You are viewing a plain text version of this content. The canonical link for it is here.
Posted to regexp-dev@jakarta.apache.org by vg...@apache.org on 2004/01/30 15:36:21 UTC

cvs commit: jakarta-regexp/src/java/org/apache/regexp RE.java RETest.java

vgritsenko    2004/01/30 06:36:21

  Modified:    src/java/org/apache/regexp RE.java RETest.java
  Log:
  Enhanced match multiline (patch from bug #4137), thanks to Oleg Sukhodolsky
  
  Revision  Changes    Path
  1.15      +23 -17    jakarta-regexp/src/java/org/apache/regexp/RE.java
  
  Index: RE.java
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RE.java,v
  retrieving revision 1.14
  retrieving revision 1.15
  diff -u -r1.14 -r1.15
  --- RE.java	6 Sep 2003 01:45:51 -0000	1.14
  +++ RE.java	30 Jan 2004 14:36:21 -0000	1.15
  @@ -295,6 +295,21 @@
    *
    * <p>
    *
  + * <b><font face="times roman">Line terminators</font></b>
  + * <br>
  + * A line terminator is a one- or two-character sequence that marks
  + * the end of a line of the input character sequence. The following
  + * are recognized as line terminators:
  + * <ul>
  + * <li>A newline (line feed) character ('\n'),</li>
  + * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li>
  + * <li>A standalone carriage-return character ('\r'),</li>
  + * <li>A next-line character ('\u0085'),</li>
  + * <li>A line-separator character ('\u2028'), or</li>
  + * <li>A paragraph-separator character ('\u2029').</li>
  + * </ul>
  + *
  + * <p>
    * RE runs programs compiled by the RECompiler class.  But the RE
    * matcher class does not include the actual regular expression compiler
    * for reasons of efficiency.  In fact, if you want to pre-compile one
  @@ -462,9 +477,6 @@
       static final int offsetNext   = 2;            // Next index offset (third char)
       static final int nodeSize     = 3;            // Node size (in chars)
   
  -    /** Line Separator */
  -    static final String NEWLINE = System.getProperty("line.separator");
  -
       // State of current program
       REProgram program;                            // Compiled regular expression 'program'
       transient CharacterIterator search;           // The string being matched against
  @@ -1138,9 +1150,9 @@
   
                   case OP_ANY:
   
  -                    if((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
  +                    if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) {
                           // Match anything
  -                        if(search.isEnd(idx))
  +                        if (search.isEnd(idx))
                           {
                               return -1;
                           }
  @@ -1840,20 +1852,14 @@
   
       /** @return true if at the i-th position in the 'search' a newline ends */
       private boolean isNewline(int i) {
  +        char nextChar = search.charAt(i);
   
  -        if (i < NEWLINE.length() - 1) {
  -            return false;
  -        }
  -
  -        if (search.charAt(i) == '\n') {
  +        if (nextChar == '\n' || nextChar == '\r' || nextChar == '\u0085'
  +            || nextChar == '\u2028' || nextChar == '\u2029')
  +        {
               return true;
           }
   
  -        for (int j = NEWLINE.length() - 1; j >= 0; j--, i--) {
  -            if (NEWLINE.charAt(j) != search.charAt(i)) {
  -                return false;
  -            }
  -        }
  -        return true;
  +        return false;
       }
   }
  
  
  
  1.9       +23 -2     jakarta-regexp/src/java/org/apache/regexp/RETest.java
  
  Index: RETest.java
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RETest.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- RETest.java	20 Dec 2003 17:21:44 -0000	1.8
  +++ RETest.java	30 Jan 2004 14:36:21 -0000	1.9
  @@ -411,6 +411,27 @@
           s = r.subst("variable=value",
                       "$1_test_$212", RE.REPLACE_BACKREFERENCES);
           assertEquals("Wrong subst() result", "variable_test_value12", s);
  +
  +        // Test MATCH_MULTILINE. Test for eol/bol symbols.
  +        r = new RE("^abc$", RE.MATCH_MULTILINE);
  +        if (!r.match("\nabc")) {
  +            fail("\"\\nabc\" doesn't match \"^abc$\"");
  +        }
  +        if (!r.match("\rabc")) {
  +            fail("\"\\rabc\" doesn't match \"^abc$\"");
  +        }
  +        if (!r.match("\r\nabc")) {
  +            fail("\"\\r\\nabc\" doesn't match \"^abc$\"");
  +        }
  +        if (!r.match("\u0085abc")) {
  +            fail("\"\\u0085abc\" doesn't match \"^abc$\"");
  +        }
  +        if (!r.match("\u2028abc")) {
  +            fail("\"\\u2028abc\" doesn't match \"^abc$\"");
  +        }
  +        if (!r.match("\u2029abc")) {
  +            fail("\"\\u2029abc\" doesn't match \"^abc$\"");
  +        }
       }
   
       private void testPrecompiledRE()
  @@ -763,7 +784,7 @@
           }
   
           log.append("   Paren count: " + regexp.getParenCount() + "\n");
  -        if(!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount()))
  +        if (!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount()))
           {
               return false;
           }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: regexp-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: regexp-dev-help@jakarta.apache.org