You are viewing a plain text version of this content. The canonical link for it is here.
Posted to regexp-dev@jakarta.apache.org by gh...@apache.org on 2001/02/27 09:37:08 UTC

cvs commit: jakarta-regexp/xdocs RETest.txt

gholam      01/02/27 00:37:07

  Modified:    build    build-regexp.xml
               docs     RETest.txt
               src/java/org/apache/regexp RECompiler.java
                        REDebugCompiler.java RESyntaxException.java
                        RETest.java
               xdocs    RETest.txt
  Added:       build    run-tests.sh
  Log:
  Changed RESyntaxException to be an unchecked exception.
  
  Fixed the array out of bounds exception. This was occurring when compiling
  an expression with certain branches. The nextOfEnd methods tried to set the
  offset of a node that was not initialised.
  
  Fixed an infinite loop bug with reluctant closures. Not a total fix.
  The reluctant closure code should be recursive like the greedy closures.
  Works in most cases now though.
  
  Added some more tests.
  
  Allowed a user to override some of the build-script settings esp
  build.compiler and related things.
  
  Added a script to run RETest.
  
  Fixed the error message in RETest so it prints an RE program trace on failure.
  Cleaned RETest up a little bit. Can now use command line params properly.
  
  Revision  Changes    Path
  1.7       +1 -0      jakarta-regexp/build/build-regexp.xml
  
  Index: build-regexp.xml
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/build/build-regexp.xml,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- build-regexp.xml	2001/02/11 23:04:21	1.6
  +++ build-regexp.xml	2001/02/27 08:37:04	1.7
  @@ -8,6 +8,7 @@
       <!-- =================================================================== -->
       <!-- Initializes some variables                                          -->
       <!-- =================================================================== -->
  +    <property file="${user.home}/.jakarta-regexp.properties"/>
       <property name="ant.home" value="."/>
       <property name="Name" value="Jakarta-Regexp"/>
       <property name="year" value="2001"/>
  
  
  
  1.1                  jakarta-regexp/build/run-tests.sh
  
  Index: run-tests.sh
  ===================================================================
  #!/bin/sh
  #
  # Runs the org.apache.regexp.RETest harness with a classpath assembled from
  # the JDK tools jar, local jars, the jakarta-site2 library jars and the
  # jakarta-regexp jar. All arguments given to this script are passed to RETest.
  #
  # The jar locations below are relative paths, so they resolve against the
  # directory the script is started from.
  
  #--------------------------------------------
  # No need to edit anything past here
  #--------------------------------------------
  if test -z "${JAVA_HOME}" ; then
      echo "ERROR: JAVA_HOME not found in your environment."
      echo "Please, set the JAVA_HOME variable in your environment to match the"
      echo "location of the Java Virtual Machine you want to use."
      # Exit with a non-zero status so callers can detect the failure; a bare
      # 'exit' would return the (successful) status of the echo above.
      exit 1
  fi
  
  # Quoted so a JAVA_HOME containing spaces is tested as a single path.
  if test -f "${JAVA_HOME}/lib/tools.jar" ; then
      CLASSPATH=${CLASSPATH}:${JAVA_HOME}/lib/tools.jar
  fi
  
  # convert the existing path to unix
  if [ "$OSTYPE" = "cygwin32" ] || [ "$OSTYPE" = "cygwin" ] ; then
     CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
  fi
  
  # Add in your .jar files first
  for i in ./*.jar
  do
      # When nothing matches, the shell leaves the pattern unexpanded; skip it
      # rather than adding a non-existent entry to the classpath.
      if test -f "$i" ; then
          CLASSPATH=$CLASSPATH:"$i"
      fi
  done
  # Add in the jakarta-site2 library files
  for i in ../../jakarta-site2/lib/*.jar
  do
      if test -f "$i" ; then
          CLASSPATH=$CLASSPATH:"$i"
      fi
  done
  
  # Add the regexp jar while the path list is still in unix form, so that the
  # ':' separator used here is translated together with the other entries.
  CLASSPATH=$CLASSPATH:../bin/jakarta-regexp-1.3-dev.jar
  
  # convert the unix path to windows
  if [ "$OSTYPE" = "cygwin32" ] || [ "$OSTYPE" = "cygwin" ] ; then
     CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
  fi
  
  # "$@" forwards every argument unchanged, including ones containing spaces.
  "${JAVA_HOME}/bin/java" -Xint -classpath "${CLASSPATH}" org.apache.regexp.RETest "$@"
  
  
  
  1.3       +34 -0     jakarta-regexp/docs/RETest.txt
  
  Index: RETest.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/docs/RETest.txt,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- RETest.txt	2001/02/11 23:04:21	1.2
  +++ RETest.txt	2001/02/27 08:37:05	1.3
  @@ -978,3 +978,37 @@
   www.test.com
   YES
   www.test.com
  +
  +#163
  +abc.*?x+yz
  +abcaaaaaxyzbbbbxyz
  +YES
  +abcaaaaaxyz
  +
  +#164
  +abc.+?x+yz
  +abcaaaaaxyzbbbbxyz
  +YES
  +abcaaaaaxyz
  +
  +#165
  +a.+?(c|d)
  +aaaacaaaaad
  +YES
  +aaaac
  +c
  +
  +#166
  +a.+(c|d)
  +aaaacaaaaad
  +YES
  +aaaacaaaaad
  +d
  +
  +#167
  +a+?b+?c+?
  +aaabccaaabbbccc
  +YES
  +aaabc
  +
  +
  
  
  
  1.4       +31 -10    jakarta-regexp/src/java/org/apache/regexp/RECompiler.java
  
  Index: RECompiler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RECompiler.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- RECompiler.java	2001/02/11 23:04:22	1.3
  +++ RECompiler.java	2001/02/27 08:37:05	1.4
  @@ -71,7 +71,8 @@
    * @see recompile
    *
    * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
  - * @version $Id: RECompiler.java,v 1.3 2001/02/11 23:04:22 jon Exp $
  + * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
  + * @version $Id: RECompiler.java,v 1.4 2001/02/27 08:37:05 gholam Exp $
    */
   public class RECompiler
   {
  @@ -203,14 +204,29 @@
       void setNextOfEnd(int node, int pointTo)
       {
           // Traverse the chain until the next offset is 0
  -        int next;
  -        while ((next = instruction[node + RE.offsetNext]) != 0)
  -        {
  +        int next = instruction[node + RE.offsetNext];
  +        // while the 'node' is not the last in the chain
  +        // and the 'node' is not the last in the program.
  +        while ( next != 0 && node < lenInstruction )
  +        {
  +            // if the node we are supposed to point to is in the chain then
  +            // point to the end of the program instead.
  +            // Michael McCallum <gh...@xtra.co.nz>
  +            // FIXME: // This is a _hack_ to stop infinite programs.
  +            // I believe that the implementation of the reluctant matches is wrong but
  +            // have not worked out a better way yet.
  +            if ( node == pointTo ) {
  +              pointTo = lenInstruction;
  +            }
               node += next;
  +            next = instruction[node + RE.offsetNext];
           }
  -
  -        // Point the last node in the chain to pointTo.
  -        instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
  +        // if we have reached the end of the program then dont set the pointTo.
  +        // im not sure if this will break any thing but passes all the tests.
  +        if ( node < lenInstruction ) {
  +            // Point the last node in the chain to pointTo.
  +            instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
  +        }
       }
   
       /**
  @@ -1258,13 +1274,18 @@
           setNextOfEnd(ret, end);
   
           // Hook the ends of each branch to the end node
  -        for (int next = -1, i = ret; next != 0; next = instruction[i + RE.offsetNext], i += next)
  +        int currentNode = ret;
  +        int nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
  +        // while the next node o
  +        while ( nextNodeOffset != 0 && currentNode < lenInstruction )
           {
               // If branch, make the end of the branch's operand chain point to the end node.
  -            if (instruction[i + RE.offsetOpcode] == RE.OP_BRANCH)
  +            if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH )
               {
  -                setNextOfEnd(i + RE.nodeSize, end);
  +                setNextOfEnd( currentNode + RE.nodeSize, end );
               }
  +            nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
  +            currentNode += nextNodeOffset;
           }
   
           // Return the node list
  
  
  
  1.2       +35 -1     jakarta-regexp/src/java/org/apache/regexp/REDebugCompiler.java
  
  Index: REDebugCompiler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/REDebugCompiler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- REDebugCompiler.java	2000/04/27 01:22:33	1.1
  +++ REDebugCompiler.java	2001/02/27 08:37:06	1.2
  @@ -65,7 +65,7 @@
    * for debugging purposes.
    *
    * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
  - * @version $Id: REDebugCompiler.java,v 1.1 2000/04/27 01:22:33 jon Exp $
  + * @version $Id: REDebugCompiler.java,v 1.2 2001/02/27 08:37:06 gholam Exp $
    */
   public class REDebugCompiler extends RECompiler
   {
  @@ -95,6 +95,8 @@
           hashOpcode.put(new Integer(RE.OP_CLOSE),            "OP_CLOSE");
           hashOpcode.put(new Integer(RE.OP_BACKREF),          "OP_BACKREF");
           hashOpcode.put(new Integer(RE.OP_POSIXCLASS),       "OP_POSIXCLASS");
  +        hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER),      "OP_OPEN_CLUSTER");
  +        hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER),      "OP_CLOSE_CLUSTER");
       }
   
       /**
  @@ -146,6 +148,38 @@
           // Return opcode as a string and opdata value
           return opcodeToString(opcode) + ", opdata = " + opdata;
       }
  +
  +    /**
  +     * Inserts a node with a given opcode and opdata at insertAt.  The node relative next
  +     * pointer is initialized to 0.
  +     * @param opcode Opcode for new node
  +     * @param opdata Opdata for new node (only the low 16 bits are currently used)
  +     * @param insertAt Index at which to insert the new node in the program * /
  +    void nodeInsert(char opcode, int opdata, int insertAt) {
  +        System.out.println( "====> " + opcode + " " + opdata + " " + insertAt );
  +        PrintWriter writer = new PrintWriter( System.out );
  +        dumpProgram( writer );
  +        super.nodeInsert( opcode, opdata, insertAt );
  +        System.out.println( "====< " );
  +        dumpProgram( writer );
  +        writer.flush();
  +    }/**/
  +
  +
  +    /**
  +    * Appends a node to the end of a node chain
  +    * @param node Start of node chain to traverse
  +    * @param pointTo Node to have the tail of the chain point to * /
  +    void setNextOfEnd(int node, int pointTo) {
  +        System.out.println( "====> " + node + " " + pointTo );
  +        PrintWriter writer = new PrintWriter( System.out );
  +        dumpProgram( writer );
  +        super.setNextOfEnd( node, pointTo );
  +        System.out.println( "====< " );
  +        dumpProgram( writer );
  +        writer.flush();
  +    }/**/
  +
   
       /**
        * Dumps the current program to a PrintWriter
  
  
  
  1.2       +8 -3      jakarta-regexp/src/java/org/apache/regexp/RESyntaxException.java
  
  Index: RESyntaxException.java
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RESyntaxException.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- RESyntaxException.java	2000/04/27 01:22:33	1.1
  +++ RESyntaxException.java	2001/02/27 08:37:06	1.2
  @@ -55,15 +55,20 @@
    * information on the Apache Software Foundation, please see
    * <http://www.apache.org/>.
    *
  - */ 
  + */
   
   /**
    * Exception thrown to indicate a syntax error in a regular expression.
  + * This is a non-checked exception because you should only have problems compiling
  + * a regular expression during development.
  + * If you are making regular expresion programs dynamically then you can catch it
  + * if you wish. But should not be forced to.
    *
    * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
  - * @version $Id: RESyntaxException.java,v 1.1 2000/04/27 01:22:33 jon Exp $
  + * @author <a href="mailto:gholam@xtra.co.nz>Michael McCallum</a>
  + * @version $Id: RESyntaxException.java,v 1.2 2001/02/27 08:37:06 gholam Exp $
    */
  -public class RESyntaxException extends Exception
  +public class RESyntaxException extends RuntimeException
   {
       /**
        * Constructor.
  
  
  
  1.4       +55 -30    jakarta-regexp/src/java/org/apache/regexp/RETest.java
  
  Index: RETest.java
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RETest.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- RETest.java	2001/02/11 23:04:22	1.3
  +++ RETest.java	2001/02/27 08:37:06	1.4
  @@ -21,7 +21,7 @@
    *    distribution.
    *
    * 3. The end-user documentation included with the redistribution, if
  - *    any, must include the following acknowlegement:  
  + *    any, must include the following acknowlegement:
    *       "This product includes software developed by the 
    *        Apache Software Foundation (http://www.apache.org/)."
    *    Alternately, this acknowlegement may appear in the software itself,
  @@ -57,7 +57,11 @@
    *
    */
   
  -import java.io.*;
  +import java.io.BufferedReader;
  +import java.io.FileReader;
  +import java.io.InputStreamReader;
  +import java.io.PrintWriter;
  +import java.io.File;
   
   /**
    * Data driven (and optionally interactive) testing harness to exercise regular
  @@ -65,17 +69,21 @@
    *
    * @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
    * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
  - * @version $Id: RETest.java,v 1.3 2001/02/11 23:04:22 jon Exp $
  + * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
  + * @version $Id: RETest.java,v 1.4 2001/02/27 08:37:06 gholam Exp $
    */
   public class RETest
   {
  -    // Construct a matcher and a debug compiler
  -    RE r = new RE();
  -    REDebugCompiler compiler = new REDebugCompiler();
  -
       // True if we want to see output from success cases
       static final boolean showSuccesses = false;
   
  +    // A new line character.
  +    static final String NEW_LINE = System.getProperty( "line.separator" );
  +
  +    // Construct a matcher and a debug compiler
  +    RE r = new RE();
  +    REDebugCompiler compiler = new REDebugCompiler();
  +    
       /**
        * Main program entrypoint.  If an argument is given, it will be compiled
        * and interactive matching will ensue.  If no argument is given, the
  @@ -141,13 +149,16 @@
               r.setProgram(compiler.compile(expr));
   
               // Show expression
  -            say("\n" + expr + "\n");
  +            say("" + NEW_LINE + "" + expr + "" + NEW_LINE + "");
   
               // Show program for compiled expression
  -            compiler.dumpProgram(new PrintWriter(System.out));
  +            PrintWriter writer = new PrintWriter( System.out );
  +            compiler.dumpProgram( writer );
  +            writer.flush();
   
  +            boolean running = true;
               // Test matching against compiled expression
  -            while (true)
  +            while ( running )
               {
                   // Read from keyboard
                   BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
  @@ -155,18 +166,26 @@
                   System.out.flush();
                   String match = br.readLine();
   
  -                // Try a match against the keyboard input
  -                if (r.match(match))
  +                if ( match != null )
                   {
  -                    say("Match successful.");
  +                    // Try a match against the keyboard input
  +                    if (r.match(match))
  +                    {
  +                        say("Match successful.");
  +                    }
  +                    else
  +                    {
  +                        say("Match failed.");
  +                    }
  +
  +                    // Show subparen registers
  +                    showParens(r);
                   }
                   else
                   {
  -                    say("Match failed.");
  +                    running = false;
  +                    System.out.println();
                   }
  -
  -                // Show subparen registers
  -                showParens(r);
               }
           }
           catch (Exception e)
  @@ -187,21 +206,25 @@
       }
   
       /**
  -     * Fail with an error
  -     * @param s Failure description
  +    * Fail with an error.
  +    * Will print a big failure message to System.out.
  +    * @param s Failure description
       */
       void fail(String s)
       {
           failures++;
  -        say("\n");
  +        say("" + NEW_LINE + "");
           say("*******************************************************");
           say("*********************  FAILURE!  **********************");
           say("*******************************************************");
  -        say("\n");
  +        say("" + NEW_LINE + "");
           say(s);
  -        say("");        
  -        compiler.dumpProgram(new PrintWriter(System.out));
  -        say("\n");
  +        say("");
  +        // make sure the writer gets flushed.
  +        PrintWriter writer = new PrintWriter( System.out );
  +        compiler.dumpProgram( writer );
  +        writer.flush();
  +        say("" + NEW_LINE + "");
       }
   
       /**
  @@ -231,7 +254,7 @@
       */
       void show()
       {
  -        say("\n-----------------------\n");
  +        say("" + NEW_LINE + "-----------------------" + NEW_LINE + "");
           say("Expression #" + (n) + " \"" + expr + "\" ");
       }
   
  @@ -371,7 +394,9 @@
                       }
   
                       // Wasn't supposed to be an error
  -                    fail("Produces the unexpected error \"" + e.getMessage() + "\"");
  +                    String message = e.getMessage() == null ? e.toString() : e.getMessage();
  +                    fail("Produces an unexpected exception \"" + message + "\"");
  +                    e.printStackTrace();
                   }
                   catch (Error e)
                   {
  @@ -472,14 +497,14 @@
                   }
   
                   // Matcher blew it
  -                catch (Exception e)
  +                catch(Exception e)
                   {
                       fail("Matcher threw exception: " + e.toString());
                       e.printStackTrace();
                   }
   
                   // Internal error
  -                catch (Error e)
  +                catch(Error e)
                   {
                       fail("Matcher threw fatal error \"" + e.getMessage() + "\"");
                       e.printStackTrace();
  @@ -492,9 +517,9 @@
           }
   
           // Show match time
  -        System.out.println ("\n\nMatch time = " + (System.currentTimeMillis() - ms) + " ms.");
  +        System.out.println( NEW_LINE + NEW_LINE + "Match time = " + (System.currentTimeMillis() - ms) + " ms.");
   
           // Print final results
  -        System.out.println ("\nTests complete.  " + n + " tests, " + failures + " failure(s).");
  +        System.out.println( NEW_LINE + "Tests complete.  " + n + " tests, " + failures + " failure(s).");
       }
   }
  
  
  
  1.3       +32 -0     jakarta-regexp/xdocs/RETest.txt
  
  Index: RETest.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-regexp/xdocs/RETest.txt,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- RETest.txt	2001/02/11 23:04:23	1.2
  +++ RETest.txt	2001/02/27 08:37:07	1.3
  @@ -978,3 +978,35 @@
   www.test.com
   YES
   www.test.com
  +
  +#163
  +abc.*?x+yz
  +abcaaaaaxyzbbbbxyz
  +YES
  +abcaaaaaxyz
  +
  +#164
  +abc.+?x+yz
  +abcaaaaaxyzbbbbxyz
  +YES
  +abcaaaaaxyz
  +
  +#165
  +a.+?(c|d)
  +aaaacaaaaad
  +YES
  +aaaac
  +c
  +
  +#166
  +a.+(c|d)
  +aaaacaaaaad
  +YES
  +aaaacaaaaad
  +d
  +
  +#167
  +a+?b+?c+?
  +aaabccaaabbbccc
  +YES
  +aaabc
  
  
  

Re: cvs commit: jakarta-regexp/xdocs RETest.txt

Posted by Jon Stevens <jo...@latchkey.com>.
on 2/27/01 12:37 AM, "gholam@apache.org" <gh...@apache.org> wrote:

> CLASSPATH=$CLASSPATH:../bin/jakarta-regexp-1.3-dev.jar

You should use something like this:

  for i in ../bin/jakarta-regexp*.jar
  do
      CLASSPATH=$CLASSPATH:"$i"
  done

That way we don't have to remember to increment the version number in that
file for each release.

:-)

thanks,

-jon

-- 
If you come from a Perl or PHP background, JSP is a way to take
your pain to new levels. --Anonymous
<http://jakarta.apache.org/velocity/> && <http://java.apache.org/turbine/>