You are viewing a plain text version of this content. The canonical link for it is here.
Posted to regexp-dev@jakarta.apache.org by gh...@apache.org on 2001/02/27 09:37:08 UTC
cvs commit: jakarta-regexp/xdocs RETest.txt
gholam 01/02/27 00:37:07
Modified: build build-regexp.xml
docs RETest.txt
src/java/org/apache/regexp RECompiler.java
REDebugCompiler.java RESyntaxException.java
RETest.java
xdocs RETest.txt
Added: build run-tests.sh
Log:
Changed RESyntaxException to be an unchecked exception.
Fixed the array out of bounds exception. This was occurring when compiling
an expression with certain branches. The nextOfEnd methods tried to set the
offset of a node that was not initialised.
Fixed an infinite loop bug with reluctant closures. Not a total fix.
The reluctant closure code should be recursive like the greedy closures.
Works in most cases now though.
Added some more tests.
Allowed a user to override some of the build-script settings esp
build.compiler and related things.
Added a script to run RETest.
Fixed the error message in RETest so it prints an RE program trace on failure.
Cleaned RETest up a little bit. Can now use command line params properly.
Revision Changes Path
1.7 +1 -0 jakarta-regexp/build/build-regexp.xml
Index: build-regexp.xml
===================================================================
RCS file: /home/cvs/jakarta-regexp/build/build-regexp.xml,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- build-regexp.xml 2001/02/11 23:04:21 1.6
+++ build-regexp.xml 2001/02/27 08:37:04 1.7
@@ -8,6 +8,7 @@
<!-- =================================================================== -->
<!-- Initializes some variables -->
<!-- =================================================================== -->
+ <property file="${user.home}/.jakarta-regexp.properties"/>
<property name="ant.home" value="."/>
<property name="Name" value="Jakarta-Regexp"/>
<property name="year" value="2001"/>
1.1 jakarta-regexp/build/run-tests.sh
Index: run-tests.sh
===================================================================
#!/bin/sh
#
# Runs the org.apache.regexp.RETest harness with a classpath assembled from
# the JDK tools jar, any jars in the current directory, the jakarta-site2
# library jars and the built jakarta-regexp jar.
#
# All command line arguments are passed through unchanged to RETest.
# Exits with status 1 if JAVA_HOME is not set; otherwise the exit status is
# that of the java process.

#--------------------------------------------
# No need to edit anything past here
#--------------------------------------------

if test -z "${JAVA_HOME}" ; then
    echo "ERROR: JAVA_HOME not found in your environment." >&2
    echo "Please, set the JAVA_HOME variable in your environment to match the" >&2
    echo "location of the Java Virtual Machine you want to use." >&2
    # A bare `exit` would return the status of the last echo (0) and hide
    # the failure from callers, so report it explicitly.
    exit 1
fi

if test -f "${JAVA_HOME}/lib/tools.jar" ; then
    CLASSPATH=${CLASSPATH}:${JAVA_HOME}/lib/tools.jar
fi

# convert the existing path to unix
if [ "$OSTYPE" = "cygwin32" ] || [ "$OSTYPE" = "cygwin" ] ; then
    CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
fi

# Add in your .jar files first
for i in ./*.jar
do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -f "$i" ] || continue
    CLASSPATH=$CLASSPATH:"$i"
done

# Add in the jakarta-site2 library files
for i in ../../jakarta-site2/lib/*.jar
do
    [ -f "$i" ] || continue
    CLASSPATH=$CLASSPATH:"$i"
done

# Add in the built jakarta-regexp jar. A glob is used so the version number
# does not have to be updated here for each release. This must happen
# before the conversion to a windows path below, because entries are
# appended with the unix ':' separator.
for i in ../bin/jakarta-regexp*.jar
do
    [ -f "$i" ] || continue
    CLASSPATH=$CLASSPATH:"$i"
done

# convert the unix path to windows
if [ "$OSTYPE" = "cygwin32" ] || [ "$OSTYPE" = "cygwin" ] ; then
    CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
fi

# Quote everything so that paths and arguments containing spaces survive,
# and forward every argument rather than only the first two.
"${JAVA_HOME}/bin/java" -Xint -classpath "${CLASSPATH}" org.apache.regexp.RETest "$@"
1.3 +34 -0 jakarta-regexp/docs/RETest.txt
Index: RETest.txt
===================================================================
RCS file: /home/cvs/jakarta-regexp/docs/RETest.txt,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- RETest.txt 2001/02/11 23:04:21 1.2
+++ RETest.txt 2001/02/27 08:37:05 1.3
@@ -978,3 +978,37 @@
www.test.com
YES
www.test.com
+
+#163
+abc.*?x+yz
+abcaaaaaxyzbbbbxyz
+YES
+abcaaaaaxyz
+
+#164
+abc.+?x+yz
+abcaaaaaxyzbbbbxyz
+YES
+abcaaaaaxyz
+
+#165
+a.+?(c|d)
+aaaacaaaaad
+YES
+aaaac
+c
+
+#166
+a.+(c|d)
+aaaacaaaaad
+YES
+aaaacaaaaad
+d
+
+#167
+a+?b+?c+?
+aaabccaaabbbccc
+YES
+aaabc
+
+
1.4 +31 -10 jakarta-regexp/src/java/org/apache/regexp/RECompiler.java
Index: RECompiler.java
===================================================================
RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RECompiler.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- RECompiler.java 2001/02/11 23:04:22 1.3
+++ RECompiler.java 2001/02/27 08:37:05 1.4
@@ -71,7 +71,8 @@
* @see recompile
*
* @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- * @version $Id: RECompiler.java,v 1.3 2001/02/11 23:04:22 jon Exp $
+ * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
+ * @version $Id: RECompiler.java,v 1.4 2001/02/27 08:37:05 gholam Exp $
*/
public class RECompiler
{
@@ -203,14 +204,29 @@
void setNextOfEnd(int node, int pointTo)
{
// Traverse the chain until the next offset is 0
- int next;
- while ((next = instruction[node + RE.offsetNext]) != 0)
- {
+ int next = instruction[node + RE.offsetNext];
+ // while the 'node' is not the last in the chain
+ // and the 'node' is not the last in the program.
+ while ( next != 0 && node < lenInstruction )
+ {
+ // if the node we are supposed to point to is in the chain then
+ // point to the end of the program instead.
+ // Michael McCallum <gh...@xtra.co.nz>
+ // FIXME: // This is a _hack_ to stop infinite programs.
+ // I believe that the implementation of the reluctant matches is wrong but
+ // have not worked out a better way yet.
+ if ( node == pointTo ) {
+ pointTo = lenInstruction;
+ }
node += next;
+ next = instruction[node + RE.offsetNext];
}
-
- // Point the last node in the chain to pointTo.
- instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
+ // if we have reached the end of the program then dont set the pointTo.
+ // im not sure if this will break any thing but passes all the tests.
+ if ( node < lenInstruction ) {
+ // Point the last node in the chain to pointTo.
+ instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
+ }
}
/**
@@ -1258,13 +1274,18 @@
setNextOfEnd(ret, end);
// Hook the ends of each branch to the end node
- for (int next = -1, i = ret; next != 0; next = instruction[i + RE.offsetNext], i += next)
+ int currentNode = ret;
+ int nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
+ // while the next node o
+ while ( nextNodeOffset != 0 && currentNode < lenInstruction )
{
// If branch, make the end of the branch's operand chain point to the end node.
- if (instruction[i + RE.offsetOpcode] == RE.OP_BRANCH)
+ if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH )
{
- setNextOfEnd(i + RE.nodeSize, end);
+ setNextOfEnd( currentNode + RE.nodeSize, end );
}
+ nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
+ currentNode += nextNodeOffset;
}
// Return the node list
1.2 +35 -1 jakarta-regexp/src/java/org/apache/regexp/REDebugCompiler.java
Index: REDebugCompiler.java
===================================================================
RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/REDebugCompiler.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- REDebugCompiler.java 2000/04/27 01:22:33 1.1
+++ REDebugCompiler.java 2001/02/27 08:37:06 1.2
@@ -65,7 +65,7 @@
* for debugging purposes.
*
* @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- * @version $Id: REDebugCompiler.java,v 1.1 2000/04/27 01:22:33 jon Exp $
+ * @version $Id: REDebugCompiler.java,v 1.2 2001/02/27 08:37:06 gholam Exp $
*/
public class REDebugCompiler extends RECompiler
{
@@ -95,6 +95,8 @@
hashOpcode.put(new Integer(RE.OP_CLOSE), "OP_CLOSE");
hashOpcode.put(new Integer(RE.OP_BACKREF), "OP_BACKREF");
hashOpcode.put(new Integer(RE.OP_POSIXCLASS), "OP_POSIXCLASS");
+ hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER), "OP_OPEN_CLUSTER");
+ hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER), "OP_CLOSE_CLUSTER");
}
/**
@@ -146,6 +148,38 @@
// Return opcode as a string and opdata value
return opcodeToString(opcode) + ", opdata = " + opdata;
}
+
+ /**
+ * Inserts a node with a given opcode and opdata at insertAt. The node relative next
+ * pointer is initialized to 0.
+ * @param opcode Opcode for new node
+ * @param opdata Opdata for new node (only the low 16 bits are currently used)
+ * @param insertAt Index at which to insert the new node in the program * /
+ void nodeInsert(char opcode, int opdata, int insertAt) {
+ System.out.println( "====> " + opcode + " " + opdata + " " + insertAt );
+ PrintWriter writer = new PrintWriter( System.out );
+ dumpProgram( writer );
+ super.nodeInsert( opcode, opdata, insertAt );
+ System.out.println( "====< " );
+ dumpProgram( writer );
+ writer.flush();
+ }/**/
+
+
+ /**
+ * Appends a node to the end of a node chain
+ * @param node Start of node chain to traverse
+ * @param pointTo Node to have the tail of the chain point to * /
+ void setNextOfEnd(int node, int pointTo) {
+ System.out.println( "====> " + node + " " + pointTo );
+ PrintWriter writer = new PrintWriter( System.out );
+ dumpProgram( writer );
+ super.setNextOfEnd( node, pointTo );
+ System.out.println( "====< " );
+ dumpProgram( writer );
+ writer.flush();
+ }/**/
+
/**
* Dumps the current program to a PrintWriter
1.2 +8 -3 jakarta-regexp/src/java/org/apache/regexp/RESyntaxException.java
Index: RESyntaxException.java
===================================================================
RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RESyntaxException.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- RESyntaxException.java 2000/04/27 01:22:33 1.1
+++ RESyntaxException.java 2001/02/27 08:37:06 1.2
@@ -55,15 +55,20 @@
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
- */
+ */
/**
* Exception thrown to indicate a syntax error in a regular expression.
+ * This is a non-checked exception because you should only have problems compiling
+ * a regular expression during development.
+ * If you are making regular expression programs dynamically then you can catch it
+ * if you wish. But should not be forced to.
*
* @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
- * @version $Id: RESyntaxException.java,v 1.1 2000/04/27 01:22:33 jon Exp $
+ * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
+ * @version $Id: RESyntaxException.java,v 1.2 2001/02/27 08:37:06 gholam Exp $
*/
-public class RESyntaxException extends Exception
+public class RESyntaxException extends RuntimeException
{
/**
* Constructor.
1.4 +55 -30 jakarta-regexp/src/java/org/apache/regexp/RETest.java
Index: RETest.java
===================================================================
RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RETest.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- RETest.java 2001/02/11 23:04:22 1.3
+++ RETest.java 2001/02/27 08:37:06 1.4
@@ -21,7 +21,7 @@
* distribution.
*
* 3. The end-user documentation included with the redistribution, if
- * any, must include the following acknowlegement:
+ * any, must include the following acknowlegement:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowlegement may appear in the software itself,
@@ -57,7 +57,11 @@
*
*/
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+import java.io.File;
/**
* Data driven (and optionally interactive) testing harness to exercise regular
@@ -65,17 +69,21 @@
*
* @author <a href="mailto:jonl@muppetlabs.com">Jonathan Locke</a>
* @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
- * @version $Id: RETest.java,v 1.3 2001/02/11 23:04:22 jon Exp $
+ * @author <a href="mailto:gholam@xtra.co.nz">Michael McCallum</a>
+ * @version $Id: RETest.java,v 1.4 2001/02/27 08:37:06 gholam Exp $
*/
public class RETest
{
- // Construct a matcher and a debug compiler
- RE r = new RE();
- REDebugCompiler compiler = new REDebugCompiler();
-
// True if we want to see output from success cases
static final boolean showSuccesses = false;
+ // A new line character.
+ static final String NEW_LINE = System.getProperty( "line.separator" );
+
+ // Construct a matcher and a debug compiler
+ RE r = new RE();
+ REDebugCompiler compiler = new REDebugCompiler();
+
/**
* Main program entrypoint. If an argument is given, it will be compiled
* and interactive matching will ensue. If no argument is given, the
@@ -141,13 +149,16 @@
r.setProgram(compiler.compile(expr));
// Show expression
- say("\n" + expr + "\n");
+ say("" + NEW_LINE + "" + expr + "" + NEW_LINE + "");
// Show program for compiled expression
- compiler.dumpProgram(new PrintWriter(System.out));
+ PrintWriter writer = new PrintWriter( System.out );
+ compiler.dumpProgram( writer );
+ writer.flush();
+ boolean running = true;
// Test matching against compiled expression
- while (true)
+ while ( running )
{
// Read from keyboard
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
@@ -155,18 +166,26 @@
System.out.flush();
String match = br.readLine();
- // Try a match against the keyboard input
- if (r.match(match))
+ if ( match != null )
{
- say("Match successful.");
+ // Try a match against the keyboard input
+ if (r.match(match))
+ {
+ say("Match successful.");
+ }
+ else
+ {
+ say("Match failed.");
+ }
+
+ // Show subparen registers
+ showParens(r);
}
else
{
- say("Match failed.");
+ running = false;
+ System.out.println();
}
-
- // Show subparen registers
- showParens(r);
}
}
catch (Exception e)
@@ -187,21 +206,25 @@
}
/**
- * Fail with an error
- * @param s Failure description
+ * Fail with an error.
+ * Will print a big failure message to System.out.
+ * @param s Failure description
*/
void fail(String s)
{
failures++;
- say("\n");
+ say("" + NEW_LINE + "");
say("*******************************************************");
say("********************* FAILURE! **********************");
say("*******************************************************");
- say("\n");
+ say("" + NEW_LINE + "");
say(s);
- say("");
- compiler.dumpProgram(new PrintWriter(System.out));
- say("\n");
+ say("");
+ // make sure the writer gets flushed.
+ PrintWriter writer = new PrintWriter( System.out );
+ compiler.dumpProgram( writer );
+ writer.flush();
+ say("" + NEW_LINE + "");
}
/**
@@ -231,7 +254,7 @@
*/
void show()
{
- say("\n-----------------------\n");
+ say("" + NEW_LINE + "-----------------------" + NEW_LINE + "");
say("Expression #" + (n) + " \"" + expr + "\" ");
}
@@ -371,7 +394,9 @@
}
// Wasn't supposed to be an error
- fail("Produces the unexpected error \"" + e.getMessage() + "\"");
+ String message = e.getMessage() == null ? e.toString() : e.getMessage();
+ fail("Produces an unexpected exception \"" + message + "\"");
+ e.printStackTrace();
}
catch (Error e)
{
@@ -472,14 +497,14 @@
}
// Matcher blew it
- catch (Exception e)
+ catch(Exception e)
{
fail("Matcher threw exception: " + e.toString());
e.printStackTrace();
}
// Internal error
- catch (Error e)
+ catch(Error e)
{
fail("Matcher threw fatal error \"" + e.getMessage() + "\"");
e.printStackTrace();
@@ -492,9 +517,9 @@
}
// Show match time
- System.out.println ("\n\nMatch time = " + (System.currentTimeMillis() - ms) + " ms.");
+ System.out.println( NEW_LINE + NEW_LINE + "Match time = " + (System.currentTimeMillis() - ms) + " ms.");
// Print final results
- System.out.println ("\nTests complete. " + n + " tests, " + failures + " failure(s).");
+ System.out.println( NEW_LINE + "Tests complete. " + n + " tests, " + failures + " failure(s).");
}
}
1.3 +32 -0 jakarta-regexp/xdocs/RETest.txt
Index: RETest.txt
===================================================================
RCS file: /home/cvs/jakarta-regexp/xdocs/RETest.txt,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- RETest.txt 2001/02/11 23:04:23 1.2
+++ RETest.txt 2001/02/27 08:37:07 1.3
@@ -978,3 +978,35 @@
www.test.com
YES
www.test.com
+
+#163
+abc.*?x+yz
+abcaaaaaxyzbbbbxyz
+YES
+abcaaaaaxyz
+
+#164
+abc.+?x+yz
+abcaaaaaxyzbbbbxyz
+YES
+abcaaaaaxyz
+
+#165
+a.+?(c|d)
+aaaacaaaaad
+YES
+aaaac
+c
+
+#166
+a.+(c|d)
+aaaacaaaaad
+YES
+aaaacaaaaad
+d
+
+#167
+a+?b+?c+?
+aaabccaaabbbccc
+YES
+aaabc
Re: cvs commit: jakarta-regexp/xdocs RETest.txt
Posted by Jon Stevens <jo...@latchkey.com>.
on 2/27/01 12:37 AM, "gholam@apache.org" <gh...@apache.org> wrote:
> CLASSPATH=$CLASSPATH:../bin/jakarta-regexp-1.3-dev.jar
You should use something like this:
for i in ../bin/jakarta-regexp*.jar
do
CLASSPATH=$CLASSPATH:"$i"
done
That way we don't have to remember to increment the version number in that
file for each release.
:-)
thanks,
-jon
--
If you come from a Perl or PHP background, JSP is a way to take
your pain to new levels. --Anonymous
<http://jakarta.apache.org/velocity/> && <http://java.apache.org/turbine/>