You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by bu...@apache.org on 2001/05/17 21:07:32 UTC
[Bug 1799] New - possible bug in ORO's Util.substitute
http://nagoya.apache.org/bugzilla/show_bug.cgi?id=1799
*** shadow/1799 Thu May 17 12:07:32 2001
--- shadow/1799.tmp.20552 Thu May 17 12:07:32 2001
***************
*** 0 ****
--- 1,262 ----
+ +============================================================================+
+ | possible bug in ORO's Util.substitute |
+ +----------------------------------------------------------------------------+
+ | Bug #: 1799 Product: ORO |
+ | Status: NEW Version: 2.0.2 |
+ | Resolution: Platform: PC |
+ | Severity: Normal OS/Version: Linux |
+ | Priority: Component: Main |
+ +----------------------------------------------------------------------------+
+ | Assigned To: oro-dev@jakarta.apache.org |
+ | Reported By: dshriver@sharemedia.com |
+ +----------------------------------------------------------------------------+
+ | URL: |
+ +============================================================================+
+ | DESCRIPTION |
+ No return from Util.substitute:
+
+ I am a bit hesitant to report this as a bug (since it could be infinite
+ backtracking in my regex), but I notice that in some cases a call to
+ Util.substitute never returns, and Daniel F. Savarese suggested that
+ the problem might be in ORO.
+
+ Try out the included test program. It has three sets of test input: two
+ of them do not return any time soon, while the third works as it is
+ supposed to.
+
+ __________________________________________________________________________
+
+ import org.apache.oro.text.regex.*;
+ import org.apache.oro.text.perl.*;
+
+ public class RegexTest
+ {
+ public static void main(String[] args)
+ {
+ String input = "";
+ int val = -1;
+
+ //the first two kill us, then the third one is ok
+ String[] messages =
+ {
+ "The original message was received at Mon, 14 May 2001 12:04:34
+ -0400 \n from umc97 [127.0.0.1] \n ----- The following addresses had \n
+ permanent fatal errors ----- \n <a...@b.c> \n <b...@c.d> \n <c...@d.e> \n <e...@d.f> \n
+ ----- Transcript of session follows ----- \n 550 <a...@b.c>... Host unknown (Name
+ server: b.c: host not found) \n 550 <b...@c.d>... Host unknown (Name server: c.d:
+ host not found) \n 550 <c...@d.e>... Host unknown (Name server: d.e: host not
+ found) \n 550 <e...@d.f>... Host unknown (Name server: d.f: host not found) \n
+ MESSAGE/DELIVERY-STATUS download \n From: Daniel Shriver \n To:
+ dshriver@sharemedia.com \n Subject: should trigger vacation response \n Date:
+ 5/14/01 12:04 PM \n TEXT/X-VCARD \n dshriver.vcf \n Save Address",
+ "From: Daniel Shriver <ds...@umd.sharemedia.com> \n To:
+ dshriver@umd.sharemedia.com \n Sent: \n Subject: test message to mess up email
+ regex \n \n text \n dshriver@sharemedia.com \n text \n tabbydan@yahoo.com \n
+ text \n bob@[210.198.13.13] \n text \n x@y \n \n \"bob at\" bob@home.com \n
+ \"joe is a shmuck and a big O'l one\" <jo...@home.com>",
+ "From: Daniel Shriver <ds...@umd.sharemedia.com> \n To:
+ dshriver@umd.sharemedia.com \n Sent: \n Subject: test message to mess up email
+ regex, ok one \n \n > text \n > dshriver@sharemedia.com \n > text \n >
+ tabbydan@yahoo.com \n > text \n > bob@[210.198.13.13] \n > text \n > x@y"
+ };
+
+ System.out.println("We have [" + messages.length
+     + "] messages to choose from");
+
+ // Usage text shared by every argument error below.
+ String warning = "Usage: java RegexTest #\n (where # is the number of "
+     + "the message you want to parse as an integer, and is a value "
+     + "between 1 and " + messages.length + ")";
+
+ // A message number is mandatory.
+ if (args.length < 1)
+ {
+   System.out.println(warning);
+   System.exit(1);
+ }
+ try
+ {
+   val = Integer.parseInt(args[0]);
+ }
+ catch (NumberFormatException e)
+ {
+   System.out.println("You did not enter an integer number!\n" + warning);
+   // Stop here: val is still -1 and would index outside messages below.
+   System.exit(1);
+ }
+ // Message numbers are 1-based, so 0 and negative values are just as
+ // invalid as values past the end of the array.
+ if (val < 1 || val > messages.length)
+ {
+   System.out.println("You did not enter a number in the valid range (1 - "
+       + messages.length + ")!\n" + warning);
+   System.exit(1);
+ }
+ input = messages[val - 1];
+ //System.out.println("DEBUG MSG: The original message is:\n"+input+"\n\n");
+
+ String regex = "([\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)((?: [^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?: \\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?: [^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)* \\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)*|(?: [^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )*
+ \")[^()<>\\@,;:\".\\[\\]\\x80-\\xff\\000-\\010\\012-\\037]* (?:
+ (?:\\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)|\"[^\\x80-\\xff\\n\\015\"] * (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]*
+ )* \")[^()<>\\@,;:\".\\[\\]\\x80-\\xff\\000-\\010\\012-\\037]* )*< [\\040\\t]*
+ (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)*(?: , [\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*\\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)*)* :[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)?(?: [^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?: \\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?: [^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)* \\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)*>)";
+ int regex_flags = 41;
+ String substitution = "$1<A
+ HREF='compose?op=compose&from_link=true&recipientsTO=$2'>$2</A>";
+ int interpolations = -1;
+
+ System.out.println("DEBUG MSG: just about to call PerlSubstituteTool");
+
+ String output = PerlSubstituteTool(regex, regex_flags, substitution,
+ interpolations, input);
+
+ System.out.println("Your message is now:\n"+output);
+ }
+
+ /**
+  * Runs a Perl5 substitution over <code>input</code> with ORO's
+  * <code>Util.substitute</code> and returns the result.
+  *
+  * Fail safe: if <code>reg_ex</code> is null or empty (for example because
+  * an earlier step could not produce a usable regex), nothing is compiled
+  * and <code>input</code> is returned untouched. If the pattern does not
+  * compile, the MalformedPatternException is reported on standard output
+  * and <code>input</code> is likewise returned unchanged.
+  *
+  * @param reg_ex         the Perl5 regular expression to compile
+  * @param reg_ex_Flags   option mask handed to Perl5Compiler.compile
+  * @param substitution   substitution expression handed to Perl5Substitution
+  * @param interpolations value handed through to Util.substitute
+  * @param input          the text to run the substitution on
+  * @return the substituted text, or <code>input</code> itself when there
+  *         is no regex or the regex is malformed
+  */
+ public static String PerlSubstituteTool(String reg_ex, int reg_ex_Flags,
+     String substitution, int interpolations, String input)
+ {
+   // No regex to apply -- just send back the input.
+   if (reg_ex == null || reg_ex.length() == 0)
+   {
+     return input;
+   }
+
+   try
+   {
+     Perl5Compiler compiler = new Perl5Compiler();
+     Perl5Substitution sub = new Perl5Substitution(substitution);
+     Pattern pat = compiler.compile(reg_ex, reg_ex_Flags);
+     // The two DEBUG lines bracket the call that is reported not to return.
+     System.out.println("DEBUG MSG: just before Util.substitute");
+     String result = Util.substitute(new Perl5Matcher(), pat, sub, input,
+         interpolations);
+     System.out.println("DEBUG MSG: just after Util.substitute");
+     return result;
+   }
+   catch (MalformedPatternException mpe)
+   {
+     System.out.println("DEBUG MSG: in catch MalformedPatternException");
+     System.out.println("DEBUG MSG: Exception is " + mpe.getMessage());
+     return input;
+   }
+ }
+
+ }