You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by bu...@apache.org on 2001/05/17 21:07:32 UTC

[Bug 1799] New - possible bug in ORO's Util.substitute

http://nagoya.apache.org/bugzilla/show_bug.cgi?id=1799

*** shadow/1799	Thu May 17 12:07:32 2001
--- shadow/1799.tmp.20552	Thu May 17 12:07:32 2001
***************
*** 0 ****
--- 1,262 ----
+ +============================================================================+
+ | possible bug in ORO's Util.substitute                                      |
+ +----------------------------------------------------------------------------+
+ |        Bug #: 1799                        Product: ORO                     |
+ |       Status: NEW                         Version: 2.0.2                   |
+ |   Resolution:                            Platform: PC                      |
+ |     Severity: Normal                   OS/Version: Linux                   |
+ |     Priority:                           Component: Main                    |
+ +----------------------------------------------------------------------------+
+ |  Assigned To: oro-dev@jakarta.apache.org                                   |
+ |  Reported By: dshriver@sharemedia.com                                      |
+ +----------------------------------------------------------------------------+
+ |          URL:                                                              |
+ +============================================================================+
+ |                              DESCRIPTION                                   |
+ No return from Util.substitute:
+ 
+ I am a bit hesitant to report this as a bug (since it could be infinite
+ backtracking in my regex) but I notice that I never get out of a call
+ to Util.substitute in some cases, and Daniel F. Savarese suggested that
+ the problem might be in ORO.
+ 
+ Try the included test program: it has three sets of test input. Two of
+ them do not return any time soon; the third works as it is supposed to.
+ 
+ __________________________________________________________________________
+ 
+ import org.apache.oro.text.regex.*;
+ import org.apache.oro.text.perl.*;
+ 
+ public class RegexTest
+     {
+     /**
+      * Command-line driver for the Util.substitute hang report.
+      * <p>
+      * Expects one argument: the 1-based number of the sample message to run
+      * through the address-linking regex.  The chosen message is handed to
+      * PerlSubstituteTool and the result is printed.  A missing, non-numeric
+      * or out-of-range argument prints the usage text and exits with
+      * status 1.
+      * <p>
+      * NOTE(review): several string literals below (the messages, the regex
+      * and some println texts) are hard-wrapped across lines, presumably by
+      * the mail archive; they are reproduced exactly as posted and need to be
+      * rejoined before this compiles.
+      *
+      * @param args args[0] is the message number, between 1 and the number
+      *             of sample messages
+      */
+     public static void main(String[] args)
+         {
+         String input = "";
+         int val = -1;
+ 
+         //the first two kill us then the third one is ok
+         String[] messages =
+             {
+             "The original message was received at Mon, 14 May 2001 12:04:34 
+ -0400 \n from umc97 [127.0.0.1] \n ----- The following addresses had \n
+ permanent fatal errors ----- \n <a...@b.c> \n <b...@c.d> \n <c...@d.e> \n <e...@d.f> \n
+ ----- Transcript of session follows ----- \n 550 <a...@b.c>... Host unknown (Name
+ server: b.c: host not found) \n 550 <b...@c.d>... Host unknown (Name server: c.d:
+ host not found) \n 550 <c...@d.e>... Host unknown (Name server: d.e: host not
+ found) \n 550 <e...@d.f>... Host unknown (Name server: d.f: host not found) \n
+ MESSAGE/DELIVERY-STATUS download \n From: Daniel Shriver \n To:
+ dshriver@sharemedia.com \n Subject: should trigger vacation response \n Date:
+ 5/14/01 12:04 PM \n TEXT/X-VCARD \n dshriver.vcf \n Save Address",
+             "From: Daniel Shriver <ds...@umd.sharemedia.com> \n To:
+ dshriver@umd.sharemedia.com \n Sent: \n Subject: test message to mess up email
+ regex \n  \n text \n dshriver@sharemedia.com \n text \n tabbydan@yahoo.com \n
+ text \n bob@[210.198.13.13] \n text \n x@y \n \n \"bob at\" bob@home.com \n
+ \"joe is a shmuck and a big O'l one\" <jo...@home.com>",
+             "From: Daniel Shriver <ds...@umd.sharemedia.com> \n To:
+ dshriver@umd.sharemedia.com \n Sent: \n Subject: test message to mess up email
+ regex, ok one \n \n > text \n > dshriver@sharemedia.com \n > text \n >
+ tabbydan@yahoo.com \n > text \n > bob@[210.198.13.13] \n > text \n > x@y"
+             };
+ 
+         System.out.println("We have ["+messages.length+"] messages to choose
+ from");
+ 
+         String warning = "Usage: java RegexTest #\n (where # is the number of
+ the message you want to parse as an integer, and is a value between 1 and
+ "+messages.length+")";
+ 
+         if (args.length < 1)
+             {
+             System.out.println(warning);
+             System.exit(1);
+             }
+         try
+             {
+             val = Integer.parseInt(args[0]);
+             }
+         catch (NumberFormatException e)
+             {
+             System.out.println("You did not enter an integer
+ number!\n"+warning);
+             //val is still -1 here; carrying on would index the array out
+             //of bounds, so stop like the missing-argument case does
+             System.exit(1);
+             }
+         //reject everything outside 1..messages.length: zero and negative
+         //numbers would otherwise reach messages[val-1] and throw
+         //ArrayIndexOutOfBoundsException
+         if (val < 1 || val > messages.length)
+             {
+             System.out.println("You did not enter a number in the valid range (1
+ - "+messages.length+")!\n"+warning);
+             System.exit(1);
+             }
+         input = messages[val-1];
+         //System.out.println("DEBUG MSG: The original message
+         //is:\n"+input+"\n\n");
+ 
+         String regex = "([\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)((?: [^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?: \\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?: [^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)* \\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)*|(?: [^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )*
+ \")[^()<>\\@,;:\".\\[\\]\\x80-\\xff\\000-\\010\\012-\\037]* (?:
+ (?:\\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)|\"[^\\x80-\\xff\\n\\015\"] * (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]*
+ )* \")[^()<>\\@,;:\".\\[\\]\\x80-\\xff\\000-\\010\\012-\\037]* )*< [\\040\\t]*
+ (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)*(?: , [\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*\\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)*)* :[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)?(?: [^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?: \\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?: [^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\x80-\\xff\\n\\015\"] *
+ (?: [^\\x80-\\xff][^\\x80-\\xff\\n\\015\"]* )* \")[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)* \\@[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:\\.[\\040\\t]* (?: \\([^\\x80-\\xff\\n\\015()]* (?:
+ (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*(?:[^ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff]+(?![^
+ (\\040)<>\\@,;:\".\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:
+ [^\\x80-\\xff\\n\\015\\[\\]]|[^\\x80-\\xff])* \\])[\\040\\t]* (?:
+ \\([^\\x80-\\xff\\n\\015()]* (?: (?:[^\\x80-\\xff]|\\([^\\x80-\\xff\\n\\015()]*
+ (?:[^\\x80-\\xff][^\\x80-\\xff\\n\\015()]* )*\\))[^\\x80-\\xff\\n\\015()]*
+ )*\\)[\\040\\t]* )*)*>)";
+         //NOTE(review): 41 = 32 + 8 + 1; presumably Perl5Compiler's
+         //EXTENDED_MASK | MULTILINE_MASK | CASE_INSENSITIVE_MASK -- confirm
+         //against the ORO documentation
+         int regex_flags = 41;
+         String substitution = "$1<A
+ HREF='compose?op=compose&from_link=true&recipientsTO=$2'>$2</A>";
+         int interpolations = -1;
+ 
+         System.out.println("DEBUG MSG: just about to call PerlSubstituteTool");
+ 
+         String output = PerlSubstituteTool(regex, regex_flags, substitution,
+ interpolations, input);
+ 
+         System.out.println("Your message is now:\n"+output);
+         }
+ 
+     /**
+      * Applies a Perl5 substitution to {@code input} through ORO's
+      * Util.substitute and returns the rewritten text.
+      * <p>
+      * A null or empty {@code reg_ex} is treated as "no regex" and the input
+      * comes back untouched.  If the pattern does not compile, the
+      * MalformedPatternException is reported on stdout and the original
+      * input is returned.
+      *
+      * @param reg_ex         pattern source handed to Perl5Compiler.compile
+      * @param reg_ex_Flags   compile options handed to Perl5Compiler.compile
+      * @param substitution   template used to build the Perl5Substitution
+      * @param interpolations forwarded as the last argument of
+      *                       Util.substitute (NOTE(review): that slot looks
+      *                       like a substitution count rather than an
+      *                       interpolation count -- confirm against ORO docs)
+      * @param input          text to run the substitution over
+      * @return the substituted text, or {@code input} unchanged when there is
+      *         no regex or the regex does not compile
+      */
+     public static String PerlSubstituteTool(String reg_ex, int reg_ex_Flags,
+         String substitution, int interpolations, String input)
+         {
+         //fail safe -- a regex that could not be set up may arrive as null
+         //or as the empty string; in that case hand the input straight back
+         if (reg_ex == null || reg_ex.equals(""))
+             {
+             return input;
+             }
+ 
+         try
+             {
+             Perl5Compiler perl5 = new Perl5Compiler();
+             Perl5Substitution replacement = new Perl5Substitution(substitution);
+             Pattern compiled = perl5.compile(reg_ex, reg_ex_Flags);
+             System.out.println("DEBUG MSG: just before Util.substitute");
+             String replaced = Util.substitute( new Perl5Matcher(),
+                                                compiled,
+                                                replacement,
+                                                input,
+                                                interpolations );
+             System.out.println("DEBUG MSG: just after Util.substitute");
+             return replaced;
+             }
+         catch (MalformedPatternException mpe)
+             {
+             //best effort: report the bad pattern and fall through to
+             //returning the untouched input
+             System.out.println("DEBUG MSG: in catch
+ MalformedPatternException");
+             System.out.println("DEBUG MSG: Exception is "+mpe.getMessage());
+             }
+         return input;
+         }
+ 
+     }