You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by bu...@apache.org on 2001/04/16 21:26:53 UTC

[Bug 1346] New - substitute gives strange result

http://nagoya.apache.org/bugzilla/show_bug.cgi?id=1346

*** shadow/1346	Mon Apr 16 12:26:53 2001
--- shadow/1346.tmp.5869	Mon Apr 16 12:26:53 2001
***************
*** 0 ****
--- 1,178 ----
+ +============================================================================+
+ | substitute gives strange result                                            |
+ +----------------------------------------------------------------------------+
+ |        Bug #: 1346                        Product: ORO                     |
+ |       Status: NEW                         Version: 2.0.2                   |
+ |   Resolution:                            Platform: PC                      |
+ |     Severity: Normal                   OS/Version: Linux                   |
+ |     Priority:                           Component: Main                    |
+ +----------------------------------------------------------------------------+
+ |  Assigned To: oro-dev@jakarta.apache.org                                   |
+ |  Reported By: dshriver@sharemedia.com                                      |
+ +----------------------------------------------------------------------------+
+ |          URL:                                                              |
+ +============================================================================+
+ |                              DESCRIPTION                                   |
+ I am trying to find email addresses and substitute them with a link to an email
+ compose page.  However, when I try the substitution I get odd results: the
+ substitute method accurately finds the email addresses, but I don't have access
+ to $1 and so on.  Oddly enough, when I use the same code to turn URLs into links
+ to a popup window containing the URL content, everything works OK.  I think the
+ problem may be in the ORO code.
+ 
+ _______________________________________________________________________________
+ code and regexes
+ _______________________________________________________________________________
+ code:
+ *******************************************************************************
+ 
+     public static String PerlSubstituteTool(String reg_ex, int reg_ex_Flags,
+         String substitution, int interpolations, String input)
+         {
+         //Compiles reg_ex with reg_ex_Flags, runs Util.substitute on input
+         //and returns the result.  Guard first: a null or empty regex is
+         //treated as "no usable regex" and skipped, so input comes back as-is.
+         //(Per the reporter, a non-numeric flag value would instead fail with
+         //a number format exception before this method is ever called.)
+         if(null == reg_ex || reg_ex.equals(""))
+             {
+             //do nothing -- input falls through to the return unchanged
+             }
+         else
+             {
+             try
+                 {
+                 String temp = null;
+                 Perl5Compiler compiler = new Perl5Compiler();
+                 Perl5Substitution sub = new Perl5Substitution(substitution);
+                 Pattern pat = compiler.compile(reg_ex, reg_ex_Flags); //may throw MalformedPatternException
+                 temp = Util.substitute(new Perl5Matcher(), pat, sub, input,
+ interpolations); //NOTE(review): ORO documents this last argument as the number of substitutions -- confirm that is what 'interpolations' is meant to be
+                 input = temp; //replace input so the single return below yields the substituted text
+                 }
+             catch (MalformedPatternException mpe) //bad pattern: report on stdout, then return input unchanged
+                 {
+                 System.out.println("in catch MalformedPatternException");
+                 System.out.println("Exception is "+mpe.getMessage());
+                 }
+             }
+         return input;
+         }
+ *******************************************************************************
+ Note: these regexes are pulled in from LDAP, so by the time the Java code
+ receives the generated String it is already properly escaped.
+ 
+ a regex that works (for URLS):
+ *******************************************************************************
+ \b((ftp|http|gopher|mailto|news|nntp|telnet|wais|file|prospero|z39.50s|z39.50r|cid|mid|vemmi|service|imap|nfs|acap|rstp|tip|pop|data|dav|opaquelocktoken|sip|tel|fax|modem|ldap|afs|tn3270|mailserver):[\w/#~:.?+=&@!\-.:?\-;!>]
+ +? ) (?= [.:?\-;!>]* [^\w/#~:.?+=&@!\-.:?\-;!>] | $ )
+ 
+ *******************************************************************************
+ a regex that properly finds its target but does not give me access to $1... once
+ it is done (it is from the O'Reilly book "Mastering Regular Expressions"):
+ *******************************************************************************
+ [\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?: (?: [^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )* ")[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?: \.[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?: [^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )* ")[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*)* \@[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:[^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\.[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*)*|(?: [^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )*
+ ")[^()<>\@,;:".\[\]\x80-\xff\000-\010\012-\037]* (?: (?:\([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]*
+ )*\)|"[^\x80-\xff\n\015"]* (?: \[^\x80-\xff][^\x80-\xff\n\015"]* )*
+ ")[^()<>\@,;:".\[\]\x80-\xff\000-\010\012-\037]* )*< [\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\@[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\.[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*)*(?: , [\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*\@[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:[^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\.[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*)*)* :[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*)?(?: [^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )* ")[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?: \.[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?: [^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )* ")[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*)* \@[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:[^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\.[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*)*>)