You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oro-dev@jakarta.apache.org by bu...@apache.org on 2001/04/16 21:26:53 UTC
[Bug 1346] New - substitute gives strange result
http://nagoya.apache.org/bugzilla/show_bug.cgi?id=1346
*** shadow/1346 Mon Apr 16 12:26:53 2001
--- shadow/1346.tmp.5869 Mon Apr 16 12:26:53 2001
***************
*** 0 ****
--- 1,178 ----
+ +============================================================================+
+ | substitute gives strange result |
+ +----------------------------------------------------------------------------+
+ | Bug #: 1346 Product: ORO |
+ | Status: NEW Version: 2.0.2 |
+ | Resolution: Platform: PC |
+ | Severity: Normal OS/Version: Linux |
+ | Priority: Component: Main |
+ +----------------------------------------------------------------------------+
+ | Assigned To: oro-dev@jakarta.apache.org |
+ | Reported By: dshriver@sharemedia.com |
+ +----------------------------------------------------------------------------+
+ | URL: |
+ +============================================================================+
+ | DESCRIPTION |
+ I am trying to find email addresses and substitute them with a link to an email
+ compose page. However, when I try the substitution I get odd results: the
+ substitute method accurately finds the email addresses, but I don't have access
+ to $1 (or any other captured group). Oddly enough, when I use the same code to
+ turn URLs into links to a popup window containing the URL content, everything
+ works OK. I think the problem may be in the ORO code.
+
+ _______________________________________________________________________________
+ code and regexes
+ _______________________________________________________________________________
+ code:
+ *******************************************************************************
+
+ /**
+  * Applies a Perl5-style substitution to {@code input} using the ORO classes.
+  *
+  * @param reg_ex         the pattern text; when null or empty no work is done
+  * @param reg_ex_Flags   option mask handed to {@code Perl5Compiler.compile}
+  * @param substitution   the replacement text handed to {@code Perl5Substitution}
+  * @param interpolations forwarded as the last argument of {@code Util.substitute}
+  * @param input          the text to operate on
+  * @return the substituted text, or {@code input} unchanged when there is no
+  *         pattern or the pattern fails to compile
+  */
+ public static String PerlSubstituteTool(String reg_ex, int reg_ex_Flags,
+ String substitution, int interpolations, String input)
+ {
+ // Fail safe: a regex that could not be loaded may arrive as null or as an
+ // empty string. In that case there is nothing to do, so the input is
+ // handed straight back.
+ if (reg_ex == null || reg_ex.length() == 0)
+ {
+ return input;
+ }
+
+ try
+ {
+ Perl5Compiler patternCompiler = new Perl5Compiler();
+ Perl5Substitution replacement = new Perl5Substitution(substitution);
+ Pattern compiled = patternCompiler.compile(reg_ex, reg_ex_Flags);
+ // NOTE(review): in the ORO API the last argument of Util.substitute
+ // appears to be the number of substitutions to perform, not an
+ // interpolation count -- confirm this is what callers intend.
+ return Util.substitute(new Perl5Matcher(), compiled, replacement, input,
+ interpolations);
+ }
+ catch (MalformedPatternException mpe)
+ {
+ // Best effort: report the bad pattern and fall back to the original text.
+ System.out.println("in catch MalformedPatternException");
+ System.out.println("Exception is "+mpe.getMessage());
+ return input;
+ }
+ }
+ *******************************************************************************
+ Note: these regexes are pulled in from LDAP so the String that is generated
+ (by the time it gets pulled in by the Java code) is properly escaped
+
+ a regex that works (for URLs):
+ *******************************************************************************
+ \b((ftp|http|gopher|mailto|news|nntp|telnet|wais|file|prospero|z39.50s|z39.50r|cid|mid|vemmi|service|imap|nfs|acap|rstp|tip|pop|data|dav|opaquelocktoken|sip|tel|fax|modem|ldap|afs|tn3270|mailserver):[\w/#~:.?+=&@!\-.:?\-;!>]
+ +? ) (?= [.:?\-;!>]* [^\w/#~:.?+=&@!\-.:?\-;!>] | $ )
+
+ *******************************************************************************
+ a regex that properly finds its target but does not give me access to $1... once
+ it is done (it is from the O'Reilly book "Mastering Regular Expressions"):
+ *******************************************************************************
+ [\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?: (?: [^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )* ")[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?: \.[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?: [^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )* ")[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*)* \@[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:[^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\.[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*)*|(?: [^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )*
+ ")[^()<>\@,;:".\[\]\x80-\xff\000-\010\012-\037]* (?: (?:\([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]*
+ )*\)|"[^\x80-\xff\n\015"]* (?: \[^\x80-\xff][^\x80-\xff\n\015"]* )*
+ ")[^()<>\@,;:".\[\]\x80-\xff\000-\010\012-\037]* )*< [\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\@[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\.[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*)*(?: , [\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*\@[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:[^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\.[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*)*)* :[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*)?(?: [^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )* ")[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?: \.[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?: [^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|"[^\x80-\xff\n\015"]* (?:
+ \[^\x80-\xff][^\x80-\xff\n\015"]* )* ")[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*)* \@[\040\t]* (?:
+ \([^\x80-\xff\n\015()]* (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:[^ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*(?:\.[\040\t]* (?: \([^\x80-\xff\n\015()]* (?:
+ (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]* (?:\[^\x80-\xff][^\x80-\xff\n\015()]*
+ )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]* )*(?:[^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff]+(?![^
+ (\040)<>\@,;:".\[\]\000-\037\x80-\xff])|\[(?:
+ [^\x80-\xff\n\015\[\]]|\[^\x80-\xff])* \])[\040\t]* (?: \([^\x80-\xff\n\015()]*
+ (?: (?:\[^\x80-\xff]|\([^\x80-\xff\n\015()]*
+ (?:\[^\x80-\xff][^\x80-\xff\n\015()]* )*\))[^\x80-\xff\n\015()]* )*\)[\040\t]*
+ )*)*>)