You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by vg...@apache.org on 2004/04/22 14:15:49 UTC
cvs commit: cocoon-2.1/src/blocks/linkrewriter/java/org/apache/cocoon/transformation LinkRewriterTransformer.java
vgritsenko 2004/04/22 05:15:48
Modified: . status.xml
src/blocks/linkrewriter/java/org/apache/cocoon/transformation
LinkRewriterTransformer.java
Log:
LinkRewriter: Added support for rewriting URLs which are embedded into
the attribute values (example: style attribute with background image URL).
Such URLs can be extracted using RE expressions and re-written, leaving
rest of the attribute value intact.
Revision Changes Path
1.305 +9 -2 cocoon-2.1/status.xml
Index: status.xml
===================================================================
RCS file: /home/cvs/cocoon-2.1/status.xml,v
retrieving revision 1.304
retrieving revision 1.305
diff -u -r1.304 -r1.305
--- status.xml 17 Apr 2004 06:13:05 -0000 1.304
+++ status.xml 22 Apr 2004 12:15:48 -0000 1.305
@@ -212,6 +212,12 @@
<changes>
<release version="@version@" date="@date@">
+ <action dev="VG" type="update">
+ LinkRewriter: Added support for rewriting URLs which are embedded into
+ the attribute values (example: style attribute with background image URL).
+ Such URLs can be extracted using RE expressions and re-written, leaving
+ rest of the attribute value intact.
+ </action>
<action dev="AG" type="update">
Updated Velocity to 1.4 and iText to 1.02b
</action>
@@ -220,7 +226,8 @@
Added standalone build target "validate-stylesheets".
</action>
<action dev="AG" type="update">
- Updated Xindice to 1.1b4, xmldb-api to 20030701, xmldb-common to 20030701 and xmldb-xupdate to 20040205
+ Updated Xindice to 1.1b4, xmldb-api to 20030701, xmldb-common to 20030701
+ and xmldb-xupdate to 20040205
</action>
<action dev="AG" type="update">
Updated Jetty to 4.2.19
1.12 +413 -183 cocoon-2.1/src/blocks/linkrewriter/java/org/apache/cocoon/transformation/LinkRewriterTransformer.java
Index: LinkRewriterTransformer.java
===================================================================
RCS file: /home/cvs/cocoon-2.1/src/blocks/linkrewriter/java/org/apache/cocoon/transformation/LinkRewriterTransformer.java,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- LinkRewriterTransformer.java 5 Mar 2004 13:01:59 -0000 1.11
+++ LinkRewriterTransformer.java 22 Apr 2004 12:15:48 -0000 1.12
@@ -1,12 +1,12 @@
/*
* Copyright 1999-2004 The Apache Software Foundation.
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -16,8 +16,8 @@
package org.apache.cocoon.transformation;
import java.io.IOException;
+import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
@@ -26,203 +26,354 @@
import org.apache.avalon.framework.activity.Initializable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.parameters.ParameterException;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.components.language.markup.xsp.XSPModuleHelper;
import org.apache.cocoon.environment.SourceResolver;
import org.apache.cocoon.transformation.helpers.VariableConfiguration;
+import org.apache.regexp.RE;
+import org.apache.regexp.RECompiler;
+import org.apache.regexp.REProgram;
+import org.apache.regexp.RESyntaxException;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
-/**
+/**
* Rewrites URIs in links to a value determined by an InputModule.
* The URI scheme identifies the InputModule to use, and the rest of the URI is
* used as the attribute name.
+ *
* <h3>Example</h3>
- * For instance, if we had an {@link
+ * <p>For instance, if we had an {@link
* org.apache.cocoon.components.modules.input.XMLFileModule}, configured to
* read values from an XML file:
* <pre>
- * <site>
- * <faq>
- * <how_to_boil_eggs href="faq/eggs.html"/>
- * </faq>
- * </site>
+ * <site>
+ * <faq>
+ * <how_to_boil_eggs href="faq/eggs.html"/>
+ * </faq>
+ * </site>
* </pre>
- * mapped to the prefix 'site:', then <link
- * href="site:/site/faq/how_to_boil_eggs/@href"> would be replaced with
- * <link href="faq/eggs.html">
- * <p>
- * InputModules are configured twice; first statically in
+ *
+ * mapped to the prefix 'site:', then <code><link
+ * href="site:/site/faq/how_to_boil_eggs/@href"></code> would be replaced
+ * with <code><link href="faq/eggs.html"></code>
+ *
+ * <h3>InputModule Configuration</h3>
+ * <p>InputModules are configured twice; first statically in
* <code>cocoon.xconf</code>, and then dynamically at runtime, with dynamic
- * configuration (if any) taking precedence. LinkRewriterTransformer allows
+ * configuration (if any) taking precedence. The transformer allows
* you to pass a dynamic configuration to the InputModules it uses, as follows.
- * <p>
- * First, a template Configuration is specified in the static
- * <map:components> block of the sitemap:
+ *
+ * <p>First, a template Configuration is specified in the static
+ * <map:components> block of the sitemap within <input-module> tags:
* <pre>
* <map:transformer name="linkrewriter"
- * src="org.apache.cocoon.transformation.LinkRewriterTransformer">
- * <link-attrs>href src</link-attrs>
- * <schemes>site ext</schemes>
- * <input-module name="site">
- * <file src="cocoon://samples/link/linkmap" reloadable="true"/>
- * </input-module>
- * <input-module name="mapper">
- * <input-module name="site">
- * <file src="{src}" reloadable="true"/>
- * </input-module>
- * <prefix>/site/</prefix>
- * <suffix>/@href</suffix>
- * </input-module>
- * </map:transformer>
+ * src="org.apache.cocoon.transformation.LinkRewriterTransformer">
+ * <link-attrs>href src</link-attrs>
+ * <schemes>site ext</schemes>
+ * <input-module name="site">
+ * <file src="cocoon://samples/link/linkmap" reloadable="true"/>
+ * </input-module>
+ * <input-module name="mapper">
+ * <input-module name="site">
+ * <file src="{src}" reloadable="true"/>
+ * </input-module>
+ * <prefix>/site/</prefix>
+ * <suffix>/@href</suffix>
+ * </input-module>
+ * </map:transformer>
* </pre>
+ *
* Here, we have first configured which attributes to examine, and which URL
- * schemes to consider rewriting. In this example, <a href="site:index"> would
- * be processed. See below for more configuration options.
- * Then, we have established dynamic configuration templates for two modules,
+ * schemes to consider rewriting. In this example, <a href="site:index">
+ * would be processed. See below for more configuration options.
+ *
+ * <p>Then, we have established dynamic configuration templates for two modules,
* 'site' (an {@link org.apache.cocoon.components.modules.input.XMLFileModule})
* and 'mapper' (a {@link
* org.apache.cocoon.components.modules.input.SimpleMappingMetaModule}). All
- * other InputModules will use their static configs. Note that, when
- * configuring a Meta InputModule like 'mapper', we need to also configure the
- * 'inner' module (here, 'site') with a nested <input-module>.
- * <p>
- * There is one further twist; to have <em>really</em> dynamic configuration,
- * we need information available only when the transformer actually runs. This
- * is why the above config was called a "template" Configuration; it needs to
+ * other InputModules will use their static configs. Note that, when
+ * configuring a meta InputModule like 'mapper', we need to also configure the
+ * 'inner' module (here, 'site') with a nested <input-module>.
+ *
+ * <p>There is one further twist; to have <em>really</em> dynamic configuration,
+ * we need information available only when the transformer actually runs. This
+ * is why the above config was called a "template" configuration; it needs to
* be 'instantiated' and provided extra info, namely:
* <ul>
* <li>The {src} string will be replaced with the map:transform @src attribute value.
* <li>Any other {variables} will be replaced with map:parameter values
* </ul>
+ *
* With the above config template, we can have a matcher like:
*
* <pre>
- * <map:match pattern="**welcome">
- * <map:generate src="index.xml"/>
- * <map:transform type="linkrewriter" src="cocoon:/{1}linkmap"/>
- * <map:serialize type="xml"/>
- * </map:match>
+ * <map:match pattern="**welcome">
+ * <map:generate src="index.xml"/>
+ * <map:transform type="linkrewriter" src="cocoon:/{1}linkmap"/>
+ * <map:serialize type="xml"/>
+ * </map:match>
* </pre>
*
* Which would cause the 'mapper' XMLFileModule to be configured with a
* different XML file, depending on the request.
- * <p>
- * Similarly, we could use a dynamic prefix:
+ *
+ * <p>Similarly, we could use a dynamic prefix:
* <pre>
- * <prefix>{prefix}</prefix>
+ * <prefix>{prefix}</prefix>
* </pre>
* in the template config, and:
* <pre>
- * <map:parameter name="prefix" value="/site/"/>
+ * <map:parameter name="prefix" value="/site/"/>
* </pre>
* in the map:transform
- * <p>
- * A live example of LinkRewriterTransformer can be found in the <a
+ *
+ * <p>A live example of LinkRewriterTransformer can be found in the <a
* href="http://xml.apache.org/forrest/">Apache Forrest</a> sitemap.
- * <p>
*
- * <h3>Configuration</h3>
+ * <h3>Transformer Configuration</h3>
* <p>
- * The following map:parameter's and map:transformer parameters are recognised:
+ * The following configuration entries in map:transformer block are recognised:
* <dl>
- * <dt>link-attrs</dt>
- * <dd>Space-separated list of attributes to consider links (to be
- * transformed). Defaults to 'href'.</dd>
- * <dt>schemes</dt>
- * <dd>Space-separated list of URI schemes to explicitly include. If specified, all URIs with unlisted schemes will not be converted.</dd>
- * <dt>exclude-schemes</dt>
- * <dd>Space-separated list of URI schemes to explicitly exclude. Defaults to 'http https ftp news mailto'.</dd>
- * <dt>bad-link-str</dt>
- * <dd>String to use for links with a correct InputModule prefix, but no value
- * therein. Defaults to the original URI.</dd>
+ * <dt>link-attrs</dt>
+ * <dd>Space-separated list of attributes to consider links (to be
+ * transformed). The whole value of the attribute is considered a link and
+ * transformed.</dd>
+ *
+ * <dt>link-attr</dt>
+ * <dd>0..n of these elements each specify an attribute containing link(s)
+ * (to be transformed) and optionally a regular expression to locate
+ * substring(s) of the attribute value considered link(s). Has two
+ * attributes:
+ * <dl>
+ * <dt>name</dt>
+ * <dd>(required) name of the attribute whose value contains link(s).</dd>
+ * <dt>pattern</dt>
+ * <dd>(optional) regular expression such that when matched against the
+ * attribute value, all parenthesized expressions (except number 0) will
+ * be considered links that should be transformed. If absent, the whole value
+ * of the attribute is considered to be a link, as if the attribute was
+ * included in 'link-attrs'.</dd>
+ * </dl>
+ * </dd>
+ *
+ * <dt>schemes</dt>
+ * <dd>Space-separated list of URI schemes to explicitly include.
+ * If specified, all URIs with unlisted schemes will <i>not</i> be converted.</dd>
+ *
+ * <dt>exclude-schemes</dt>
+ * <dd>Space-separated list of URI schemes to explicitly exclude.
+ * Defaults to 'http https ftp news mailto'.</dd>
+ *
+ * <dt>bad-link-str</dt>
+ * <dd>String to use for links with a correct InputModule prefix, but no value
+ * therein. Defaults to the original URI.</dd>
* </dl>
*
* <p>
- * Note that currently, only links in the default ("") namespace are converted.
+ * The attributes considered to contain links are a <em>set</em> of the attributes
+ * specified in 'link-attrs' element and all 'link-attr' elements. Each attribute
+ * should be specified only once either in 'link-attrs' or 'link-attr'; i.e. an
+ * attribute can have at most 1 regular expression associated with it. If neither
+ * 'link-attrs' nor 'link-attr' configuration is present, defaults to 'href'.
+ *
+ * <p>Below is an example of regular expression usage that will transform links
+ * <code>x1</code> and <code>x2</code> in
+ * <code><action target="foo url(x1) bar url(x2)"/></code>:
+ *
+ * <pre>
+ * <map:transformer name="linkrewriter"
+ * src="org.apache.cocoon.transformation.LinkRewriterTransformer">
+ * <link-attr name="target" pattern="(?:url\((.*?)\).*?){1,2}$"/>
+ * <!-- additional configuration ... -->
+ * </map:transformer>
+ * </pre>
+ *
+ * <p>
+ * When matched against the value of <code>target</code> attribute above,
+ * the parenthesized expressions are:<br/>
+ * <samp>
+ * $0 = url(x1) bar url(x2)<br/>
+ * $1 = x1<br/>
+ * $2 = x2<br/>
+ * </samp>
+ *
+ * <p>
+ * Expression number 0 is always discarded by the transformer and the rest
+ * are considered links and re-written.
+ *
+ * <p>If present, map:parameter's from the map:transform block override the
+ * corresponding configuration entries from map:transformer. As an exception,
+ * 'link-attr' parameters are not recognised; 'link-attrs' parameter overrides
+ * both 'link-attrs' and 'link-attr' configuration.
+ *
+ * <p>
+ * <b>NOTE:</b> Currently, only links in the default ("") namespace are converted.
*
- * @author <a href="mailto:jefft@apache.org">Jeff Turner</a>
* @version CVS $Id$
*/
public class LinkRewriterTransformer
- extends AbstractSAXTransformer implements Initializable, Disposable
-{
+ extends AbstractSAXTransformer
+ implements Initializable, Disposable {
- private static String NAMESPACE="";
+ private final static String NAMESPACE = "";
- /** A list of attributes considered 'links' */
- private Set linkAttrs;
+ /**
+ * A guardian object denoting absence of regexp pattern for a given
+ * attribute. Used as value in linkAttrs and origLinkAttrs maps.
+ */
+ private final static Object NO_REGEXP = new Object();
- /** List containing schemes (protocols) of links to log */
- private Set inSchemes;
- private Set outSchemes;
+ //
+ // Configure()'d parameters
+ //
/** Configuration passed to the component once through configure(). */
- private Configuration origConf;
+ private Configuration origConf;
- /** Derivation of origConf with variables obtained from setup() parameters.
- * Recreated once per invocation. */
- private Configuration conf;
+ private String origBadLinkStr;
+ private String origInSchemes;
+ private String origOutSchemes;
- private XSPModuleHelper modHelper;
+ /**
+ * A map where keys are those attributes which are considered 'links'.
+ * Obtained from configuration passed to the component once through
+ * the configure() method.
+ *
+ * <p>Map contains NO_REGEXP object for attributes whose whole values are
+ * considered links, or compiled RE expressions for attributes whose values
+ * might contain a link.
+ */
+ private Map origLinkAttrs;
+
+ //
+ // Setup()'d parameters
+ //
+
+ /**
+ * Derivation of origConf with variables obtained from setup() parameters.
+ * Recreated once per invocation.
+ */
+ private Configuration conf;
+ /**
+ * String to use for links with a correct InputModule prefix, but no value
+ * therein.
+ */
private String badLinkStr;
+ /** Set containing schemes (protocols) of links to process */
+ private Set inSchemes;
+
+ /** Set containing schemes (protocols) of links to exclude from processing */
+ private Set outSchemes;
+
+ /**
+ * A map of attributes considered 'links' and corresponding RE expression
+ * or NO_REGEXP object. Recreated once per invocation or copied from
+ * origLinkAttrs based on setup() method parameters.
+ */
+ private Map linkAttrs;
+
+ private XSPModuleHelper modHelper;
+
+
/**
* Configure this component from the map:transformer block. Called before
* initialization and setup.
*/
- public void configure(Configuration conf)
- throws ConfigurationException {
+ public void configure(Configuration conf) throws ConfigurationException {
super.configure(conf);
+
this.origConf = conf;
+ this.origBadLinkStr = conf.getChild("bad-link-str").getValue(null);
+ this.origInSchemes = conf.getChild("schemes").getValue("");
+ this.origOutSchemes = conf.getChild("exclude-schemes").getValue("http https ftp news mailto");
+
+ /*
+ * Setup origLinkAttrs map from the original Configuration:
+ * 1. Parse link-attrs Configuration
+ * 2. Process link-attr Children, warn if overwriting
+ * 3. If no link-attrs, and no link-attr are available, defaults to "href"
+ */
+
+ String linkAttrsValue = conf.getChild("link-attrs").getValue("");
+ this.origLinkAttrs = split(linkAttrsValue, " ", NO_REGEXP);
+
+ Configuration[] attrConfs = conf.getChildren("link-attr");
+ if (attrConfs.length > 0) {
+ RECompiler compiler = new RECompiler();
+ for (int i = 0; i < attrConfs.length; i++) {
+ String attr = attrConfs[i].getAttribute("name");
+ if (getLogger().isWarnEnabled() && origLinkAttrs.containsKey(attr)) {
+ getLogger().warn("Duplicate configuration entry found for attribute '" +
+ attr + "', overwriting previous configuration");
+ }
+
+ String pattern = attrConfs[i].getAttribute("pattern", null);
+ if (pattern == null) {
+ this.origLinkAttrs.put(attr, NO_REGEXP);
+ } else {
+ try {
+ this.origLinkAttrs.put(attr, compiler.compile(pattern));
+ } catch (RESyntaxException e) {
+ String msg = "Invalid regexp pattern '" + pattern + "' specified for attribute '" + attr + "'";
+ throw new ConfigurationException(msg, attrConfs[i], e);
+ }
+ }
+ }
+ }
+
+ // If nothing configured, default to href attribute
+ if (this.origLinkAttrs.size() == 0) {
+ this.origLinkAttrs.put("href", NO_REGEXP);
+ }
}
-
+
/**
* Initiate resources prior to this component becoming active.
*/
public void initialize() throws Exception {
this.namespaceURI = NAMESPACE;
this.modHelper = new XSPModuleHelper();
- modHelper.setup(this.manager);
+ this.modHelper.setup(this.manager);
}
/**
* Setup this component to handle a map:transform instance.
*/
- public void setup(SourceResolver resolver, Map objectModel,
- String src, Parameters parameters)
- throws ProcessingException, SAXException, IOException
- {
+ public void setup(SourceResolver resolver,
+ Map objectModel,
+ String src,
+ Parameters parameters)
+ throws ProcessingException, SAXException, IOException {
super.setup(resolver, objectModel, src, parameters);
- this.badLinkStr = parameters.getParameter("bad-link-str", // per-request config
- origConf.getChild("bad-link-str"). // else fall back to per-instance config
- getValue(null) // else use hardcoded default
- );
- this.linkAttrs = split(parameters.getParameter("link-attrs",
- origConf.getChild("link-attrs").
- getValue("href")
- ), " ");
- this.inSchemes = split(parameters.getParameter("schemes",
- origConf.getChild("schemes").
- getValue("")
- ), " ");
- this.outSchemes = split(parameters.getParameter("exclude-schemes",
- origConf.getChild("exclude-schemes").
- getValue("http https ftp news mailto")
- ), " ");
- if (getLogger().isDebugEnabled()) {
- getLogger().debug("bad-link-str = "+badLinkStr);
- getLogger().debug("link-attrs = "+linkAttrs);
- getLogger().debug("schemes = "+inSchemes);
- getLogger().debug("exclude-schemes = "+outSchemes);
+
+ this.badLinkStr = parameters.getParameter("bad-link-str", // per-request config
+ this.origBadLinkStr); // else fall back to per-instance config
+
+ this.inSchemes = split(parameters.getParameter("schemes", this.origInSchemes), " ");
+ this.outSchemes = split(parameters.getParameter("exclude-schemes", this.origOutSchemes), " ");
+
+ this.linkAttrs = this.origLinkAttrs;
+ if (parameters.isParameter("link-attrs")) {
+ try {
+ this.linkAttrs = split(parameters.getParameter("link-attrs"), " ", NO_REGEXP);
+ } catch (ParameterException ex) {
+ // shouldn't happen
+ }
}
if (getLogger().isDebugEnabled()) {
- getLogger().debug("Will ignore the following schemes: " + outSchemes);
+ getLogger().debug("bad-link-str = " + badLinkStr);
+ getLogger().debug("link-attrs = " + linkAttrs);
+ getLogger().debug("schemes = " + inSchemes);
+ getLogger().debug("exclude-schemes = " + outSchemes);
}
+
// Generate conf
VariableConfiguration varConf = new VariableConfiguration(this.origConf);
varConf.addVariable("src", src);
@@ -237,32 +388,63 @@
/** Recycle this component for use in another map:transform. */
public void recycle() {
super.recycle();
- this.resolver = null;
+
+ // Note: configure() and initialize() are not called after every
+ // recycle, so don't null origConf, origLinkAttrs, etc.
+ this.conf = null;
+ this.badLinkStr = null;
this.linkAttrs = null;
this.inSchemes = null;
this.outSchemes = null;
- this.conf = null;
- // Note: configure() and initialize() are not called after every
- //recycle, so don't null origConf
}
- /** Split a string into a Set of strings.
+ /**
+ * Split a string into a Set of strings.
+ *
* @param str String to split
* @param delim Delimiter character
* @return A Set of strings in 'str'
*/
private Set split(String str, String delim) {
- if (str == null) return null;
- Set schemes = new HashSet();
+ if (str == null) {
+ return null;
+ }
+
+ Set tokens = new HashSet();
+ StringTokenizer st = new StringTokenizer(str, delim);
+ while (st.hasMoreTokens()) {
+ tokens.add(st.nextToken());
+ }
+ return tokens;
+ }
+
+ /**
+ * Split a string and create a Map where keys are the tokens from the string.
+ *
+ * @param str String to split
+ * @param delim Delimiter character
+ * @param valueObj Object to insert in the Map (may be null)
+ * @return A Map whose keys are the tokens of 'str', each mapped to valueObj; null if 'str' is null
+ */
+ private Map split(String str, String delim, Object valueObj) {
+ if (str == null) {
+ return null;
+ }
+
+ // valueObj may be null, because HashMap permits null values
+ Map schemes = new HashMap();
StringTokenizer st = new StringTokenizer(str, delim);
while (st.hasMoreTokens()) {
String pfx = st.nextToken();
- schemes.add(pfx);
+ if (schemes.containsKey(pfx) && getLogger().isWarnEnabled()) {
+ getLogger().warn("Duplicate configuration entry found for attribute '" +
+ pfx + "', overwriting previous configuration");
+ }
+ schemes.put(pfx, valueObj);
}
return schemes;
}
-
/**
* Start processing elements of our namespace.
* This hook is invoked for each sax event with our namespace.
@@ -272,86 +454,135 @@
* @param attr The attributes of the element.
*/
public void startTransformingElement(String uri,
- String name,
- String raw,
- Attributes attr)
- throws ProcessingException, IOException, SAXException
- {
- Attributes newAttrs = null;
+ String name,
+ String raw,
+ Attributes attr)
+ throws ProcessingException, IOException, SAXException {
boolean matched = false;
- Iterator iter = linkAttrs.iterator();
- while (iter.hasNext()) {
- int attrIdx = attr.getIndex((String)iter.next());
- if (attrIdx != -1) {
- String oldAttr = attr.getValue(attrIdx);
- int i = oldAttr.indexOf(":");
- if (i != -1) {
- String scheme = oldAttr.substring(0, i);
- String addr = oldAttr.substring(i+1);
- if (outSchemes.contains(scheme)) {
- if (getLogger().isDebugEnabled()) {
- getLogger().debug("Ignoring link '"+scheme+":"+addr+"'");
- }
- } else if (inSchemes.contains(scheme)) {
- matched = true;
- newAttrs = getLinkAttr(attr, attrIdx, scheme, addr);
- if (getLogger().isDebugEnabled()) {
- getLogger().debug("Converted link '"+oldAttr+"' to '"+newAttrs.getValue(attrIdx)+"'");
- }
- } else {
- if (inSchemes.size() == 0) {
- // If the link wasn't deliberately excluded from a
- // list of 'good' links, then include it.
- matched = true;
- newAttrs = getLinkAttr(attr, attrIdx, scheme, addr);
- getLogger().debug("Converted link '"+oldAttr+"' to '"+newAttrs.getValue(attrIdx)+"'");
- }
- }
+ for (int attrIdx = 0; attrIdx < attr.getLength(); attrIdx++) {
+ String attrName = attr.getQName(attrIdx);
+
+ String attrValue = createTransformedAttr(attrName, attr.getValue(attrIdx));
+ if (attrValue != null) {
+ if (!matched) {
+ attr = new AttributesImpl(attr);
+ matched = true;
}
+ ((AttributesImpl) attr).setValue(attrIdx, attrValue);
}
}
- if (matched) {
- super.startTransformingElement(uri, name, raw, newAttrs);
+ super.startTransformingElement(uri, name, raw, attr);
+ }
+
+ /**
+ * Rewrite set of links in an attribute.
+ *
+ * @param attrName QName of the attribute containing unconverted link(s).
+ * @param oldAttrValue value of the attribute containing unconverted link(s).
+ * @return new value of the attribute based on <code>oldAttrValue</code>, but with link(s) rewritten. If not
+ * modified, returns null (for example, if attribute not found in <code>linkAttrs</code> or not matched to
+ * regexp pattern).
+ */
+ private String createTransformedAttr(
+ String attrName,
+ String oldAttrValue) {
+ if (!linkAttrs.containsKey(attrName)) {
+ return null;
+ }
+
+ String newAttrValue = null;
+ Object reProgram = linkAttrs.get(attrName);
+ if (reProgram == NO_REGEXP) {
+ newAttrValue = createTransformedLink(oldAttrValue);
} else {
- super.startTransformingElement(uri, name, raw, attr);
+ // must be instanceof REProgram
+ RE r = new RE((REProgram) reProgram);
+ if (r.match(oldAttrValue)) {
+ StringBuffer bufOut = new StringBuffer(oldAttrValue);
+ int offset = 0;
+ String link = null;
+ String newLink = null;
+ boolean modified = false;
+
+ // skip the first paren
+ for (int i = 1; i < r.getParenCount(); i++) {
+ link = r.getParen(i);
+ newLink = createTransformedLink(link);
+ if (newLink != null) {
+ bufOut.replace(r.getParenStart(i) + offset,
+ r.getParenEnd(i) + offset,
+ newLink);
+ offset += newLink.length() - r.getParenLength(i);
+ modified = true;
+ }
+ }
+ if (modified) {
+ newAttrValue = bufOut.toString();
+ }
+ }
}
+
+ return newAttrValue;
}
/**
- * Rewrite link in a set of attributes.
+ * Rewrite a link - use InputModule to obtain new value for the link based on <code>oldLink</code>.
*
- * @param oldAttrs Attributes containing unconverted link.
- * @param linkIndex index of link to convert
- * @param scheme URI scheme (indicating InputModule) of link
- * @param addr URI scheme of link
- * @return an Attributes based on <code>oldAttrs</code>, but with one attribute rewritten.
+ * @param oldLink value of the unconverted link.
+ * @return new value of the link. If not modified, returns null (for example, if link scheme
+ * is in <code>outSchemes</code>).
*/
- private Attributes getLinkAttr(Attributes oldAttrs, int linkIndex, String scheme, String addr) {
- AttributesImpl newAttrs = new AttributesImpl(oldAttrs);
- try {
- String modValue = (String)modHelper.getAttribute(this.objectModel, getConf(scheme), scheme, addr, (badLinkStr!=null?badLinkStr:scheme+":"+addr));
- newAttrs.setValue(linkIndex, modValue);
- } catch (org.apache.avalon.framework.CascadingRuntimeException e) {
- // Rethrow Configuration errors
- if (e.getCause() instanceof ConfigurationException) throw e;
+ private String createTransformedLink(String oldLink) {
+ String newLink = null;
+ int i = oldLink.indexOf(":");
+ if (i != -1) {
+ String scheme = oldLink.substring(0, i);
+ String addr = oldLink.substring(i + 1);
+ if (outSchemes.contains(scheme)) {
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("Ignoring link '" + oldLink + "'");
+ }
+ } else if (inSchemes.contains(scheme) || inSchemes.size() == 0) {
+ // If the link wasn't deliberately excluded from a
+ // list of 'good' links, then include it.
+ try {
+ newLink = (String) modHelper.getAttribute(this.objectModel,
+ getConf(scheme),
+ scheme,
+ addr,
+ (badLinkStr != null? badLinkStr: scheme + ":" + addr));
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("Converted link '" + oldLink + "' to '" + newLink + "'");
+ }
+ } catch (org.apache.avalon.framework.CascadingRuntimeException e) {
+ // Rethrow Configuration errors
+ if (e.getCause() instanceof ConfigurationException) {
+ throw e;
+ }
- // Swallow IM errors, usually prefixes like 'telnet' that aren't
- // bound to an InputModule. These should really be declared in
- // 'exclude-schemes', hence the 'error' classification of this log.
- getLogger().error("Error rewriting link '"+scheme+":"+addr+"': "+e.getMessage());
+ // Swallow IM errors, usually prefixes like 'telnet' that aren't
+ // bound to an InputModule. These should really be declared in
+ // 'exclude-schemes', hence the 'error' classification of this log.
+ if (getLogger().isErrorEnabled()) {
+ getLogger().error("Error rewriting link '" + oldLink + "': " +
+ e.getMessage());
+ }
+ }
+ }
}
- return newAttrs;
+ return newLink;
}
/**
- * Retrieve a dynamic Configuration for a specific InputModule.
+ * Retrieve a dynamic configuration for a specific InputModule.
+ *
* @param scheme InputModule name
* @return Configuration for specified scheme, from the map:transformer block.
*/
private Configuration getConf(String scheme) {
Configuration[] schemeConfs = this.conf.getChildren("input-module");
- for (int i=0; i<schemeConfs.length; i++) {
+ for (int i = 0; i < schemeConfs.length; i++) {
if (scheme.equals(schemeConfs[i].getAttribute("name", null))) {
return schemeConfs[i];
}
@@ -363,10 +594,9 @@
* @see org.apache.avalon.framework.activity.Disposable#dispose()
*/
public void dispose() {
- if ( this.modHelper != null ) {
+ if (this.modHelper != null) {
this.modHelper.releaseAll();
this.modHelper = null;
}
}
-
}