You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by cr...@apache.org on 2006/03/03 02:22:09 UTC

svn commit: r382601 - in /cocoon/branches/BRANCH_2_1_X: src/blocks/html/conf/html-transformer.xmap src/blocks/html/java/org/apache/cocoon/transformation/CleanupTransformer.java status.xml

Author: crossley
Date: Thu Mar  2 17:22:07 2006
New Revision: 382601

URL: http://svn.apache.org/viewcvs?rev=382601&view=rev
Log:
html block: Add CleanupTransformer, which indents HTML, adds relevant line breaks,
and removes any namespaces that are not wanted.
fixes-bug="COCOON-1206" due-to="Miles Elam"

Added:
    cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/CleanupTransformer.java   (with props)
Modified:
    cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap
    cocoon/branches/BRANCH_2_1_X/status.xml

Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap?rev=382601&r1=382600&r2=382601&view=diff
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap (original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap Thu Mar  2 17:22:07 2006
@@ -24,4 +24,19 @@
       logger="sitemap.transformer.html"
       src="org.apache.cocoon.transformation.HTMLTransformer"
     />
+
+    <map:transformer
+        name="htmlcleanup"
+        logger="sitemap.transformer.html"
+        src="org.apache.cocoon.transformation.CleanupTransformer">
+      <preserve-uri>*</preserve-uri>
+    </map:transformer>
+
+    <map:transformer
+        name="xhtmlcleanup"
+        logger="sitemap.transformer.html"
+        src="org.apache.cocoon.transformation.CleanupTransformer">
+      <inline-elements>a,abbr,acronym,b,br,font,i,u,img</inline-elements>
+      <preserve-uri>http://www.w3.org/1999/xhtml</preserve-uri>
+    </map:transformer>
 </xmap>

Added: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/CleanupTransformer.java
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/CleanupTransformer.java?rev=382601&view=auto
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/CleanupTransformer.java (added)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/CleanupTransformer.java Thu Mar  2 17:22:07 2006
@@ -0,0 +1,281 @@
+/*
+ * Copyright 2006 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.transformation;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Map;
+import java.util.LinkedList;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.StringTokenizer;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.apache.cocoon.ProcessingException;
+import org.apache.cocoon.caching.CacheableProcessingComponent;
+import org.apache.cocoon.environment.SourceResolver;
+import org.apache.cocoon.transformation.AbstractSAXTransformer;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.excalibur.source.SourceValidity;
+import org.apache.excalibur.source.impl.validity.NOPValidity;
+
+/**
+ * Cleanup transformer: Removes excess whitespace while adding some where needed
+ *  for legibility. Strips unwanted namespace declarations.
+ *
+ * <p>The cleanup transformer can be used for basically any document as-is or customized by
+ *  schema (inline vs. block elements) for easier reading.</p>
+ *
+ * <p>Transformer declaration:
+ *  &lt;map:components&gt;
+ *   &lt;map:transformers&gt;
+ *    &lt;map:transformer name="htmlcleanup"
+ *            src="org.apache.cocoon.transformation.CleanupTransformer"&gt;
+ *     &lt;preserve-uri&gt;*&lt;/preserve-uri&gt;
+ *    &lt;/map:transformer&gt;
+ *
+ *    &lt;map:transformer name="xhtmlcleanup"
+ *           src="org.apache.cocoon.transformation.CleanupTransformer"&gt;
+ *     &lt;inline-elements&gt;a,abbr,acronym,b,br,font,i,u,img&lt;/inline-elements&gt;
+ *     &lt;preserve-uri&gt;http://www.w3.org/1999/xhtml&lt;/preserve-uri&gt;
+ *    &lt;/map:transformer&gt;
+ *   &lt;/map:transformers&gt;
+ *  &lt;/map:components&gt;
+ * </p>
+ *
+ * <p>The "inline-elements" configuration element refers to a list of element names that are
+ *  <strong>not</strong> to be indented.  The "preserve-uri" configuration element specifies a
+ *  namespace uri mapping that is meant for output.  All other namespace declarations are
+ *  stripped from the output.  The "preserve-uri" element may appear more than once.  If
+ *  "preserve-uri" is omitted, all namespaces/prefixes are removed from the output.</p>
+ *
+ * <p>Transformer usage:
+ *  &lt;map:transform type="xhtmlcleanup"&gt;
+ *   &lt;map:parameter name="indent-size" value="4"/&gt;
+ *  &lt;/map:transform&gt;
+ * </p>
+ *
+ * <p>The optional parameter "indent-size" specifies the number of additional space characters
+ *  appearing at each level of the output document.  The default value is 2.</p>
+ *
+ * <p>Each namespace declaration is tracked on its own, so a prefix may be redeclared on a nested element.</p>
+ *
+ * @author Miles Elam
+ */
+public class CleanupTransformer
+extends AbstractSAXTransformer
+implements CacheableProcessingComponent {
+
+    /** Widest indent, in spaces, that is ever written; deeper nesting is capped at this width. */
+    private static final int MAX_INDENT = 160;
+
+    /** A line break followed by MAX_INDENT spaces; each indent is written as a leading slice. */
+    private static final char[] INDENT = new char[MAX_INDENT + 1];
+    static {
+        java.util.Arrays.fill(INDENT, ' ');
+        INDENT[0] = '\n';
+    }
+
+    /** True when a "preserve-uri" of "*" was configured: no declaration is stripped. */
+    private boolean allowAllURIs = false;
+    /** Namespace URIs whose declarations are passed on to the content handler. */
+    private Set allowedURIs = new HashSet();
+    /** Local names of the elements that are never moved to a line of their own. */
+    private Set inlineElements = new HashSet();
+    /** Open prefix declarations, oldest first, as {prefix, Boolean forwarded} pairs. */
+    private LinkedList uriPrefixes = new LinkedList();
+    /** Number of spaces added per nesting level. */
+    private int indentSize = 2;
+    /** Number of block (non-inline) elements currently open. */
+    private int numIndents = 0;
+    /** Local name of the block element opened last; null if a block element has closed since. */
+    private String lastElement = null;
+
+    /**
+     * Reads the inline element names and the namespace URIs to preserve.
+     *
+     * @param conf configuration holding the optional "inline-elements" and "preserve-uri" children
+     * @throws ConfigurationException part of the signature; the lookups here fall back to defaults
+     */
+    public void configure(Configuration conf)
+    throws ConfigurationException {
+        // Start from a clean slate so that a repeated call cannot keep stale entries.
+        this.inlineElements.clear();
+        this.allowedURIs.clear();
+        this.allowAllURIs = false;
+
+        String inlineNames = conf.getChild("inline-elements").getValue("");
+        StringTokenizer st = new StringTokenizer(inlineNames, ",");
+        while (st.hasMoreTokens()) {
+            String nextElement = st.nextToken().trim();
+            if (nextElement.length() > 0) {
+                this.inlineElements.add(nextElement);
+            }
+        }
+
+        Configuration[] uriChildren = conf.getChildren("preserve-uri");
+        for (int i = 0; i < uriChildren.length; ++i) {
+            String nextChild = uriChildren[i].getValue("").trim();
+            if (nextChild.equals("*")) {
+                // Wildcard: every declaration is kept, so the remaining entries do not matter.
+                this.allowAllURIs = true;
+                break;
+            }
+            if (nextChild.length() > 0) {
+                this.allowedURIs.add(nextChild);
+            }
+        }
+    }
+
+    /**
+     * Prepares the transformer for a run and reads the optional "indent-size" parameter.
+     * The default is 2; a negative value is treated as 0 because a negative indent cannot
+     * be written.
+     */
+    public void setup (SourceResolver resolver, Map objectModel, String src, Parameters par)
+    throws ProcessingException, SAXException, IOException {
+        super.setup(resolver, objectModel, src, par);
+        this.indentSize = Math.max(0, par.getParameterAsInteger("indent-size", 2));
+    }
+
+    /** Resets the per-document state, including prefix records left over from an aborted run. */
+    public void recycle () {
+        super.recycle();
+        this.numIndents = 0;
+        this.lastElement = null;
+        this.uriPrefixes.clear();
+    }
+
+    /** Returns the cache key, which is the indent size rendered as a string. */
+    public Serializable getKey () {
+        return Integer.toString(this.indentSize);
+    }
+
+    /** Returns the shared {@link NOPValidity} instance. */
+    public SourceValidity getValidity () {
+        return NOPValidity.SHARED_INSTANCE;
+    }
+
+    /**
+     * Forwards a namespace declaration only if its URI is to be preserved.  When filtering,
+     * every declaration is recorded, forwarded or not, so its end event can be paired with it.
+     */
+    public void startPrefixMapping (String prefix, String uri)
+    throws SAXException {
+        if (this.allowAllURIs) {
+            this.contentHandler.startPrefixMapping(prefix, uri);
+            return;
+        }
+        boolean forwarded = this.allowedURIs.contains(uri);
+        this.uriPrefixes.add(new Object[] { prefix, Boolean.valueOf(forwarded) });
+        if (forwarded) {
+            this.contentHandler.startPrefixMapping(prefix, uri);
+        }
+    }
+
+    /**
+     * Closes the most recent open declaration of <code>prefix</code> and forwards the event only
+     * if the matching start was forwarded.  SAX does not guarantee the order of the end events
+     * for prefixes declared on the same element, hence the search instead of a plain pop.
+     */
+    public void endPrefixMapping (String prefix)
+    throws SAXException {
+        if (this.allowAllURIs) {
+            this.contentHandler.endPrefixMapping(prefix);
+            return;
+        }
+        for (int i = this.uriPrefixes.size() - 1; i >= 0; --i) {
+            Object[] entry = (Object[]) this.uriPrefixes.get(i);
+            if (entry[0].equals(prefix)) {
+                this.uriPrefixes.remove(i);
+                if (((Boolean) entry[1]).booleanValue()) {
+                    this.contentHandler.endPrefixMapping(prefix);
+                }
+                return;
+            }
+        }
+    }
+
+    /**
+     * Opens an element; a block element is first moved to a fresh, indented line.
+     * The parameters follow the SAX order (namespace URI, local name, qualified name), so
+     * inline elements are recognised by their local name.
+     */
+    public void startElement (String uri, String lName, String qName, Attributes attrs)
+    throws SAXException {
+        if (!this.inlineElements.contains(lName)) {
+            writeIndent();
+            ++this.numIndents;
+            this.lastElement = lName;
+        }
+        this.contentHandler.startElement(uri, lName, qName, attrs);
+    }
+
+    /**
+     * Closes an element.  The end tag of a block element gets a line of its own unless a block
+     * element of the same local name was opened last and no block element has closed since.
+     */
+    public void endElement (String uri, String lName, String qName)
+    throws SAXException {
+        if (!this.inlineElements.contains(lName)) {
+            --this.numIndents;
+            if (this.lastElement == null || !this.lastElement.equals(lName)) {
+                writeIndent();
+            }
+            this.lastElement = null;
+        }
+        this.contentHandler.endElement(uri, lName, qName);
+    }
+
+    /**
+     * Writes a line break plus the indent for the current depth as ignorable whitespace.
+     * The width is clamped to 0..MAX_INDENT; wrapping it with a modulo would make deeply
+     * nested elements jump back to the left margin.
+     */
+    private void writeIndent ()
+    throws SAXException {
+        long wanted = (long) this.indentSize * this.numIndents;
+        int width = (int) Math.max(0L, Math.min(wanted, (long) MAX_INDENT));
+        this.contentHandler.ignorableWhitespace(INDENT, 0, width + 1);
+    }
+
+    /**
+     * Passes text containing any non-whitespace character through unchanged and collapses a
+     * whitespace-only chunk into a single space.  An empty chunk is dropped.
+     */
+    public void characters (char[] ch, int start, int length)
+    throws SAXException {
+        if (length <= 0) {
+            return;   // nothing to collapse; do not invent a space
+        }
+        int end = start + length;
+        for (int i = start; i < end; ++i) {
+            if (!Character.isWhitespace(ch[i])) {
+                this.contentHandler.characters(ch, start, length);
+                return;
+            }
+        }
+        this.contentHandler.characters(INDENT, 1, 1);
+    }
+
+    /** Drops incoming ignorable whitespace; this transformer writes its own. */
+    public void ignorableWhitespace (char[] ch, int start, int length)
+    throws SAXException {
+        // Do nothing
+    }
+}

Propchange: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/CleanupTransformer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: cocoon/branches/BRANCH_2_1_X/status.xml
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/status.xml?rev=382601&r1=382600&r2=382601&view=diff
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/status.xml (original)
+++ cocoon/branches/BRANCH_2_1_X/status.xml Thu Mar  2 17:22:07 2006
@@ -180,6 +180,10 @@
   <release version="@version@" date="@date@">
 -->
   <release version="2.1.9" date="TBD">
+    <action dev="DC" type="add" fixes-bug="COCOON-1206" due-to="Miles Elam" due-to-email="miles@geekspeak.org">
+       html block: Add CleanupTransformer, which indents HTML, adds relevant line breaks,
+       and removes any namespaces that are not wanted.
+    </action>
     <action dev="JBQ" type="fix" fixes-bug="COCOON-1371" due-to="George Georgovassilis" due-to-email="georgeg@open.gr">
       Allow ImageReader to process other image formats than JPEG
     </action>