You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by jb...@apache.org on 2006/03/25 10:13:47 UTC
svn commit: r388734 - in /cocoon/branches/BRANCH_2_1_X: ./
src/blocks/html/WEB-INF/ src/blocks/html/conf/
src/blocks/html/java/org/apache/cocoon/generation/
src/blocks/html/java/org/apache/cocoon/transformation/
src/blocks/html/samples/ src/blocks/html...
Author: jbq
Date: Sat Mar 25 01:13:44 2006
New Revision: 388734
URL: http://svn.apache.org/viewcvs?rev=388734&view=rev
Log:
COCOON-1639: NekoHTMLTransformer
Also added stylesheet (apache-no-namespace.xsl) for sample of NekoHTMLGenerator
Added:
cocoon/branches/BRANCH_2_1_X/src/blocks/html/WEB-INF/neko.properties (with props)
cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/NekoHTMLTransformer.java (with props)
cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/stylesheets/apache-no-namespace.xsl (with props)
Modified:
cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap
cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap
cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java
cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/samples.xml
cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/sitemap.xmap
cocoon/branches/BRANCH_2_1_X/status.xml
Added: cocoon/branches/BRANCH_2_1_X/src/blocks/html/WEB-INF/neko.properties
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/WEB-INF/neko.properties?rev=388734&view=auto
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/WEB-INF/neko.properties (added)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/WEB-INF/neko.properties Sat Mar 25 01:13:44 2006
@@ -0,0 +1,33 @@
+# Properties file used by NekoHTMLGenerator, NekoHTMLTransformer.
+# List compiled based on NekoHTML 0.9.5, see also:
+# http://people.apache.org/~andyc/neko/doc/html/settings.html
+
+# values below are commented out as they are the defaults anyway
+
+#http\://xml.org/sax/features/namespaces=true
+#http\://cyberneko.org/html/features/balance-tags=true
+#http\://cyberneko.org/html/features/override-doctype=false
+#http\://cyberneko.org/html/features/insert-doctype=false
+#http\://cyberneko.org/html/features/override-namespaces=false
+#http\://cyberneko.org/html/features/insert-namespaces=false
+#http\://cyberneko.org/html/features/balance-tags/ignore-outside-content=false
+#http\://cyberneko.org/html/features/balance-tags/document-fragment=false
+#http\://cyberneko.org/html/features/scanner/cdata-sections=false
+#http\://apache.org/xml/features/scanner/notify-char-refs=false
+#http\://apache.org/xml/features/scanner/notify-builtin-refs=false
+#http\://cyberneko.org/html/features/scanner/notify-builtin-refs=false
+#http\://cyberneko.org/html/features/scanner/fix-mswindows-refs=false
+#http\://cyberneko.org/html/features/scanner/ignore-specified-charset=false
+#http\://cyberneko.org/html/features/scanner/script/strip-comment-delims=false
+#http\://cyberneko.org/html/features/scanner/script/strip-cdata-delims=false
+#http\://cyberneko.org/html/features/scanner/style/strip-comment-delims=false
+#http\://cyberneko.org/html/features/scanner/style/strip-cdata-delims=false
+#http\://cyberneko.org/html/features/augmentations=false
+#http\://cyberneko.org/html/features/report-errors=false
+#http\://cyberneko.org/html/properties/default-encoding=Windows-1252
+# NB Neko default for names/elems is "upper", but generator/transformer override this
+#http\://cyberneko.org/html/properties/names/elems=lower
+#http\://cyberneko.org/html/properties/names/attrs=lower
+#http\://cyberneko.org/html/properties/doctype/pubid=-//W3C//DTD HTML 4.01 Transitional//EN
+#http\://cyberneko.org/html/properties/doctype/sysid=http://www.w3.org/TR/html4/loose.dtd
+#http\://cyberneko.org/html/properties/namespaces-uri=http://www.w3.org/1999/xhtml
Propchange: cocoon/branches/BRANCH_2_1_X/src/blocks/html/WEB-INF/neko.properties
------------------------------------------------------------------------------
svn:eol-style = native
Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap?rev=388734&r1=388733&r2=388734&view=diff
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap (original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html-transformer.xmap Sat Mar 25 01:13:44 2006
@@ -22,8 +22,19 @@
<map:transformer
name="html"
logger="sitemap.transformer.html"
- src="org.apache.cocoon.transformation.HTMLTransformer"
- />
+ src="org.apache.cocoon.transformation.HTMLTransformer">
+ <!-- Tidy configuration file.
+ <jtidy-config>context://WEB-INF/tidy.properties</jtidy-config>
+ -->
+ </map:transformer>
+ <map:transformer
+ name="nekohtml"
+ logger="sitemap.transformer.html"
+ src="org.apache.cocoon.transformation.NekoHTMLTransformer">
+ <!-- NekoHTML configuration file.
+ <neko-config>context://WEB-INF/neko.properties</neko-config>
+ -->
+ </map:transformer>
<map:transformer
name="htmlcleanup"
Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap?rev=388734&r1=388733&r2=388734&view=diff
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap (original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap Sat Mar 25 01:13:44 2006
@@ -31,8 +31,8 @@
logger="sitemap.generator.html"
src="org.apache.cocoon.generation.NekoHTMLGenerator"
label="content">
- <!-- Tidy configuration file.
- <neko-config>???</neko-config>
+ <!-- NekoHTML configuration file.
+ <neko-config>context://WEB-INF/neko.properties</neko-config>
-->
</map:generator>
</xmap>
Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java?rev=388734&r1=388733&r2=388734&view=diff
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java (original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java Sat Mar 25 01:13:44 2006
@@ -316,7 +316,13 @@
if (properties != null) {
for (Iterator i = properties.keySet().iterator();i.hasNext();) {
String name = (String) i.next();
- config.setProperty(name, properties.getProperty(name));
+ if (name.indexOf("/features/") > -1) {
+ // Parse the configured value itself: Boolean.getBoolean() would look up a JVM
+ // system property named after the value and yield false unless one is set.
+ config.setFeature(name, Boolean.valueOf(properties.getProperty(name)).booleanValue());
+ } else if (name.indexOf("/properties/") > -1) {
+ config.setProperty(name, properties.getProperty(name));
+ }
}
}
return config;
Added: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/NekoHTMLTransformer.java
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/NekoHTMLTransformer.java?rev=388734&view=auto
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/NekoHTMLTransformer.java (added)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/NekoHTMLTransformer.java Sat Mar 25 01:13:44 2006
@@ -0,0 +1,243 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.transformation;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Properties;
+import java.util.StringTokenizer;
+
+import org.apache.avalon.framework.configuration.Configurable;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.cocoon.ProcessingException;
+import org.apache.cocoon.environment.SourceResolver;
+import org.apache.cocoon.xml.dom.DOMBuilder;
+import org.apache.cocoon.xml.IncludeXMLConsumer;
+import org.apache.cocoon.xml.XMLUtils;
+import org.apache.excalibur.source.Source;
+import org.apache.xerces.parsers.AbstractSAXParser;
+import org.cyberneko.html.HTMLConfiguration;
+import org.w3c.dom.Document;
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+/**
+ * Converts (escaped) HTML snippets into tidied HTML using the NekoHTML library.
+ * This transformer expects a list of elements, passed as comma separated
+ * values of the "tags" parameter. It records the text enclosed in such
+ * elements and passes it through Neko to obtain well-formed XML.
+ *
+ * @version $Id$
+ */
+public class NekoHTMLTransformer
+    extends AbstractSAXTransformer
+    implements Configurable {
+
+    /**
+     * Neko features and properties loaded from the optional
+     * <code>neko-config</code> file; stays <code>null</code> when none is configured.
+     */
+    private Properties properties;
+
+    /**
+     * Names of the elements whose text content must be normalized
+     * (used as a set: every key maps to itself).
+     */
+    private Map tags;
+
+    /**
+     * React on endElement calls that contain a tag to be
+     * tidied and run Neko on it, otherwise passthru.
+     *
+     * @throws SAXException if the recorded text cannot be normalized; the
+     *         failure is propagated instead of silently dropping the content
+     * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
+     */
+    public void endElement(String uri, String name, String raw)
+    throws SAXException {
+        if (this.tags.containsKey(name)) {
+            String toBeNormalized = this.endTextRecording();
+            try {
+                this.normalize(toBeNormalized);
+            } catch (ProcessingException e) {
+                throw new SAXException("Cannot normalize the content of element " + raw, e);
+            }
+        }
+        super.endElement(uri, name, raw);
+    }
+
+    /**
+     * Start buffering text if inside a tag to be normalized,
+     * passthru otherwise.
+     *
+     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
+     */
+    public void startElement(
+        String uri,
+        String name,
+        String raw,
+        Attributes attr)
+    throws SAXException {
+        super.startElement(uri, name, raw, attr);
+        if (this.tags.containsKey(name)) {
+            this.startTextRecording();
+        }
+    }
+
+    /**
+     * Configure this transformer, possibly passing to it
+     * a Neko configuration file location (<code>neko-config</code> child).
+     *
+     * @throws ConfigurationException if the configured file cannot be loaded
+     */
+    public void configure(Configuration config) throws ConfigurationException {
+        super.configure(config);
+
+        String configUrl = config.getChild("neko-config").getValue(null);
+        if (configUrl != null) {
+            org.apache.excalibur.source.SourceResolver resolver = null;
+            Source configSource = null;
+            java.io.InputStream configStream = null;
+            try {
+                resolver = (org.apache.excalibur.source.SourceResolver)
+                    this.manager.lookup(org.apache.excalibur.source.SourceResolver.ROLE);
+                configSource = resolver.resolveURI(configUrl);
+                if (getLogger().isDebugEnabled()) {
+                    getLogger().debug(
+                        "Loading configuration from " + configSource.getURI());
+                }
+                configStream = configSource.getInputStream();
+                this.properties = new Properties();
+                this.properties.load(configStream);
+            } catch (Exception e) {
+                getLogger().warn("Cannot load configuration from " + configUrl);
+                throw new ConfigurationException(
+                    "Cannot load configuration from " + configUrl,
+                    e);
+            } finally {
+                if (null != configStream) {
+                    try {
+                        configStream.close();
+                    } catch (IOException ignored) {
+                        // nothing useful can be done when close() fails
+                    }
+                }
+                if (null != resolver) {
+                    // Release the source first: that call goes through the resolver,
+                    // which must not be used once handed back to the manager.
+                    resolver.release(configSource);
+                    this.manager.release(resolver);
+                }
+            }
+        }
+    }
+
+    /**
+     * The beef: run Neko on the buffered text and stream
+     * the result
+     *
+     * @param text the string to be tidied
+     * @throws ProcessingException if parsing or streaming the result fails
+     */
+    private void normalize(String text) throws ProcessingException {
+        try {
+            HtmlSaxParser parser = new HtmlSaxParser(this.properties);
+
+            // Feed the characters directly: text.getBytes() would encode them with
+            // the platform charset, while Neko decodes bytes as Windows-1252 by default.
+            InputSource input = new InputSource(new java.io.StringReader(text));
+
+            DOMBuilder builder = new DOMBuilder();
+            parser.setContentHandler(builder);
+            parser.parse(input);
+            Document doc = builder.getDocument();
+
+            IncludeXMLConsumer.includeNode(doc, this.contentHandler, this.lexicalHandler);
+        } catch (Exception e) {
+            throw new ProcessingException(
+                "Exception in NekoHTMLTransformer.normalize()",
+                e);
+        }
+    }
+
+    /**
+     * Setup this component, passing the tag names to be tidied.
+     */
+    public void setup(
+        SourceResolver resolver,
+        Map objectModel,
+        String src,
+        Parameters par)
+    throws ProcessingException, SAXException, IOException {
+        super.setup(resolver, objectModel, src, par);
+        String tagsParam = par.getParameter("tags", "");
+        if (getLogger().isDebugEnabled()) {
+            getLogger().debug("tags: " + tagsParam);
+        }
+        this.tags = new HashMap();
+        StringTokenizer tokenizer = new StringTokenizer(tagsParam, ",");
+        while (tokenizer.hasMoreTokens()) {
+            String tok = tokenizer.nextToken().trim();
+            this.tags.put(tok, tok);
+        }
+    }
+
+    /**
+     * SAX parser running on a NekoHTML configuration built from the
+     * supplied properties.
+     */
+    public static class HtmlSaxParser extends AbstractSAXParser {
+
+        public HtmlSaxParser(Properties properties) {
+            super(getConfig(properties));
+        }
+
+        /**
+         * Builds the Neko configuration. Element names are set to lower case
+         * first; then every key containing "/features/" is applied as a boolean
+         * feature and every key containing "/properties/" as a property, so
+         * the file may override the element-name default.
+         */
+        private static HTMLConfiguration getConfig(Properties properties) {
+            HTMLConfiguration config = new HTMLConfiguration();
+            config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
+            if (properties != null) {
+                for (Iterator i = properties.keySet().iterator();i.hasNext();) {
+                    String name = (String) i.next();
+                    String value = properties.getProperty(name);
+                    if (name.indexOf("/features/") > -1) {
+                        // Boolean.getBoolean() would look up a JVM system property
+                        // named after the value; parse the value itself instead.
+                        config.setFeature(name, Boolean.valueOf(value).booleanValue());
+                    } else if (name.indexOf("/properties/") > -1) {
+                        config.setProperty(name, value);
+                    }
+                }
+            }
+            return config;
+        }
+    }
+
+}
Propchange: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/NekoHTMLTransformer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/NekoHTMLTransformer.java
------------------------------------------------------------------------------
svn:keywords = Id
Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/samples.xml
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/samples.xml?rev=388734&r1=388733&r2=388734&view=diff
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/samples.xml (original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/samples.xml Sat Mar 25 01:13:44 2006
@@ -26,13 +26,15 @@
<group name="HTMLGenerator">
<sample name="Cocoon News Website" href="apache">
- Shows how to get remote resource and convert it to valid XHTML using HTMLGenerator.
+ Shows how to get a remote resource and convert it to valid XHTML using HTMLGenerator. Tidy produces content in the
+ XHTML namespace.
</sample>
</group>
<group name="NekoHTMLGenerator">
<sample name="Cocoon News Website" href="apache-neko">
- Shows how to get remote resource and convert it to valid XML using NekoHTMLGenerator.
+ Shows how to get a remote resource and convert it to valid XML using NekoHTMLGenerator. Neko produces content
+ without a namespace.
</sample>
</group>
@@ -42,6 +44,17 @@
as strings inside elements (as often found in RSS feeds)
</sample>
<sample name="Parsed output" href="HTMLTransformer/parsed.xml">
+ HTMLTransformer applied to input.xml: escaped
+ HTML markup is converted to XHTML
+ </sample>
+ </group>
+
+ <group name="NekoHTMLTransformer">
+ <sample name="XML input" href="NekoHTMLTransformer/input.xml">
+ XML document containing escaped HTML, blocks of HTML code written
+ as strings inside elements (as often found in RSS feeds)
+ </sample>
+ <sample name="Parsed output" href="NekoHTMLTransformer/parsed.xml">
- HTMLTransformer applied to input.xml: escaped
+ NekoHTMLTransformer applied to input.xml: escaped
HTML markup is converted to XHTML
</sample>
Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/sitemap.xmap
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/sitemap.xmap?rev=388734&r1=388733&r2=388734&view=diff
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/sitemap.xmap (original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/sitemap.xmap Sat Mar 25 01:13:44 2006
@@ -71,7 +71,8 @@
<map:match pattern="apache-neko">
<map:generate type="nekohtml" src="http://cocoon.apache.org/news"/>
- <map:serialize type="xml"/>
+ <map:transform src="stylesheets/apache-no-namespace.xsl"/>
+ <map:serialize type="xhtml"/>
</map:match>
<!-- ================ HTMLTransformer ================= -->
@@ -90,6 +91,21 @@
<map:serialize type="xhtml"/>
</map:match>
+ <!-- ================ NekoHTMLTransformer ================= -->
+
+ <map:match pattern="NekoHTMLTransformer/input.xml">
+ <map:generate src="htmltransformer/input.xml"/>
+ <map:serialize type="xml"/>
+ </map:match>
+
+ <map:match pattern="NekoHTMLTransformer/parsed.xml">
+ <map:generate src="htmltransformer/input.xml"/>
+ <map:transform type="nekohtml">
+ <map:parameter name="tags" value="description,escaped-html"/>
+ </map:transform>
+ <map:transform src="htmltransformer/post-transformer-filter.xsl"/>
+ <map:serialize type="xhtml"/>
+ </map:match>
</map:pipeline>
</map:pipelines>
</map:sitemap>
Added: cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/stylesheets/apache-no-namespace.xsl
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/stylesheets/apache-no-namespace.xsl?rev=388734&view=auto
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/stylesheets/apache-no-namespace.xsl (added)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/stylesheets/apache-no-namespace.xsl Sat Mar 25 01:13:44 2006
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Copyright 1999-2004 The Apache Software Foundation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+  Sample stylesheet for the "apache-neko" pipeline: reduces the page parsed by
+  NekoHTMLGenerator to its title and the lists found in div[@class='content'].
+  Match patterns are unprefixed because Neko produces content without namespace.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+<!-- Page skeleton: title, heading, then one list per matching ul. -->
+<xsl:template match="/html">
+  <html>
+    <head>
+      <title><xsl:value-of select="head/title"/></title>
+    </head>
+    <body>
+      <h2><xsl:value-of select="head/title"/></h2>
+      <!-- No wrapping ul here: the "ul" template emits its own, and a ul
+           placed directly inside another ul is not valid XHTML. -->
+      <xsl:apply-templates select="//div[@class='content']/ul"/>
+    </body>
+  </html>
+</xsl:template>
+
+<!-- Copies a list, keeping only its li children. -->
+<xsl:template match="ul">
+  <ul>
+    <xsl:apply-templates select="li"/>
+  </ul>
+</xsl:template>
+
+<!-- Keeps an item's text, links and nested lists; other markup falls back to the built-in rules. -->
+<xsl:template match="li">
+  <li><xsl:apply-templates/></li>
+</xsl:template>
+
+<!-- Rebuilds a link under the news URL, labelled with its first text child.
+     NOTE(review): assumes @href is relative; an absolute one would give a broken URL. Confirm. -->
+<xsl:template match="a">
+  <a href="http://cocoon.apache.org/news/{@href}" title="{@title}">
+    <xsl:value-of select="text()"/>
+  </a>
+</xsl:template>
+
+</xsl:stylesheet>
Propchange: cocoon/branches/BRANCH_2_1_X/src/blocks/html/samples/stylesheets/apache-no-namespace.xsl
------------------------------------------------------------------------------
svn:eol-style = native
Modified: cocoon/branches/BRANCH_2_1_X/status.xml
URL: http://svn.apache.org/viewcvs/cocoon/branches/BRANCH_2_1_X/status.xml?rev=388734&r1=388733&r2=388734&view=diff
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/status.xml (original)
+++ cocoon/branches/BRANCH_2_1_X/status.xml Sat Mar 25 01:13:44 2006
@@ -180,6 +180,10 @@
<release version="@version@" date="@date@">
-->
<release version="2.1.9" date="TBD">
+ <action dev="JBQ" type="add" fixes-bug="COCOON-1639" due-to="Andrew Stevens" due-to-email="ats37@hotmail.com">
+ Added the NekoHTMLTransformer. Updated the NekoHTMLGenerator's setup bits to allow for setting parser features as
+ well as properties, and provided a sample neko.properties configuration file.
+ </action>
<action dev="AG" type="update">
Updated asm to 2.2.1, asm-util to 2.2.1 groovy to 1.0-jsr-05 and antlr to 2.7.6.
</action>