You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by vg...@apache.org on 2004/11/12 02:54:50 UTC

svn commit: rev 57483 - in cocoon/branches/BRANCH_2_1_X: . lib lib/optional src/blocks/html/WEB-INF src/blocks/html/conf src/blocks/html/java/org/apache/cocoon/generation src/blocks/html/java/org/apache/cocoon/transformation

Author: vgritsenko
Date: Thu Nov 11 17:54:49 2004
New Revision: 57483

Added:
   cocoon/branches/BRANCH_2_1_X/lib/optional/nekohtml-0.9.3.jar
      - copied unchanged from rev 57469, cocoon/trunk/src/blocks/html/lib/nekohtml-0.9.3.jar
   cocoon/branches/BRANCH_2_1_X/src/blocks/html/WEB-INF/
      - copied from rev 57469, cocoon/trunk/src/blocks/html/WEB-INF/
   cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap
      - copied, changed from rev 57468, cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/tidy.xmap
   cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java
      - copied, changed from rev 57469, cocoon/trunk/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java
Removed:
   cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/tidy.xmap
Modified:
   cocoon/branches/BRANCH_2_1_X/gump.xml
   cocoon/branches/BRANCH_2_1_X/lib/jars.xml
   cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/HTMLGenerator.java
   cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/HTMLTransformer.java
   cocoon/branches/BRANCH_2_1_X/status.xml
Log:
merge html block (trunk -> branch 2.1)


Modified: cocoon/branches/BRANCH_2_1_X/gump.xml
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/gump.xml	(original)
+++ cocoon/branches/BRANCH_2_1_X/gump.xml	Thu Nov 11 17:54:49 2004
@@ -528,9 +528,10 @@
 
     <depend project="cocoon" inherit="all"/>
     <depend project="jtidy"/>
-    <depend project="jakarta-servletapi"/>
+    <depend project="nekohtml"/>
 
     <library name="jtidy"/>
+    <library name="nekohtml"/>
 
     <work nested="tools/anttasks"/>
     <home nested="build/cocoon-@@DATE@@"/>

Modified: cocoon/branches/BRANCH_2_1_X/lib/jars.xml
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/lib/jars.xml	(original)
+++ cocoon/branches/BRANCH_2_1_X/lib/jars.xml	Thu Nov 11 17:54:49 2004
@@ -538,6 +538,14 @@
   </file>
 
   <file>
+    <title>Transform HTML to XML</title>
+    <description>NekoHTML is a lightweight HTML syntax corrector written using the Xerces Native Interface.</description>
+    <used-by>NekoHTML generator (html block)</used-by>
+    <lib>optional/nekohtml-0.9.3.jar</lib>
+    <homepage>http://www.apache.org/~andyc/neko/</homepage>
+  </file>
+
+  <file>
     <title>Search engine</title>
     <description>
       jakarta-lucene is a search engine toolkit designed for indexing and

Copied: cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap (from rev 57468, cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/tidy.xmap)
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/tidy.xmap	(original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/conf/html.xmap	Thu Nov 11 17:54:49 2004
@@ -18,7 +18,20 @@
 <xmap xpath="/sitemap/components/generators"
       unless="generator[@name='html']">
 
-    <map:generator name="html" logger="sitemap.generator.html"
+    <map:generator name="html"
+                   logger="sitemap.generator.html"
                    src="org.apache.cocoon.generation.HTMLGenerator"
-                   label="content"/>
+                   label="content">
+      <!-- Tidy configuration file.
+      <jtidy-config>context://WEB-INF/tidy.properties</jtidy-config>
+      -->
+    </map:generator>
+    <map:generator name="nekohtml"
+                   logger="sitemap.generator.html"
+                   src="org.apache.cocoon.generation.NekoHTMLGenerator"
+                   label="content">
+      <!-- NekoHTML configuration file.
+      <neko-config>???</neko-config>
+      -->
+    </map:generator>
 </xmap>

Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/HTMLGenerator.java
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/HTMLGenerator.java	(original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/HTMLGenerator.java	Thu Nov 11 17:54:49 2004
@@ -1,12 +1,12 @@
 /*
  * Copyright 1999-2004 The Apache Software Foundation.
- * 
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * 
+ *
  *      http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -34,7 +34,6 @@
 import org.apache.avalon.framework.service.ServiceException;
 import org.apache.avalon.framework.service.ServiceManager;
 import org.apache.cocoon.ProcessingException;
-import org.apache.cocoon.ResourceNotFoundException;
 import org.apache.cocoon.caching.CacheableProcessingComponent;
 import org.apache.cocoon.components.source.SourceUtil;
 import org.apache.cocoon.environment.ObjectModelHelper;
@@ -56,13 +55,13 @@
  * @cocoon.sitemap.component.documentation
  * The html generator reads HTML from a source, converts it to XHTML
  * and generates SAX Events.
- * 
+ *
  * @cocoon.sitemap.component.name   html
  * @cocoon.sitemap.component.label  content
  * @cocoon.sitemap.component.logger sitemap.generator.html
  * @cocoon.sitemap.component.documentation.caching
  *               Uses the last modification date of the xml document for validation
- * 
+ *
  * @cocoon.sitemap.component.pooling.min   4
  * @cocoon.sitemap.component.pooling.max  32
  * @cocoon.sitemap.component.pooling.grow  4
@@ -72,7 +71,7 @@
  * @author <a href="mailto:barozzi@nicolaken.com">Nicola Ken Barozzi</a>
  * @author <a href="mailto:gianugo@apache.org">Gianugo Rabellino</a>
  *
- * @version CVS $Id: HTMLGenerator.java,v 1.12 2004/05/03 13:07:26 cziegeler Exp $
+ * @version CVS $Id$
  */
 public class HTMLGenerator extends ServiceableGenerator
 implements Configurable, CacheableProcessingComponent, Disposable {
@@ -153,7 +152,7 @@
         super.setup(resolver, objectModel, src, par);
 
         Request request = ObjectModelHelper.getRequest(objectModel);
-        
+
         if (src == null) {
             // Handle this request as the StreamGenerator does (from the POST
             // request or from a request parameter), but try to make sure
@@ -199,8 +198,9 @@
         }
 
         xpath = request.getParameter("xpath");
-        if(xpath == null)
+        if (xpath == null) {
             xpath = par.getParameter("xpath",null);
+        }
 
         // append the request parameter to the URL if necessary
         if (par.getParameterAsBoolean("copy-parameters", false)
@@ -212,8 +212,9 @@
         }
 
         try {
-            if (source != null)
+            if (source != null) {
                 this.inputSource = resolver.resolveURI(super.source);
+            }
         } catch (SourceException se) {
             throw SourceUtil.handle("Unable to resolve " + super.source, se);
         }
@@ -228,8 +229,9 @@
      *              is currently not cacheable.
      */
     public java.io.Serializable getKey() {
-        if (this.inputSource == null)
+        if (this.inputSource == null) {
             return null;
+        }
 
         if (this.xpath != null) {
             StringBuffer buffer = new StringBuffer(this.inputSource.getURI());
@@ -249,8 +251,9 @@
      *         component is currently not cacheable.
      */
     public SourceValidity getValidity() {
-        if (this.inputSource == null)
+        if (this.inputSource == null) {
             return null;
+        }
         return this.inputSource.getValidity();
     }
 
@@ -313,13 +316,8 @@
                 domStreamer.stream(doc.getDocumentElement());
             }
             this.contentHandler.endDocument();
-        } catch (IOException e){
-            throw new ResourceNotFoundException("Could not get resource "
-                + this.inputSource.getURI(), e);
         } catch (SAXException e){
-            throw e;
-        } catch (Exception e){
-            throw new ProcessingException("Exception in HTMLGenerator.generate()",e);
+            SourceUtil.handleSAXException(this.inputSource.getURI(), e);
         }
     }
 

Copied: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java (from rev 57469, cocoon/trunk/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java)
==============================================================================
--- cocoon/trunk/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java	(original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/generation/NekoHTMLGenerator.java	Thu Nov 11 17:54:49 2004
@@ -73,7 +73,7 @@
  * @author <a href="mailto:barozzi@nicolaken.com">Nicola Ken Barozzi</a>
  * @author <a href="mailto:gianugo@apache.org">Gianugo Rabellino</a>
  *
- * @version CVS $Id: NekoHTMLGenerator.java,v 1.2 2004/07/08 12:04:08 upayavira Exp $
+ * @version CVS $Id$
  */
 public class NekoHTMLGenerator extends ServiceableGenerator
 implements Configurable, CacheableProcessingComponent, Disposable {

Modified: cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/HTMLTransformer.java
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/HTMLTransformer.java	(original)
+++ cocoon/branches/BRANCH_2_1_X/src/blocks/html/java/org/apache/cocoon/transformation/HTMLTransformer.java	Thu Nov 11 17:54:49 2004
@@ -40,23 +40,26 @@
 import org.xml.sax.SAXException;
 
 /**
- * Unstable transformer: converts (escaped) HTML snippets into JTidied HTML. 
+ * Converts (escaped) HTML snippets into JTidied HTML. 
  * This transformer expects a list of elements, passed as comma separated
  * values of the "tags" parameter. It records the text enclosed in such
  * elements and pass it thru JTidy to obtain valid XHTML.
- * TODO: add namespace support.
- * WARNING: this transformer should be considered unstable.
+ *
+ * <p>TODO: Add namespace support.
+ * <p><strong>WARNING:</strong> This transformer should be considered unstable.
  *
  * @author <a href="mailto:d.madama@pro-netics.com">Daniele Madama</a>
  * @author <a href="mailto:gianugo@apache.org">Gianugo Rabellino</a>
+ *
+ * @version CVS $Id$
  */
 public class HTMLTransformer
     extends AbstractSAXTransformer
     implements Configurable {
 
-	/**
-	 * Properties for Tidy format
-	 */
+    /**
+     * Properties for Tidy format
+     */
     private Properties properties;
     
     /**
@@ -104,7 +107,6 @@
     /**
      * Configure this transformer, possibly passing to it
      * a jtidy configuration file location.
-     *
      */
     public void configure(Configuration config) throws ConfigurationException {
         String configUrl = config.getChild("jtidy-config").getValue(null);
@@ -209,5 +211,4 @@
             this.tags.put(tok, tok);
         }
     }
-    
 }

Modified: cocoon/branches/BRANCH_2_1_X/status.xml
==============================================================================
--- cocoon/branches/BRANCH_2_1_X/status.xml	(original)
+++ cocoon/branches/BRANCH_2_1_X/status.xml	Thu Nov 11 17:54:49 2004
@@ -452,7 +452,7 @@
    </action>
    <action dev="AG" type="update">
      Update ant to 1.6.2, commons-jxpath to 1.2, commons-beanutils to 1.7,
-     POI to 2.5.1-final-20040804, commons-httpclient to 2.0.1,
+     POI to 2.5.1-final-20040804, commons-httpclient to 2.0.1, nekohtml to 0.9.3,
      hsqldb to 1.7.2, jcs to 1.1-dev-20040811, quartz to 1.4.2, asm to 1.4.3,
      asm-util to 1.4.3 and groovy to 1.0-beta6
    </action>
@@ -483,7 +483,12 @@
      Still in the scratchpad area at the time of this writing, added a
      CachedSource proxy subclass for Sources that implement TraversableSource and
      InspectableSource (for instance WebDAVSource).
-   </action> 
+   </action>
+   <action dev="UV" type="add">
+     Added a NekoHTMLGenerator to the HTML block. NekoHTML is a simpler HTML
+     parser than JTidy and preserves more of the original HTML, primarily just
+     balancing closing tags.
+   </action>
    <action dev="TC" type="add" fixes-bug="29935" due-to="Leszek Gawron" due-to-email="ouzo@wlkp.org">
      Added support for stripping root elements in the CIncludeTransformer.
    </action>