You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by cz...@apache.org on 2005/09/12 10:52:09 UTC

svn commit: r280293 - in /cocoon: blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java blocks/portal/trunk/pom.xml trunk/gump.xml

Author: cziegeler
Date: Mon Sep 12 01:52:02 2005
New Revision: 280293

URL: http://svn.apache.org/viewcvs?rev=280293&view=rev
Log:
Get rid of jtidy

Added:
    cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java   (with props)
Modified:
    cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java
    cocoon/blocks/portal/trunk/pom.xml
    cocoon/trunk/gump.xml

Modified: cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java
URL: http://svn.apache.org/viewcvs/cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java?rev=280293&r1=280292&r2=280293&view=diff
==============================================================================
--- cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java (original)
+++ cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java Mon Sep 12 01:52:02 2005
@@ -15,11 +15,8 @@
  */
 package org.apache.cocoon.portal.transformation;
 
-import java.io.BufferedInputStream;
 import java.io.IOException;
-import java.io.InputStream;
 import java.io.PrintWriter;
-import java.io.StringWriter;
 import java.io.UnsupportedEncodingException;
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
@@ -42,6 +39,7 @@
 import org.apache.cocoon.portal.coplet.CopletData;
 import org.apache.cocoon.portal.coplet.CopletInstanceData;
 import org.apache.cocoon.portal.profile.ProfileManager;
+import org.apache.cocoon.portal.util.HtmlDomParser;
 import org.apache.cocoon.transformation.AbstractTransformer;
 import org.apache.cocoon.util.NetUtils;
 import org.apache.cocoon.xml.XMLUtils;
@@ -49,8 +47,6 @@
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
-import org.w3c.tidy.Configuration;
-import org.w3c.tidy.Tidy;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 
@@ -129,9 +125,9 @@
     protected Request request;
 
     /**
-     * The encoding (JTidy constant) if configured
+     * The encoding if configured
      */
-    protected int configuredEncoding;
+    protected String configuredEncoding;
 
     /**
      * The user agent identification string if confiugured
@@ -187,7 +183,7 @@
             copletInstanceData.setAttribute(DOCUMENT_BASE, this.documentBase);
         }
 
-        this.configuredEncoding = encodingConstantFromString((String)copletData.getAttribute("encoding"));
+        this.configuredEncoding = (String)copletData.getAttribute("encoding");
         this.userAgent = (String)copletData.getAttribute("user-agent");
         this.envelopeTag = parameters.getParameter(ENVELOPE_TAG_PARAMETER, this.defaultEnvelopeTag);
 
@@ -445,7 +441,7 @@
     protected Document readXML(HttpURLConnection connection) 
     throws SAXException {
         try {
-            int charEncoding = configuredEncoding;
+            String encoding = configuredEncoding;
 
             String contentType = connection.getHeaderField("Content-Type");
             int begin = contentType.indexOf("charset=");
@@ -456,51 +452,13 @@
                 if (end == -1) {
                     end = contentType.length();
                 }
-                String charset = contentType.substring(begin, end);
-                charEncoding = encodingConstantFromString(charset);
+                encoding = contentType.substring(begin, end);
             }
 
-            InputStream stream = connection.getInputStream();
-            // Setup an instance of Tidy.
-            Tidy tidy = new Tidy();
-            tidy.setXmlOut(true);
-
-            tidy.setCharEncoding(charEncoding);
-            tidy.setXHTML(true);
-
-            //Set Jtidy warnings on-off
-            tidy.setShowWarnings(this.getLogger().isWarnEnabled());
-            //Set Jtidy final result summary on-off
-            tidy.setQuiet(!this.getLogger().isInfoEnabled());
-            //Set Jtidy infos to a String (will be logged) instead of System.out
-            StringWriter stringWriter = new StringWriter();
-            //FIXME ??
-            PrintWriter errorWriter = new PrintWriter(stringWriter);
-            tidy.setErrout(errorWriter);
-            // Extract the document using JTidy and stream it.
-            Document doc = tidy.parseDOM(new BufferedInputStream(stream), null);
-            errorWriter.flush();
-            errorWriter.close();
-            return doc;
+            return HtmlDomParser.parse(connection.getURL().toExternalForm(), connection.getInputStream(), encoding);
+
         } catch (Exception ex) {
             throw new SAXException(ex);
-        }
-    }
-
-    /**
-     * Helper method to convert the HTTP encoding String to JTidy encoding constants.
-     * @param encoding the HTTP encoding String
-     * @return the corresponding JTidy constant.
-     */
-    private int encodingConstantFromString(String encoding) {
-        if ("ISO8859_1".equalsIgnoreCase(encoding)) {
-            return Configuration.LATIN1;
-        }
-        else if ("UTF-8".equalsIgnoreCase(encoding)) {
-            return Configuration.UTF8;
-        }
-        else {
-            return Configuration.LATIN1;
         }
     }
 

Added: cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java
URL: http://svn.apache.org/viewcvs/cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java?rev=280293&view=auto
==============================================================================
--- cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java (added)
+++ cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java Mon Sep 12 01:52:02 2005
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2005 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.portal.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.Properties;
+
+import org.apache.xerces.parsers.AbstractDOMParser;
+import org.apache.xerces.xni.parser.XMLInputSource;
+import org.cyberneko.html.HTMLConfiguration;
+import org.w3c.dom.Document;
+
+/**
+ * DOM parser for html: extends the Xerces AbstractDOMParser and runs it on a nekohtml HTMLConfiguration.
+ * Call the static parse method for the default setup, or use the constructor to pass extra parser properties.
+ * @version $Id$
+ */
+public class HtmlDomParser extends AbstractDOMParser {
+
+    public HtmlDomParser(Properties properties) {
+        super(getConfig(properties)); // properties may be null; then only the built-in default below is set
+    }
+
+    protected static HTMLConfiguration getConfig(Properties properties) {
+        HTMLConfiguration config = new HTMLConfiguration();
+        config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); // built-in default; presumably makes element names lower case - confirm in the nekohtml docs
+        if (properties != null) {
+            for (Iterator i = properties.keySet().iterator();i.hasNext();) { // NOTE(review): keySet() skips keys that exist only in the Properties' defaults
+                String name = (String) i.next(); // a non-String key would fail with a ClassCastException here
+                config.setProperty(name, properties.getProperty(name)); // set after the built-in default, so a caller-supplied value replaces it
+            }
+        }
+        return config;
+    }
+
+    /**
+     * Parse html from the given stream (identified by systemId, read with the given encoding) using a parser without extra properties, and return the resulting DOM document.
+     */
+    public static Document parse(String systemId, InputStream stream, String encoding)
+    throws IOException {
+        final HtmlDomParser parser = new HtmlDomParser(null); // null: no caller-supplied parser properties
+        XMLInputSource source = new XMLInputSource(null, systemId, null, stream, encoding); // public id and base system id are not supplied
+        parser.parse(source); // NOTE(review): the stream is not closed explicitly in this method
+        return parser.getDocument();
+    }
+}

Propchange: cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java
------------------------------------------------------------------------------
    svn:keywords = Id

Modified: cocoon/blocks/portal/trunk/pom.xml
URL: http://svn.apache.org/viewcvs/cocoon/blocks/portal/trunk/pom.xml?rev=280293&r1=280292&r2=280293&view=diff
==============================================================================
--- cocoon/blocks/portal/trunk/pom.xml (original)
+++ cocoon/blocks/portal/trunk/pom.xml Mon Sep 12 01:52:02 2005
@@ -97,11 +97,6 @@
       <version>0.2</version>
     </dependency>
     <dependency>
-      <groupId>jtidy</groupId>
-      <artifactId>jtidy</artifactId>
-      <version>4aug2000r7-dev</version>
-    </dependency>
-    <dependency>
       <groupId>org.apache.pluto</groupId>
       <artifactId>pluto</artifactId>
       <version>1.0.1-rc4</version>

Modified: cocoon/trunk/gump.xml
URL: http://svn.apache.org/viewcvs/cocoon/trunk/gump.xml?rev=280293&r1=280292&r2=280293&view=diff
==============================================================================
--- cocoon/trunk/gump.xml (original)
+++ cocoon/trunk/gump.xml Mon Sep 12 01:52:02 2005
@@ -921,7 +921,6 @@
     <depend project="cocoon-block-authentication-fw"/>
     <depend project="cocoon-block-forms"/>
     <depend project="cocoon-block-cron"/>
-    <depend project="jtidy"/>
     <depend project="nekohtml"/>
     <depend project="castor"/>
     <depend project="commons-collections"/>
@@ -933,7 +932,6 @@
     <depend project="wsrp4j"/>
     <depend project="commons-discovery"/>
 
-    <library name="jtidy"/>
     <library name="nekohtml"/>
     <library name="castor"/>
     <library name="commons-collections"/>