You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by cz...@apache.org on 2005/09/12 10:52:09 UTC
svn commit: r280293 - in /cocoon:
blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java
blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java
blocks/portal/trunk/pom.xml trunk/gump.xml
Author: cziegeler
Date: Mon Sep 12 01:52:02 2005
New Revision: 280293
URL: http://svn.apache.org/viewcvs?rev=280293&view=rev
Log:
Get rid of jtidy
Added:
cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java (with props)
Modified:
cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java
cocoon/blocks/portal/trunk/pom.xml
cocoon/trunk/gump.xml
Modified: cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java
URL: http://svn.apache.org/viewcvs/cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java?rev=280293&r1=280292&r2=280293&view=diff
==============================================================================
--- cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java (original)
+++ cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/transformation/ProxyTransformer.java Mon Sep 12 01:52:02 2005
@@ -15,11 +15,8 @@
*/
package org.apache.cocoon.portal.transformation;
-import java.io.BufferedInputStream;
import java.io.IOException;
-import java.io.InputStream;
import java.io.PrintWriter;
-import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
@@ -42,6 +39,7 @@
import org.apache.cocoon.portal.coplet.CopletData;
import org.apache.cocoon.portal.coplet.CopletInstanceData;
import org.apache.cocoon.portal.profile.ProfileManager;
+import org.apache.cocoon.portal.util.HtmlDomParser;
import org.apache.cocoon.transformation.AbstractTransformer;
import org.apache.cocoon.util.NetUtils;
import org.apache.cocoon.xml.XMLUtils;
@@ -49,8 +47,6 @@
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
-import org.w3c.tidy.Configuration;
-import org.w3c.tidy.Tidy;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
@@ -129,9 +125,9 @@
protected Request request;
/**
- * The encoding (JTidy constant) if configured
+ * The encoding if configured
*/
- protected int configuredEncoding;
+ protected String configuredEncoding;
/**
* The user agent identification string if configured
@@ -187,7 +183,7 @@
copletInstanceData.setAttribute(DOCUMENT_BASE, this.documentBase);
}
- this.configuredEncoding = encodingConstantFromString((String)copletData.getAttribute("encoding"));
+ this.configuredEncoding = (String)copletData.getAttribute("encoding");
this.userAgent = (String)copletData.getAttribute("user-agent");
this.envelopeTag = parameters.getParameter(ENVELOPE_TAG_PARAMETER, this.defaultEnvelopeTag);
@@ -445,7 +441,7 @@
protected Document readXML(HttpURLConnection connection)
throws SAXException {
try {
- int charEncoding = configuredEncoding;
+ String encoding = configuredEncoding;
String contentType = connection.getHeaderField("Content-Type");
int begin = contentType.indexOf("charset=");
@@ -456,51 +452,13 @@
if (end == -1) {
end = contentType.length();
}
- String charset = contentType.substring(begin, end);
- charEncoding = encodingConstantFromString(charset);
+ encoding = contentType.substring(begin, end);
}
- InputStream stream = connection.getInputStream();
- // Setup an instance of Tidy.
- Tidy tidy = new Tidy();
- tidy.setXmlOut(true);
-
- tidy.setCharEncoding(charEncoding);
- tidy.setXHTML(true);
-
- //Set Jtidy warnings on-off
- tidy.setShowWarnings(this.getLogger().isWarnEnabled());
- //Set Jtidy final result summary on-off
- tidy.setQuiet(!this.getLogger().isInfoEnabled());
- //Set Jtidy infos to a String (will be logged) instead of System.out
- StringWriter stringWriter = new StringWriter();
- //FIXME ??
- PrintWriter errorWriter = new PrintWriter(stringWriter);
- tidy.setErrout(errorWriter);
- // Extract the document using JTidy and stream it.
- Document doc = tidy.parseDOM(new BufferedInputStream(stream), null);
- errorWriter.flush();
- errorWriter.close();
- return doc;
+ return HtmlDomParser.parse(connection.getURL().toExternalForm(), connection.getInputStream(), encoding);
+
} catch (Exception ex) {
throw new SAXException(ex);
- }
- }
-
- /**
- * Helper method to convert the HTTP encoding String to JTidy encoding constants.
- * @param encoding the HTTP encoding String
- * @return the corresponding JTidy constant.
- */
- private int encodingConstantFromString(String encoding) {
- if ("ISO8859_1".equalsIgnoreCase(encoding)) {
- return Configuration.LATIN1;
- }
- else if ("UTF-8".equalsIgnoreCase(encoding)) {
- return Configuration.UTF8;
- }
- else {
- return Configuration.LATIN1;
}
}
Added: cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java
URL: http://svn.apache.org/viewcvs/cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java?rev=280293&view=auto
==============================================================================
--- cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java (added)
+++ cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java Mon Sep 12 01:52:02 2005
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2005 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.portal.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.Properties;
+
+import org.apache.xerces.parsers.AbstractDOMParser;
+import org.apache.xerces.xni.parser.XMLInputSource;
+import org.cyberneko.html.HTMLConfiguration;
+import org.w3c.dom.Document;
+
+/**
+ * This parser uses the nekohtml parser to parse html and generate a document.
+ *
+ * @version $Id$
+ */
+public class HtmlDomParser extends AbstractDOMParser {
+
+ public HtmlDomParser(Properties properties) {
+ super(getConfig(properties));
+ }
+
+ protected static HTMLConfiguration getConfig(Properties properties) {
+ HTMLConfiguration config = new HTMLConfiguration();
+ config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
+ if (properties != null) {
+ for (Iterator i = properties.keySet().iterator();i.hasNext();) {
+ String name = (String) i.next();
+ config.setProperty(name, properties.getProperty(name));
+ }
+ }
+ return config;
+ }
+
+ /**
+ * Parse html.
+ */
+ public static Document parse(String systemId, InputStream stream, String encoding)
+ throws IOException {
+ final HtmlDomParser parser = new HtmlDomParser(null);
+ XMLInputSource source = new XMLInputSource(null, systemId, null, stream, encoding);
+ parser.parse(source);
+ return parser.getDocument();
+ }
+}
Propchange: cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: cocoon/blocks/portal/trunk/java/org/apache/cocoon/portal/util/HtmlDomParser.java
------------------------------------------------------------------------------
svn:keywords = Id
Modified: cocoon/blocks/portal/trunk/pom.xml
URL: http://svn.apache.org/viewcvs/cocoon/blocks/portal/trunk/pom.xml?rev=280293&r1=280292&r2=280293&view=diff
==============================================================================
--- cocoon/blocks/portal/trunk/pom.xml (original)
+++ cocoon/blocks/portal/trunk/pom.xml Mon Sep 12 01:52:02 2005
@@ -97,11 +97,6 @@
<version>0.2</version>
</dependency>
<dependency>
- <groupId>jtidy</groupId>
- <artifactId>jtidy</artifactId>
- <version>4aug2000r7-dev</version>
- </dependency>
- <dependency>
<groupId>org.apache.pluto</groupId>
<artifactId>pluto</artifactId>
<version>1.0.1-rc4</version>
Modified: cocoon/trunk/gump.xml
URL: http://svn.apache.org/viewcvs/cocoon/trunk/gump.xml?rev=280293&r1=280292&r2=280293&view=diff
==============================================================================
--- cocoon/trunk/gump.xml (original)
+++ cocoon/trunk/gump.xml Mon Sep 12 01:52:02 2005
@@ -921,7 +921,6 @@
<depend project="cocoon-block-authentication-fw"/>
<depend project="cocoon-block-forms"/>
<depend project="cocoon-block-cron"/>
- <depend project="jtidy"/>
<depend project="nekohtml"/>
<depend project="castor"/>
<depend project="commons-collections"/>
@@ -933,7 +932,6 @@
<depend project="wsrp4j"/>
<depend project="commons-discovery"/>
- <library name="jtidy"/>
<library name="nekohtml"/>
<library name="castor"/>
<library name="commons-collections"/>