You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by ja...@apache.org on 2008/12/11 16:42:20 UTC
svn commit: r725715 - in /activemq/camel/trunk: ./
camel-core/src/main/java/org/apache/camel/builder/
camel-core/src/main/java/org/apache/camel/model/
camel-core/src/main/java/org/apache/camel/model/dataformat/
camel-core/src/main/resources/org/apache/...
Author: janstey
Date: Thu Dec 11 07:42:19 2008
New Revision: 725715
URL: http://svn.apache.org/viewvc?rev=725715&view=rev
Log:
CAMEL-1184 - Add tidyMarkup dataformat for unmarshaling bad HTML into good (XML) HTML.
Thanks to Ramon Buckland for this!
Added:
activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java (with props)
activemq/camel/trunk/components/camel-tagsoup/
activemq/camel/trunk/components/camel-tagsoup/pom.xml (with props)
activemq/camel/trunk/components/camel-tagsoup/src/
activemq/camel/trunk/components/camel-tagsoup/src/main/
activemq/camel/trunk/components/camel-tagsoup/src/main/java/
activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/
activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/
activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/
activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/
activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/
activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java (with props)
activemq/camel/trunk/components/camel-tagsoup/src/main/resources/
activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/
activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt (with props)
activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt (with props)
activemq/camel/trunk/components/camel-tagsoup/src/test/
activemq/camel/trunk/components/camel-tagsoup/src/test/java/
activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/
activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/
activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/
activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/
activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/
activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java (with props)
activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java (with props)
activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java (with props)
activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java (with props)
activemq/camel/trunk/components/camel-tagsoup/src/test/resources/
activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties (with props)
activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/
activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/
activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/
activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/
activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/
activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html (with props)
activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html (with props)
Modified:
activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java
activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java
activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java
activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java
activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index
activemq/camel/trunk/components/pom.xml
activemq/camel/trunk/pom.xml
Modified: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java (original)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java Thu Dec 11 07:42:19 2008
@@ -18,7 +18,6 @@
import java.util.zip.Deflater;
-import org.apache.camel.Processor;
import org.apache.camel.model.ProcessorType;
import org.apache.camel.model.dataformat.ArtixDSContentType;
import org.apache.camel.model.dataformat.ArtixDSDataFormat;
@@ -30,6 +29,7 @@
import org.apache.camel.model.dataformat.RssDataFormat;
import org.apache.camel.model.dataformat.SerializationDataFormat;
import org.apache.camel.model.dataformat.StringDataFormat;
+import org.apache.camel.model.dataformat.TidyMarkupDataFormat;
import org.apache.camel.model.dataformat.XMLBeansDataFormat;
import org.apache.camel.model.dataformat.XStreamDataFormat;
import org.apache.camel.model.dataformat.ZipDataFormat;
@@ -163,6 +163,23 @@
}
/**
+ * Uses the TidyMarkup data format, which returns well-formed HTML (an XML
+ * document) as either a {@link java.lang.String} or a
+ * {@link org.w3c.dom.Node}.
+ *
+ * @param dataObjectType the type to return the tidied markup as
+ */
+ public T tidyMarkup(Class<?> dataObjectType) {
+ return dataFormat(new TidyMarkupDataFormat(dataObjectType));
+ }
+
+ /**
+ * Uses the TidyMarkup data format, returning the well-formed HTML (an XML
+ * document) as a {@link java.lang.String}.
+ * <p>
+ * NOTE(review): the no-arg constructor of the model class selects
+ * {@link org.w3c.dom.Node}; confirm that String is the intended default here.
+ */
+ public T tidyMarkup() {
+ return dataFormat(new TidyMarkupDataFormat(String.class));
+ }
+
+
+ /**
* Uses the XStream data format
*/
public T xstream() {
Modified: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java (original)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java Thu Dec 11 07:42:19 2008
@@ -60,7 +60,7 @@
@XmlElement(required = false, name = "jaxb", type = JaxbDataFormat.class),
@XmlElement(required = false, name = "rss", type = RssDataFormat.class),
@XmlElement(required = false, name = "serialization", type = SerializationDataFormat.class),
- @XmlElement(required = false, name = "string", type = StringDataFormat.class),
+ @XmlElement(required = false, name = "string", type = StringDataFormat.class),
@XmlElement(required = false, name = "xmlBeans", type = XMLBeansDataFormat.class),
@XmlElement(required = false, name = "xstream", type = XStreamDataFormat.class),
@XmlElement(required = false, name = "zip", type = ZipDataFormat.class)}
Modified: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java (original)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java Thu Dec 11 07:42:19 2008
@@ -33,6 +33,7 @@
import org.apache.camel.model.dataformat.RssDataFormat;
import org.apache.camel.model.dataformat.SerializationDataFormat;
import org.apache.camel.model.dataformat.StringDataFormat;
+import org.apache.camel.model.dataformat.TidyMarkupDataFormat;
import org.apache.camel.model.dataformat.XMLBeansDataFormat;
import org.apache.camel.model.dataformat.XStreamDataFormat;
import org.apache.camel.model.dataformat.ZipDataFormat;
@@ -61,6 +62,7 @@
@XmlElement(required = false, name = "rss", type = RssDataFormat.class),
@XmlElement(required = false, name = "serialization", type = SerializationDataFormat.class),
@XmlElement(required = false, name = "string", type = StringDataFormat.class),
+ @XmlElement(required = false, name = "tidyMarkup", type = TidyMarkupDataFormat.class),
@XmlElement(required = false, name = "xmlBeans", type = XMLBeansDataFormat.class),
@XmlElement(required = false, name = "xstream", type = XStreamDataFormat.class),
@XmlElement(required = false, name = "zip", type = ZipDataFormat.class)}
Modified: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java (original)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java Thu Dec 11 07:42:19 2008
@@ -48,6 +48,7 @@
@XmlElement(required = false, name = "rss", type = RssDataFormat.class),
@XmlElement(required = false, name = "serialization", type = SerializationDataFormat.class),
@XmlElement(required = false, name = "string", type = StringDataFormat.class),
+ @XmlElement(required = false, name = "tidyMarkup", type = TidyMarkupDataFormat.class),
@XmlElement(required = false, name = "xmlBeans", type = XMLBeansDataFormat.class),
@XmlElement(required = false, name = "xstream", type = XStreamDataFormat.class),
@XmlElement(required = false, name = "zip", type = ZipDataFormat.class)}
Added: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java (added)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.model.dataformat;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlAttribute;
+import javax.xml.bind.annotation.XmlRootElement;
+
+import org.w3c.dom.Node;
+
+import org.apache.camel.spi.DataFormat;
+
+
+/**
+ * Represents the TidyMarkup {@link DataFormat}, which produces a well-formed
+ * HTML document (well-formed XML).
+ * <p>
+ * The type the tidied markup is returned as is held in
+ * <tt>dataObjectType</tt> and handed to the underlying data format when it is
+ * configured.
+ */
+@XmlRootElement(name = "tidyMarkup")
+@XmlAccessorType(XmlAccessType.FIELD)
+public class TidyMarkupDataFormat extends DataFormatType {
+
+    @XmlAttribute(required = false)
+    private Class<?> dataObjectType;
+
+    /**
+     * Creates the data format with {@link Node} as the result type.
+     */
+    public TidyMarkupDataFormat() {
+        super("org.apache.camel.dataformat.tagsoup.TidyMarkupDataFormat");
+        this.setDataObjectType(Node.class);
+    }
+
+    /**
+     * Creates the data format with the given result type.
+     *
+     * @param dataObjectType the type to return; must accept a
+     *            {@link String} or a {@link Node}
+     * @throws IllegalArgumentException if the type is <tt>null</tt> or can
+     *             hold neither a String nor a Node
+     */
+    public TidyMarkupDataFormat(Class<?> dataObjectType) {
+        this();
+        // Checked with a real exception rather than an assert: assertions are
+        // disabled unless the JVM runs with -ea, so an assert would let an
+        // unsupported type through silently.
+        if (dataObjectType == null
+            || !(dataObjectType.isAssignableFrom(String.class) || dataObjectType.isAssignableFrom(Node.class))) {
+            throw new IllegalArgumentException(
+                "TidyMarkupDataFormat only supports returning a String or a org.w3c.dom.Node object");
+        }
+        this.setDataObjectType(dataObjectType);
+    }
+
+    /**
+     * @param dataObjectType the type to return the tidied markup as
+     */
+    public void setDataObjectType(Class<?> dataObjectType) {
+        this.dataObjectType = dataObjectType;
+    }
+
+    /**
+     * @return the type the tidied markup is returned as
+     */
+    public Class<?> getDataObjectType() {
+        return dataObjectType;
+    }
+
+    // Implementation methods
+    //-------------------------------------------------------------------------
+
+    /**
+     * Copies the result type onto the underlying data format; nothing is set
+     * when no type is configured.
+     */
+    @Override
+    protected void configureDataFormat(DataFormat dataFormat) {
+        Class<?> type = getDataObjectType();
+        if (type != null) {
+            setProperty(dataFormat, "dataObjectType", type);
+        }
+    }
+
+}
\ No newline at end of file
Propchange: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index (original)
+++ activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index Thu Dec 11 07:42:19 2008
@@ -25,6 +25,7 @@
RssDataFormat
SerializationDataFormat
StringDataFormat
+TidyMarkupDataFormat
XMLBeansDataFormat
XStreamDataFormat
-ZipDataFormat
\ No newline at end of file
+ZipDataFormat
Added: activemq/camel/trunk/components/camel-tagsoup/pom.xml
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/pom.xml?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/pom.xml (added)
+++ activemq/camel/trunk/components/camel-tagsoup/pom.xml Thu Dec 11 07:42:19 2008
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-parent</artifactId>
+ <version>2.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>camel-tagsoup</artifactId>
+ <packaging>bundle</packaging>
+ <name>Camel :: TagSoup</name>
+ <description>Camel TagSoup support</description>
+
+ <properties>
+ <camel.osgi.export.pkg>org.apache.camel.dataformat.tagsoup.*</camel.osgi.export.pkg>
+ </properties>
+
+ <dependencies>
+
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-core</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>org.ccil.cowan.tagsoup</groupId>
+ <artifactId>tagsoup</artifactId>
+ </dependency>
+
+ <!-- testing -->
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-spring</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
Propchange: activemq/camel/trunk/components/camel-tagsoup/pom.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,308 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMResult;
+import javax.xml.transform.sax.SAXSource;
+
+import org.w3c.dom.Node;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.XMLReader;
+import org.apache.camel.CamelException;
+import org.apache.camel.Exchange;
+import org.apache.camel.spi.DataFormat;
+import org.ccil.cowan.tagsoup.HTMLSchema;
+import org.ccil.cowan.tagsoup.Parser;
+import org.ccil.cowan.tagsoup.Schema;
+import org.ccil.cowan.tagsoup.XMLWriter;
+
+/**
+ * Dataformat for TidyMarkup (aka Well formed HTML in XML form.. may or may not
+ * be XHTML) This dataformat is intended to convert bad HTML from a site (or
+ * file) into a well formed HTML document which can then be sent to XSLT or
+ * xpath'ed on.
+ *
+ */
+public class TidyMarkupDataFormat implements DataFormat {
+
+ private static final String NO = "no";
+
+ private static final String YES = "yes";
+
+ private static final String XML = "xml";
+
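+ /**
+ * When returning a String, do we omit the XML declaration?
+ * NOTE(review): no setter for this flag is visible in this class - confirm
+ * how it is meant to be configured.
+ */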
+ private boolean isOmitXmlDeclaration;
+
+ /**
+ * String or Node to return
+ */
+ private Class dataObjectType;
+
+ /**
+ * What is the default output format ?
+ */
+ private String method;
+
+ /**
+ * The Schema which we are parsing (default HTMLSchema)
+ */
+ private Schema parsingSchema;
+
+ /**
+ * User supplied Parser features, keyed by feature name.
+ *
+ * @see <a href="http://home.ccil.org/~cowan/XML/tagsoup/#properties">TagSoup properties</a>
+ * @see <a href="http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html">SAX features and properties</a>
+ */
+ private Map<String, Boolean> parserFeatures;
+
+ /**
+ * User supplied Parser properties, keyed by property name.
+ *
+ * @see <a href="http://home.ccil.org/~cowan/XML/tagsoup/#properties">TagSoup properties</a>
+ * @see <a href="http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html">SAX features and properties</a>
+ */
+ private Map<String, Object> parserPropeties;
+
+    /**
+     * Unsupported operation: this data format only tidies markup on unmarshal.
+     *
+     * @throws CamelException always, pointing the user at the unmarshal usage
+     */
+    public void marshal(Exchange exchange, Object object, OutputStream outputStream) throws Exception {
+        // The XML element for this data format is "tidyMarkup"; the hint below
+        // must name it and be well-formed itself.
+        throw new CamelException("Marshalling from Well Formed HTML to ugly HTML is not supported."
+                + " Only use <unmarshal><tidyMarkup/></unmarshal>");
+    }
+
+    /**
+     * Unmarshals the HTML read from the stream into tidy markup.
+     * <p>
+     * The result is a String when the configured dataObjectType can hold a
+     * String, otherwise a DOM Node when it can hold a Node.
+     *
+     * @param exchange the current exchange (not used here)
+     * @param inputStream the HTML to tidy
+     * @return the tidy markup as a String or a {@link Node}
+     * @throws CamelException if no dataObjectType is configured or it is
+     *             neither String nor Node compatible
+     */
+    public Object unmarshal(Exchange exchange, InputStream inputStream) throws Exception {
+        // Fail with a clear message instead of a bare NullPointerException
+        // when the data format was created without a result type.
+        if (dataObjectType == null) {
+            throw new CamelException("The dataObjectType must be set to either String or org.w3c.dom.Node");
+        }
+
+        if (dataObjectType.isAssignableFrom(String.class)) {
+            return asStringTidyMarkup(inputStream);
+        } else if (dataObjectType.isAssignableFrom(Node.class)) {
+            return asNodeTidyMarkup(inputStream);
+        } else {
+            throw new CamelException("The return type [" + dataObjectType.getCanonicalName() + "] is unsupported");
+        }
+    }
+
+    /**
+     * Return the tidy markup as a string. The input stream is always closed
+     * before this method returns.
+     *
+     * @param inputStream the HTML to tidy
+     * @return String of XML
+     * @throws CamelException if the HTML cannot be parsed, or if parsing
+     *             succeeded but the stream could not be closed
+     */
+    private String asStringTidyMarkup(InputStream inputStream) throws CamelException {
+
+        XMLReader parser = createTagSoupParser();
+        StringWriter w = new StringWriter();
+        parser.setContentHandler(createContentHandler(w));
+
+        CamelException failure = null;
+        try {
+            parser.parse(new InputSource(inputStream));
+        } catch (Exception e) {
+            failure = new CamelException("Failed to turn the HTML into tidy Markup", e);
+        } finally {
+            try {
+                inputStream.close();
+            } catch (Exception e) {
+                // Throwing from finally would replace a parse failure with the
+                // less useful close failure, so only report the close problem
+                // when parsing itself went fine.
+                if (failure == null) {
+                    failure = new CamelException("Failed to close the inputStream", e);
+                }
+            }
+        }
+
+        if (failure != null) {
+            throw failure;
+        }
+        return w.toString();
+    }
+
+    /**
+     * Return the tidy markup as a DOM node by running the TagSoup parser
+     * through a default JAXP transformer into a {@link DOMResult}.
+     *
+     * @param inputStream the HTML to tidy
+     * @return the node held by the DOM result
+     * @throws CamelException if the parser cannot be created or the
+     *             transformation fails
+     */
+    private Node asNodeTidyMarkup(InputStream inputStream) throws CamelException {
+        XMLReader parser = createTagSoupParser();
+        // NOTE(review): the transformer presumably installs its own content
+        // handler on the reader, which would make this writer-backed handler
+        // unused - confirm before removing.
+        StringWriter w = new StringWriter();
+        parser.setContentHandler(createContentHandler(w));
+
+        try {
+            Transformer transformer = TransformerFactory.newInstance().newTransformer();
+            DOMResult result = new DOMResult();
+            transformer.transform(new SAXSource(parser, new InputSource(inputStream)), result);
+            return result.getNode();
+        } catch (Exception e) {
+            // keep the original failure as the cause so it is not lost
+            throw new CamelException("Failed to convert the HTML to tidy Markup (returning as a DOM Node)", e);
+        }
+    }
+
+    /**
+     * Builds the TagSoup parser used to read the incoming HTML.
+     * <p>
+     * Both namespace features are switched off first, then the user supplied
+     * features and properties (if any) are applied, and finally the parsing
+     * schema is set.
+     *
+     * @return the configured parser
+     * @throws CamelException if any feature or property cannot be applied
+     */
+    protected XMLReader createTagSoupParser() throws CamelException {
+        XMLReader tagSoup = new Parser();
+        try {
+            tagSoup.setFeature(Parser.namespacesFeature, false);
+            tagSoup.setFeature(Parser.namespacePrefixesFeature, false);
+
+            // apply each parser feature that the user may have supplied, see
+            // http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html
+            // http://home.ccil.org/~cowan/XML/tagsoup/#properties
+            Map<String, Boolean> features = getParserFeatures();
+            if (features != null) {
+                for (Entry<String, Boolean> feature : features.entrySet()) {
+                    tagSoup.setFeature(feature.getKey(), feature.getValue());
+                }
+            }
+
+            // apply each parser property that the user may have supplied, see
+            // http://home.ccil.org/~cowan/XML/tagsoup/#properties
+            Map<String, Object> properties = getParserPropeties();
+            if (properties != null) {
+                for (Entry<String, Object> property : properties.entrySet()) {
+                    tagSoup.setProperty(property.getKey(), property.getValue());
+                }
+            }
+
+            // the schema is set after the user supplied properties
+            Schema schema = getParsingSchema();
+            if (schema != null) {
+                tagSoup.setProperty(Parser.schemaProperty, schema);
+            }
+        } catch (Exception e) {
+            throw new CamelException("Problem setting the parser feature", e);
+        }
+        return tagSoup;
+    }
+
+ /**
+ * Sets the schema used when parsing.
+ *
+ * @param schema
+ * the parsing schema to set
+ */
+ public void setParsingSchema(Schema schema) {
+ this.parsingSchema = schema;
+ }
+
+ /**
+ * Returns the parsing schema. When none has been set, a new
+ * {@link HTMLSchema} is created, stored and returned.
+ *
+ * @return the parsing schema
+ */
+ public Schema getParsingSchema() {
+ if (parsingSchema == null) {
+ this.parsingSchema = new HTMLSchema();
+ }
+ return parsingSchema;
+ }
+
+    /**
+     * Creates the content handler that writes the tidied markup to the given
+     * writer.
+     * <p>
+     * Only the output method and the omit-XML-declaration setting are
+     * configured; more could be exposed, but these two make a reasonable
+     * default for generating well-formed HTML.
+     *
+     * @param w the writer receiving the markup
+     * @return the configured {@link XMLWriter}
+     */
+    protected ContentHandler createContentHandler(Writer w) {
+        XMLWriter tidyWriter = new XMLWriter(w);
+
+        // fall back to "xml" when no output method has been configured
+        String outputMethod = getMethod();
+        tidyWriter.setOutputProperty(XMLWriter.METHOD, outputMethod != null ? outputMethod : XML);
+
+        tidyWriter.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, isOmitXmlDeclaration ? YES : NO);
+        return tidyWriter;
+    }
+
+ /**
+ * @param parserFeatures
+ * the parserFeatures to set
+ */
+ public void setParserFeatures(Map<String, Boolean> parserFeatures) {
+ this.parserFeatures = parserFeatures;
+ }
+
+ /**
+ * @return the parserFeatures
+ */
+ public Map<String, Boolean> getParserFeatures() {
+ return parserFeatures;
+ }
+
+ /**
+ * @param parserPropeties
+ * the parserPropeties to set
+ */
+ public void setParserPropeties(Map<String, Object> parserPropeties) {
+ this.parserPropeties = parserPropeties;
+ }
+
+ /**
+ * @return the parserPropeties
+ */
+ public Map<String, Object> getParserPropeties() {
+ return parserPropeties;
+ }
+
+ /**
+ * @param method
+ * the method to set
+ */
+ public void setMethod(String method) {
+ this.method = method;
+ }
+
+ /**
+ * @return the method
+ */
+ public String getMethod() {
+ return method;
+ }
+
+ /**
+ * @return the dataObjectType
+ */
+ public Class getDataObjectType() {
+ return dataObjectType;
+ }
+
+ /**
+ * @param dataObjectType
+ * the dataObjectType to set
+ */
+ public void setDataObjectType(Class dataObjectType) {
+ this.dataObjectType = dataObjectType;
+ }
+
+}
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt Thu Dec 11 07:42:19 2008
@@ -0,0 +1,203 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt
------------------------------------------------------------------------------
svn:executable = *
Added: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt Thu Dec 11 07:42:19 2008
@@ -0,0 +1,11 @@
+ =========================================================================
+ == NOTICE file corresponding to the section 4 d of ==
+ == the Apache License, Version 2.0, ==
+ == in this case for the Apache Camel distribution. ==
+ =========================================================================
+
+ This product includes software developed by
+ The Apache Software Foundation (http://www.apache.org/).
+
+ Please read the different LICENSE files present in the licenses directory of
+ this distribution.
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt
------------------------------------------------------------------------------
svn:eol-style = native
Added: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.camel.ContextTestSupport;
+import org.apache.camel.Exchange;
+import org.apache.camel.Message;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * @version $Revision: 700232 $
+ */
+public class TidyMarkupDataFormatAsDomNodeTest extends ContextTestSupport {
+    private static final transient Log LOG = LogFactory.getLog(TidyMarkupDataFormatAsDomNodeTest.class);
+
+    /**
+     * Sends two malformed HTML documents through the route and expects the body "true" back for each.
+     */
+    public void testUnMarshalToStringOfXml() throws Exception {
+        MockEndpoint resultEndpoint = resolveMandatoryEndpoint("mock:result", MockEndpoint.class);
+        resultEndpoint.expectedMessageCount(2);
+
+        /*
+         * Each of these files has a <p>TidyMarkupNode section (no closing tag).
+         *
+         * The route below tidies the markup, then sets the body to the result of an XPath boolean() test for that paragraph.
+         */
+        String badHtml = TidyMarkupTestSupport.loadFileAsString(new File(
+            "src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html"));
+        String evilHtml = TidyMarkupTestSupport.loadFileAsString(new File(
+            "src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html"));
+
+        template.sendBody("direct:start", badHtml);
+        template.sendBody("direct:start", evilHtml);
+
+        resultEndpoint.assertIsSatisfied();
+        List<Exchange> list = resultEndpoint.getReceivedExchanges();
+        for (Exchange exchange : list) {
+            Message in = exchange.getIn();
+            String response = in.getBody(String.class);
+
+            LOG.debug("Received " + response);
+            assertNotNull("Should be able to convert received body to a string", response);
+
+            /*
+             * Our route xpaths the existence of our signature "<p>TidyMarkupNode",
+             * but of course, by the xpath time, it is well formed. A plain
+             * assertEquals is used because a failed assertion is an Error and
+             * would never reach a catch (Exception) block.
+             */
+            assertEquals("XPath should have found the TidyMarkupNode paragraph", "true", response);
+
+        }
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() {
+        return new RouteBuilder() {
+            public void configure() {
+                from("direct:start").unmarshal().tidyMarkup().setBody().xpath(
+                    "boolean(//p[contains(text(),'TidyMarkupNode')])", String.class).to("mock:result");
+            }
+        };
+    }
+
+}
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import java.io.File;
+import java.util.List;
+
+import org.w3c.dom.Node;
+
+import org.apache.camel.ContextTestSupport;
+import org.apache.camel.Exchange;
+import org.apache.camel.Message;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * @version $Revision: 700232 $
+ */
+public class TidyMarkupDataFormatAsStringTest extends ContextTestSupport {
+    private static final transient Log LOG = LogFactory.getLog(TidyMarkupDataFormatAsStringTest.class);
+
+    /**
+     * Unmarshals two malformed HTML documents and checks that each result can be read as a DOM Node.
+     */
+    public void testUnMarshalToStringOfXml() throws Exception {
+        MockEndpoint resultEndpoint = resolveMandatoryEndpoint("mock:result", MockEndpoint.class);
+        resultEndpoint.expectedMessageCount(2);
+        String badHtml = TidyMarkupTestSupport.loadFileAsString(new File(
+            "src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html"));
+        String evilHtml = TidyMarkupTestSupport.loadFileAsString(new File(
+            "src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html"));
+
+        template.sendBody("direct:start", badHtml);
+        template.sendBody("direct:start", evilHtml);
+
+        resultEndpoint.assertIsSatisfied();
+        List<Exchange> list = resultEndpoint.getReceivedExchanges();
+        for (Exchange exchange : list) {
+            try {
+                Message in = exchange.getIn();
+                Node tidyMarkup = in.getBody(Node.class);
+                LOG.debug("Received " + tidyMarkup);
+                assertNotNull("Should be able to convert received body to a DOM Node", tidyMarkup);
+            } catch (Exception e) {
+                fail("Failed to convert the resulting String to XML: " + e.getLocalizedMessage());
+            }
+        }
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() {
+        return new RouteBuilder() {
+            public void configure() {
+                from("direct:start").unmarshal().tidyMarkup().to("mock:result");
+            }
+        };
+    }
+
+}
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.model.dataformat.TidyMarkupDataFormat;
+
+/*
+ * This just tests whether this dataformat is available to UnmarshalType
+ */
+public class TidyMarkupDataFormatWithUnmarshalTypeTest extends TidyMarkupDataFormatAsStringTest {
+
+    @Override
+    protected RouteBuilder createRouteBuilder() {
+        // Hand the model-level data format straight to unmarshal(...) and reuse the inherited test method.
+        return new RouteBuilder() {
+            public void configure() {
+                from("direct:start").unmarshal(new TidyMarkupDataFormat(String.class)).to("mock:result");
+            }
+        };
+    }
+
+}
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.StringReader;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+public final class TidyMarkupTestSupport {
+
+    private TidyMarkupTestSupport() {
+        // Static helpers only; never instantiated.
+    }
+
+    /** Reads the file line by line, terminating every line with the platform line separator. */
+    public static String loadFileAsString(File file) throws Exception {
+        StringBuilder text = new StringBuilder();
+        BufferedReader reader = new BufferedReader(new FileReader(file));
+        String eol = System.getProperty("line.separator");
+        try {
+            for (String row = reader.readLine(); row != null; row = reader.readLine()) {
+                text.append(row).append(eol);
+            }
+        } finally {
+            reader.close();
+        }
+        return text.toString();
+    }
+
+    /**
+     * Parses an XML string into a DOM document using {@link #createDocumentBuilder()}.
+     * @param xmlString the XML text to parse
+     * @return the parsed Document
+     * @throws IOException if reading the text fails
+     * @throws SAXException if the text cannot be parsed
+     * @throws ParserConfigurationException if no DocumentBuilder can be created
+     */
+    public static Document stringToXml(String xmlString) throws SAXException, IOException, ParserConfigurationException {
+        DocumentBuilder builder = createDocumentBuilder();
+        return builder.parse(new InputSource(new StringReader(xmlString)));
+    }
+
+    /**
+     * Builds a namespace-aware DocumentBuilder that ignores comments and
+     * element-content whitespace, coalesces CDATA and expands entity references.
+     *
+     * @return a new DocumentBuilder
+     * @throws ParserConfigurationException if the factory cannot create one
+     */
+    public static DocumentBuilder createDocumentBuilder() throws ParserConfigurationException {
+        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+        factory.setNamespaceAware(true);
+        factory.setExpandEntityReferences(true);
+        factory.setCoalescing(true);
+        factory.setIgnoringElementContentWhitespace(true);
+        factory.setIgnoringComments(true);
+        return factory.newDocumentBuilder();
+    }
+}
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties Thu Dec 11 07:42:19 2008
@@ -0,0 +1,38 @@
+## ------------------------------------------------------------------------
+## Licensed to the Apache Software Foundation (ASF) under one or more
+## contributor license agreements. See the NOTICE file distributed with
+## this work for additional information regarding copyright ownership.
+## The ASF licenses this file to You under the Apache License, Version 2.0
+## (the "License"); you may not use this file except in compliance with
+## the License. You may obtain a copy of the License at
+##
+## http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+## ------------------------------------------------------------------------
+
+#
+# Logging configuration for the camel-tagsoup tests: INFO and above goes to the file appender; add "out" to the root logger for console output.
+#
+log4j.rootLogger=INFO, file
+
+#log4j.logger.org.apache.camel=DEBUG
+#log4j.logger.org.apache.camel.impl.converter=INFO
+#log4j.logger.org.apache.camel.dataformat.tagsoup=DEBUG
+
+# CONSOLE appender not used by default
+log4j.appender.out=org.apache.log4j.ConsoleAppender
+log4j.appender.out.layout=org.apache.log4j.PatternLayout
+log4j.appender.out.layout.ConversionPattern=[%30.30t] %-30.30c{1} %-5p %m%n
+#log4j.appender.out.layout.ConversionPattern=%d [%-15.15t] %-5p %-30.30c{1} - %m%n
+
+# File appender
+log4j.appender.file=org.apache.log4j.FileAppender
+log4j.appender.file.layout=org.apache.log4j.PatternLayout
+log4j.appender.file.layout.ConversionPattern=%d [%-15.15t] %-5p %-30.30c{1} - %m%n
+log4j.appender.file.file=target/camel-tagsoup-test.log
+log4j.appender.file.append=true
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties
------------------------------------------------------------------------------
svn:eol-style = native
Added: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html Thu Dec 11 07:42:19 2008
@@ -0,0 +1,20 @@
+<html>
+ <head>
+ <title>FooBar</title>
+ <meta name=metatag value=foo>
+ </head>
+
+ <body onload="foo()" >
+ <p>
+ Some text
+
+ <p>
+ Some more Text
+ <p>TidyMarkupNode
+ <img src=filename.jpg>
+
+ <font color="red">Some red text
+
+ </body>
+
+</HTML>
\ No newline at end of file
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html
------------------------------------------------------------------------------
svn:eol-style = native
Added: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html Thu Dec 11 07:42:19 2008
@@ -0,0 +1,59 @@
+<html>
+<META-START>
+John Cowan
+<TABLE>
+<ROW>
+<CELL>SOUPE</CELL>
+<CELL>BE EVIL!</CELL></ROW>
+DE BALISES</TABLE>
+<CORR NEW="U" LOC="PI"/>
+<G ID="P1">
+Ecritez une balise ouvrante (sans attributs)
+</G>
+ou fermante HTML ici, s.v.p.</META-START>
+<FONT>X Y <p> ABC </FONT> xyz
+QRS<sup>TUV<sub>WXY</sup>Z</sub>
+
+<p>TidyMarkupNode
+<script language="javascript"><p></script>
+<table><tbody><tr><th>ABC
+</table><nr/>
+<meta><meta><meta><meta>
+<pre xml:space="default">test</pre>
+<test xmlns:xml="http://www.example.org/>
+</test><hr/>
+(add a random HTML tag above)
+<r:r:r:test/>
+
+<b><i></B></I>
+<b>
+ <p>bbb</b></p>
+ <p>bbb</b></p>
+ <p>bbb</b></p>
+<blink>&grec;
+<p xmlns:xqp="http://www.w3.org/1998/XML">
+ <span xqp:space="preserve">~~~</span>
+</p></blink>
+<html:p xmlns:html="http://...."></p>
+<@/><!--Apple logo in PUA-->
+<!--comment--comment-->
+<!--comment--comment>
+
+<P>]]>
+<P id="7" id="8">M</p>
+<p xmlns:a="urn" xmlns:b="urn"
+ a:id="7" b:id="9">~~~</p>
+<p id="a" idref="a"/> BE EVIL!
+<extreme sID="a" mood="happy"/>
+<extreme eID="a" mood="sad"/>
+<math><mi>2</mi><msup>3
+ </msup></math> <title>
+<verse><seg>When,</seg><seg>in</line>
+<line>the beginning</line><line>God created
+the heaven and the earth.</line></verse>
+
+<How/><To/><Markup/><Legibly/>
+<Name Name="Name">Name</Name>
+<list 4 text </p>
+<marquee>foo!</marquee>
+
\ No newline at end of file
Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: activemq/camel/trunk/components/pom.xml
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/pom.xml?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/components/pom.xml (original)
+++ activemq/camel/trunk/components/pom.xml Thu Dec 11 07:42:19 2008
@@ -91,5 +91,6 @@
<module>camel-xmlbeans</module>
<module>camel-xmpp</module>
<module>camel-xstream</module>
+ <module>camel-tagsoup</module>
</modules>
</project>
Modified: activemq/camel/trunk/pom.xml
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/pom.xml?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/pom.xml (original)
+++ activemq/camel/trunk/pom.xml Thu Dec 11 07:42:19 2008
@@ -917,6 +917,13 @@
<version>1.0.1</version>
</dependency>
+ <dependency>
+ <groupId>org.ccil.cowan.tagsoup</groupId>
+ <artifactId>tagsoup</artifactId>
+ <version>1.2</version>
+ </dependency>
+
+
</dependencies>
</dependencyManagement>