You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by ja...@apache.org on 2008/12/11 16:42:20 UTC

svn commit: r725715 - in /activemq/camel/trunk: ./ camel-core/src/main/java/org/apache/camel/builder/ camel-core/src/main/java/org/apache/camel/model/ camel-core/src/main/java/org/apache/camel/model/dataformat/ camel-core/src/main/resources/org/apache/...

Author: janstey
Date: Thu Dec 11 07:42:19 2008
New Revision: 725715

URL: http://svn.apache.org/viewvc?rev=725715&view=rev
Log:
CAMEL-1184 - Add tidyMarkup dataformat for unmarshaling bad HTML into good (XML) HTML.

Thanks to Ramon Buckland for this!


Added:
    activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java   (with props)
    activemq/camel/trunk/components/camel-tagsoup/
    activemq/camel/trunk/components/camel-tagsoup/pom.xml   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/
    activemq/camel/trunk/components/camel-tagsoup/src/main/
    activemq/camel/trunk/components/camel-tagsoup/src/main/java/
    activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/
    activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/
    activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/
    activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/
    activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/
    activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/main/resources/
    activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/
    activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/test/
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/test/resources/
    activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/
    activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/
    activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/
    activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/
    activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/
    activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html   (with props)
    activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html   (with props)
Modified:
    activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java
    activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java
    activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java
    activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java
    activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index
    activemq/camel/trunk/components/pom.xml
    activemq/camel/trunk/pom.xml

Modified: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java (original)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/builder/DataFormatClause.java Thu Dec 11 07:42:19 2008
@@ -18,7 +18,6 @@
 
 import java.util.zip.Deflater;
 
-import org.apache.camel.Processor;
 import org.apache.camel.model.ProcessorType;
 import org.apache.camel.model.dataformat.ArtixDSContentType;
 import org.apache.camel.model.dataformat.ArtixDSDataFormat;
@@ -30,6 +29,7 @@
 import org.apache.camel.model.dataformat.RssDataFormat;
 import org.apache.camel.model.dataformat.SerializationDataFormat;
 import org.apache.camel.model.dataformat.StringDataFormat;
+import org.apache.camel.model.dataformat.TidyMarkupDataFormat;
 import org.apache.camel.model.dataformat.XMLBeansDataFormat;
 import org.apache.camel.model.dataformat.XStreamDataFormat;
 import org.apache.camel.model.dataformat.ZipDataFormat;
@@ -163,6 +163,23 @@
     }
 
     /**
+     * Uses the TidyMarkup data format to return well-formed HTML (an XML document)
+     * as either a {@link java.lang.String} or a {@link org.w3c.dom.Node}
+     */
+    public T tidyMarkup(Class<?> dataObjectType) {
+        return dataFormat(new TidyMarkupDataFormat(dataObjectType));
+    }
+
+    /**
+     * Uses the TidyMarkup data format to return well-formed HTML (an XML document)
+     * as a {@link java.lang.String}
+     */
+    public T tidyMarkup() {
+        return dataFormat(new TidyMarkupDataFormat(String.class));
+    }
+
+    
+    /**
      * Uses the XStream data format
      */
     public T xstream() {

Modified: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java (original)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/MarshalType.java Thu Dec 11 07:42:19 2008
@@ -60,7 +60,7 @@
     @XmlElement(required = false, name = "jaxb", type = JaxbDataFormat.class),
     @XmlElement(required = false, name = "rss", type = RssDataFormat.class),
     @XmlElement(required = false, name = "serialization", type = SerializationDataFormat.class),
-    @XmlElement(required = false, name = "string", type = StringDataFormat.class),
+    @XmlElement(required = false, name = "string", type = StringDataFormat.class), 
     @XmlElement(required = false, name = "xmlBeans", type = XMLBeansDataFormat.class),
     @XmlElement(required = false, name = "xstream", type = XStreamDataFormat.class),
     @XmlElement(required = false, name = "zip", type = ZipDataFormat.class)}

Modified: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java (original)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/UnmarshalType.java Thu Dec 11 07:42:19 2008
@@ -33,6 +33,7 @@
 import org.apache.camel.model.dataformat.RssDataFormat;
 import org.apache.camel.model.dataformat.SerializationDataFormat;
 import org.apache.camel.model.dataformat.StringDataFormat;
+import org.apache.camel.model.dataformat.TidyMarkupDataFormat;
 import org.apache.camel.model.dataformat.XMLBeansDataFormat;
 import org.apache.camel.model.dataformat.XStreamDataFormat;
 import org.apache.camel.model.dataformat.ZipDataFormat;
@@ -61,6 +62,7 @@
     @XmlElement(required = false, name = "rss", type = RssDataFormat.class),
     @XmlElement(required = false, name = "serialization", type = SerializationDataFormat.class),
     @XmlElement(required = false, name = "string", type = StringDataFormat.class),
+    @XmlElement(required = false, name = "tidyMarkup", type = TidyMarkupDataFormat.class),    
     @XmlElement(required = false, name = "xmlBeans", type = XMLBeansDataFormat.class),
     @XmlElement(required = false, name = "xstream", type = XStreamDataFormat.class),
     @XmlElement(required = false, name = "zip", type = ZipDataFormat.class)}

Modified: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java (original)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/DataFormatsType.java Thu Dec 11 07:42:19 2008
@@ -48,6 +48,7 @@
         @XmlElement(required = false, name = "rss", type = RssDataFormat.class),
         @XmlElement(required = false, name = "serialization", type = SerializationDataFormat.class),
         @XmlElement(required = false, name = "string", type = StringDataFormat.class),
+        @XmlElement(required = false, name = "tidyMarkup", type = TidyMarkupDataFormat.class),        
         @XmlElement(required = false, name = "xmlBeans", type = XMLBeansDataFormat.class),
         @XmlElement(required = false, name = "xstream", type = XStreamDataFormat.class),
         @XmlElement(required = false, name = "zip", type = ZipDataFormat.class)}

Added: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java (added)
+++ activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.model.dataformat;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlAttribute;
+import javax.xml.bind.annotation.XmlRootElement;
+
+import org.w3c.dom.Node;
+
+import org.apache.camel.spi.DataFormat;
+
+
+/**
+ * Represents the TidyMarkup {@link DataFormat}, which turns HTML into a well-formed XML document
+ *
+ */
+@XmlRootElement(name = "tidyMarkup")
+@XmlAccessorType(XmlAccessType.FIELD)
+public class TidyMarkupDataFormat extends DataFormatType {
+
+    @XmlAttribute(required = false)
+    private Class<?> dataObjectType;
+
+    public TidyMarkupDataFormat() {
+        super("org.apache.camel.dataformat.tagsoup.TidyMarkupDataFormat");
+        this.setDataObjectType(Node.class);
+    }
+
+    public TidyMarkupDataFormat(Class<?> dataObjectType) {
+        this();
+        assert dataObjectType.isAssignableFrom(String.class) || dataObjectType.isAssignableFrom(Node.class) 
+            : "TidyMarkupDataFormat only supports returning a String or a org.w3c.dom.Node object";
+        this.setDataObjectType(dataObjectType);
+    }
+
+    public void setDataObjectType(Class<?> dataObjectType) {
+        this.dataObjectType = dataObjectType;
+    }
+
+    public Class<?> getDataObjectType() {
+        return dataObjectType;
+    }
+
+    // Implementation methods
+    //-------------------------------------------------------------------------
+
+    @Override
+    protected void configureDataFormat(DataFormat dataFormat) {
+        Class<?> type = getDataObjectType();
+        if (type != null) {
+            setProperty(dataFormat, "dataObjectType", type);
+        }
+    }
+
+}
\ No newline at end of file

Propchange: activemq/camel/trunk/camel-core/src/main/java/org/apache/camel/model/dataformat/TidyMarkupDataFormat.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index (original)
+++ activemq/camel/trunk/camel-core/src/main/resources/org/apache/camel/model/dataformat/jaxb.index Thu Dec 11 07:42:19 2008
@@ -25,6 +25,7 @@
 RssDataFormat
 SerializationDataFormat
 StringDataFormat
+TidyMarkupDataFormat
 XMLBeansDataFormat
 XStreamDataFormat
-ZipDataFormat
\ No newline at end of file
+ZipDataFormat

Added: activemq/camel/trunk/components/camel-tagsoup/pom.xml
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/pom.xml?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/pom.xml (added)
+++ activemq/camel/trunk/components/camel-tagsoup/pom.xml Thu Dec 11 07:42:19 2008
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.camel</groupId>
+    <artifactId>camel-parent</artifactId>
+    <version>2.0-SNAPSHOT</version>
+  </parent>
+
+  <artifactId>camel-tagsoup</artifactId>
+  <packaging>bundle</packaging>
+  <name>Camel :: TagSoup</name>
+  <description>Camel TagSoup support</description>
+
+  <properties>
+    <camel.osgi.export.pkg>org.apache.camel.dataformat.tagsoup.*</camel.osgi.export.pkg>
+  </properties>
+
+  <dependencies>
+
+    <dependency>
+      <groupId>org.apache.camel</groupId>
+      <artifactId>camel-core</artifactId>
+    </dependency>
+
+    <dependency>
+        <groupId>org.ccil.cowan.tagsoup</groupId>
+        <artifactId>tagsoup</artifactId>
+    </dependency>
+
+    <!-- testing -->
+    <dependency>
+      <groupId>org.apache.camel</groupId>
+      <artifactId>camel-core</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.camel</groupId>
+      <artifactId>camel-spring</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+</project>

Propchange: activemq/camel/trunk/components/camel-tagsoup/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,308 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMResult;
+import javax.xml.transform.sax.SAXSource;
+
+import org.w3c.dom.Node;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.InputSource;
+import org.xml.sax.XMLReader;
+import org.apache.camel.CamelException;
+import org.apache.camel.Exchange;
+import org.apache.camel.spi.DataFormat;
+import org.ccil.cowan.tagsoup.HTMLSchema;
+import org.ccil.cowan.tagsoup.Parser;
+import org.ccil.cowan.tagsoup.Schema;
+import org.ccil.cowan.tagsoup.XMLWriter;
+
+/**
+ * Data format for TidyMarkup, i.e. well-formed HTML in XML form (which may or
+ * may not be XHTML). This data format is intended to convert bad HTML from a
+ * site (or file) into a well-formed HTML document which can then be sent to
+ * XSLT or queried with XPath.
+ * 
+ */
+public class TidyMarkupDataFormat implements DataFormat {
+
+    private static final String NO = "no";
+
+    private static final String YES = "yes";
+
+    private static final String XML = "xml";
+
+    /**
+     * When returning a String, do we omit the XML declaration?
+     */
+    private boolean isOmitXmlDeclaration;
+
+    /**
+     * String or Node to return
+     */
+    private Class dataObjectType;
+
+    /**
+     * What is the default output format ?
+     */
+    private String method;
+
+    /**
+     * The Schema which we are parsing (default HTMLSchema)
+     */
+    private Schema parsingSchema;
+
+    /**
+     * User supplied Parser features
+     * <p>
+     * <a href="http://home.ccil.org/~cowan/XML/tagsoup/#properties">TagSoup properties</a>,
+     * <a href="http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html">SAX features and properties</a>
+     * </p>
+     */
+    private Map<String, Boolean> parserFeatures;
+
+    /**
+     * User supplied Parser properties
+     * <p>
+     * <a href="http://home.ccil.org/~cowan/XML/tagsoup/#properties">TagSoup properties</a>,
+     * <a href="http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html">SAX features and properties</a>
+     * </p>
+     */
+    private Map<String, Object> parserPropeties;
+
+    /**
+     * Unsupported operation: this data format only tidies markup on unmarshal.
+     *
+     * @param exchange     the current exchange (unused)
+     * @param object       the object that was asked to be marshalled (unused)
+     * @param outputStream the stream that would have received the result (unused)
+     * @throws CamelException always, as marshalling is not supported
+     */
+    public void marshal(Exchange exchange, Object object, OutputStream outputStream) throws Exception {
+        // the XML DSL element for this data format is named tidyMarkup, so point the user at it
+        throw new CamelException("Marshalling from Well Formed HTML to ugly HTML is not supported."
+                + " Only use <unmarshal><tidyMarkup/></unmarshal>");
+    }
+
+    /**
+     * Unmarshal the HTML read from the stream into tidy markup, returned either
+     * as a String or as a DOM Node depending on the configured dataObjectType.
+     *
+     * @param exchange    the current exchange (unused)
+     * @param inputStream the HTML to tidy
+     * @return the tidy markup as a {@link String} or a {@link Node}
+     * @throws CamelException if dataObjectType is not set or not supported, or
+     *                        if the HTML cannot be converted
+     */
+    public Object unmarshal(Exchange exchange, InputStream inputStream) throws Exception {
+
+        // dataObjectType has no default in this class, so fail with a clear
+        // message rather than a NullPointerException when it was never set
+        if (dataObjectType == null) {
+            throw new CamelException("The return type (dataObjectType) has not been set,"
+                    + " it must be java.lang.String or org.w3c.dom.Node");
+        }
+
+        if (dataObjectType.isAssignableFrom(String.class)) {
+            return asStringTidyMarkup(inputStream);
+        } else if (dataObjectType.isAssignableFrom(Node.class)) {
+            return asNodeTidyMarkup(inputStream);
+        } else {
+            throw new CamelException("The return type [" + dataObjectType.getCanonicalName() + "] is unsupported");
+        }
+    }
+
+    /**
+     * Return the tidy markup as a string
+     * 
+     * @param inputStream the HTML to tidy; it is closed once parsing has finished
+     * @return String of XML
+     * @throws CamelException if the HTML cannot be parsed or the stream cannot be closed
+     */
+    private String asStringTidyMarkup(InputStream inputStream) throws CamelException {
+
+        XMLReader parser = createTagSoupParser();
+        StringWriter w = new StringWriter();
+        parser.setContentHandler(createContentHandler(w));
+
+        try {
+            parser.parse(new InputSource(inputStream));
+            return w.toString();
+
+        } catch (Exception e) {
+            throw new CamelException("Failed to turn the HTML into tidy Markup", e);
+        } finally {
+            try {
+                inputStream.close();
+            } catch (Exception e) {
+                throw new CamelException("Failed to close the inputStream", e);
+            }
+        }
+    }
+
+    /**
+     * Return the tidy markup as a DOM node
+     *
+     * @param inputStream the HTML to tidy
+     * @return the node produced by running the TagSoup parser through an identity transform
+     * @throws CamelException if the parser cannot be created or the HTML cannot be converted
+     */
+    private Node asNodeTidyMarkup(InputStream inputStream) throws CamelException {
+        // No XMLWriter content handler is installed here: the transform below
+        // registers its own handler on the reader in order to build the DOM tree.
+        XMLReader parser = createTagSoupParser();
+
+        try {
+            Transformer transformer = TransformerFactory.newInstance().newTransformer();
+            DOMResult result = new DOMResult();
+            transformer.transform(new SAXSource(parser, new InputSource(inputStream)), result);
+            return result.getNode();
+        } catch (Exception e) {
+            // keep the cause so the underlying parser/transformer failure is not lost
+            throw new CamelException("Failed to convert the HTML to tidy Markup (returning as a DOM Node)", e);
+        }
+    }
+
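+    /**
+     * Create the TagSoup parser
+     * 
+     * @return the reader, configured with the user supplied features, properties and parsing schema
+     * @throws CamelException if a feature or property cannot be set on the parser
+     */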
+    protected XMLReader createTagSoupParser() throws CamelException {
+        XMLReader reader = new Parser();
+        try {
+            reader.setFeature(Parser.namespacesFeature, false);
+            reader.setFeature(Parser.namespacePrefixesFeature, false);
+
+            /*
+             * set each parser feature that the user may have supplied.
+             * http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html
+             * http://home.ccil.org/~cowan/XML/tagsoup/#properties
+             */
+
+            if (getParserFeatures() != null) {
+                for (Entry<String, Boolean> e : getParserFeatures().entrySet()) {
+                    reader.setFeature(e.getKey(), e.getValue());
+                }
+            }
+
+            /*
+             * set each parser property that the user may have supplied.
+             * http://home.ccil.org/~cowan/XML/tagsoup/#properties
+             */
+
+            if (getParserPropeties() != null) {
+                for (Entry<String, Object> e : getParserPropeties().entrySet()) {
+                    reader.setProperty(e.getKey(), e.getValue());
+                }
+            }
+
+            /*
+             * set the parsing schema; getParsingSchema() defaults it to HTMLSchema
+             */
+            if (this.getParsingSchema() != null) {
+                reader.setProperty(Parser.schemaProperty, getParsingSchema());
+            }
+
+        } catch (Exception e) {
+            throw new CamelException("Problem setting the parser feature", e);
+        }
+        return reader;
+    }
+
+    /**
+     * @param schema
+     *            the parsing schema to set
+     */
+    public void setParsingSchema(Schema schema) {
+        this.parsingSchema = schema;
+    }
+
+    /**
+     * @return the parsing schema, defaulting to a new HTMLSchema if none was set
+     */
+    public Schema getParsingSchema() {
+        if (parsingSchema == null) {
+            this.parsingSchema = new HTMLSchema();
+        }
+        return parsingSchema;
+    }
+
+    protected ContentHandler createContentHandler(Writer w) {
+        XMLWriter xmlWriter = new XMLWriter(w);
+
+        // we might need to expose more than these two but that is pretty good
+        // for a default well formed Html generator
+        if (getMethod() != null) {
+            xmlWriter.setOutputProperty(XMLWriter.METHOD, getMethod());
+        } else {
+            xmlWriter.setOutputProperty(XMLWriter.METHOD, XML);
+        }
+
+        if (isOmitXmlDeclaration) {
+            xmlWriter.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, YES);
+        } else {
+            xmlWriter.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, NO);
+        }
+        return xmlWriter;
+
+    }
+
+    /**
+     * @param parserFeatures
+     *            the parserFeatures to set
+     */
+    public void setParserFeatures(Map<String, Boolean> parserFeatures) {
+        this.parserFeatures = parserFeatures;
+    }
+
+    /**
+     * @return the parserFeatures
+     */
+    public Map<String, Boolean> getParserFeatures() {
+        return parserFeatures;
+    }
+
+    /**
+     * @param parserPropeties
+     *            the parserPropeties to set
+     */
+    public void setParserPropeties(Map<String, Object> parserPropeties) {
+        this.parserPropeties = parserPropeties;
+    }
+
+    /**
+     * @return the parserPropeties
+     */
+    public Map<String, Object> getParserPropeties() {
+        return parserPropeties;
+    }
+
+    /**
+     * @param method
+     *            the method to set
+     */
+    public void setMethod(String method) {
+        this.method = method;
+    }
+
+    /**
+     * @return the method
+     */
+    public String getMethod() {
+        return method;
+    }
+
+    /**
+     * @return the dataObjectType
+     */
+    public Class getDataObjectType() {
+        return dataObjectType;
+    }
+
+    /**
+     * @param dataObjectType
+     *            the dataObjectType to set
+     */
+    public void setDataObjectType(Class dataObjectType) {
+        this.dataObjectType = dataObjectType;
+    }
+
+}

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/main/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormat.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt Thu Dec 11 07:42:19 2008
@@ -0,0 +1,203 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/LICENSE.txt
------------------------------------------------------------------------------
    svn:executable = *

Added: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt Thu Dec 11 07:42:19 2008
@@ -0,0 +1,11 @@
+   =========================================================================
+   ==  NOTICE file corresponding to the section 4 d of                    ==
+   ==  the Apache License, Version 2.0,                                   ==
+   ==  in this case for the Apache Camel distribution.                    ==
+   =========================================================================
+
+   This product includes software developed by
+   The Apache Software Foundation (http://www.apache.org/).
+
+   Please read the different LICENSE files present in the licenses directory of
+   this distribution.

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/main/resources/META-INF/NOTICE.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import java.io.File;
+import java.util.List;
+
+import org.apache.camel.ContextTestSupport;
+import org.apache.camel.Exchange;
+import org.apache.camel.Message;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * @version $Revision: 700232 $
+ */
+public class TidyMarkupDataFormatAsDomNodeTest extends ContextTestSupport {
+    private static final transient Log LOG = LogFactory.getLog(TidyMarkupDataFormatAsDomNodeTest.class);
+
+    public void testUnMarshalToStringOfXml() throws Exception {
+        MockEndpoint resultEndpoint = resolveMandatoryEndpoint("mock:result", MockEndpoint.class);
+        resultEndpoint.expectedMessageCount(2);
+
+        /*
+         * Each of these files contains a <p>TidyMarkupNode paragraph that has no closing tag.
+         * 
+         * See the route below: the tidied markup is passed to an XPath boolean() test whose result becomes the body.
+         */
+        String badHtml = TidyMarkupTestSupport.loadFileAsString(new File(
+                "src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html"));
+        String evilHtml = TidyMarkupTestSupport.loadFileAsString(new File(
+                "src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html"));
+
+        template.sendBody("direct:start", badHtml);
+        template.sendBody("direct:start", evilHtml);
+
+        resultEndpoint.assertIsSatisfied();
+        List<Exchange> list = resultEndpoint.getReceivedExchanges();
+        for (Exchange exchange : list) {
+            Message in = exchange.getIn();
+            String response = in.getBody(String.class);
+
+            log.debug("Received " + response);
+            assertNotNull("Should be able to convert received body to a string", response);
+
+            try {
+                /*
+                 * The route uses XPath to test for our signature "<p>TidyMarkupNode" paragraph;
+                 * by the time the XPath is evaluated, the markup has already been made well formed.
+                 */
+                assertTrue(response.equals("true"));
+            } catch (Exception e) {
+
+                fail("Failed to convert the resulting String to XML: " + e.getLocalizedMessage());
+            }
+
+        }
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() {
+        return new RouteBuilder() {
+            public void configure() {
+                from("direct:start").unmarshal().tidyMarkup().setBody().xpath(
+                        "boolean(//p[contains(text(),'TidyMarkupNode')])", String.class).to("mock:result");
+            }
+        };
+    }
+
+}

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsDomNodeTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import java.io.File;
+import java.util.List;
+
+import org.w3c.dom.Node;
+
+import org.apache.camel.ContextTestSupport;
+import org.apache.camel.Exchange;
+import org.apache.camel.Message;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * @version $Revision: 700232 $
+ */
+public class TidyMarkupDataFormatAsStringTest extends ContextTestSupport {
+    private static final transient Log LOG = LogFactory.getLog(TidyMarkupDataFormatAsStringTest.class);
+
+    public void testUnMarshalToStringOfXml() throws Exception {
+        MockEndpoint resultEndpoint = resolveMandatoryEndpoint("mock:result", MockEndpoint.class);
+        resultEndpoint.expectedMessageCount(2);
+
+        String badHtml = TidyMarkupTestSupport.loadFileAsString(new File(
+                "src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html"));
+        String evilHtml = TidyMarkupTestSupport.loadFileAsString(new File(
+                "src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html"));
+
+        template.sendBody("direct:start", badHtml);
+        template.sendBody("direct:start", evilHtml);
+
+        resultEndpoint.assertIsSatisfied();
+        List<Exchange> list = resultEndpoint.getReceivedExchanges();
+        for (Exchange exchange : list) {
+            try {
+                Message in = exchange.getIn();
+                Node tidyMarkup = in.getBody(Node.class);
+
+                log.debug("Received " + tidyMarkup);
+                assertNotNull("Should be able to convert received body to a string", tidyMarkup);
+                
+            } catch (Exception e) {
+                fail("Failed to convert the resulting String to XML: " + e.getLocalizedMessage());
+            }
+        }
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() {
+        return new RouteBuilder() {
+            public void configure() {
+                from("direct:start").unmarshal().tidyMarkup().to("mock:result");
+            }
+        };
+    }
+
+}

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatAsStringTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.model.dataformat.TidyMarkupDataFormat;
+
+/*
+ * This just tests whether this dataformat is available to UnmarshalType
+ */
+public class TidyMarkupDataFormatWithUnmarshalTypeTest extends TidyMarkupDataFormatAsStringTest {
+
+    @Override
+    protected RouteBuilder createRouteBuilder() {
+        return new RouteBuilder() {
+            public void configure() {                
+                TidyMarkupDataFormat dataFormat = new TidyMarkupDataFormat(String.class);
+                from("direct:start").unmarshal(dataFormat).to("mock:result");
+            }
+        };
+    }
+
+}

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupDataFormatWithUnmarshalTypeTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java Thu Dec 11 07:42:19 2008
@@ -0,0 +1,84 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.dataformat.tagsoup;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.StringReader;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.w3c.dom.Document;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+public final class TidyMarkupTestSupport {
+    
+    private TidyMarkupTestSupport() {
+        // Utility class
+    }
+    
+    public static String loadFileAsString(File file) throws Exception {
+        StringBuilder fileContent = new StringBuilder();
+        BufferedReader input = new BufferedReader(new FileReader(file));
+        try {
+            String line = null;
+            while ((line = input.readLine()) != null) {
+                fileContent.append(line);
+                fileContent.append(System.getProperty("line.separator"));
+            }
+        } finally {
+            input.close();
+        }
+        return fileContent.toString();
+    }
+
+    /**
+     * Convert XML String to a Document.
+     * 
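+     * @param xmlString the XML text to parse
+     * @return the DOM Document parsed from xmlString
+     * @throws IOException if an I/O error occurs while reading the input
+     * @throws SAXException if the text cannot be parsed as XML
+     * @throws ParserConfigurationException if a DocumentBuilder cannot be created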
+     */
+    public static Document stringToXml(String xmlString) throws SAXException, IOException, ParserConfigurationException {
+        return createDocumentBuilder().parse(new InputSource(new StringReader(xmlString)));
+    }
+
+    /**
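+     * Creates a namespace-aware DocumentBuilder that ignores comments and element-content whitespace, coalesces CDATA and expands entity references.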
+     * 
+     * @return the newly created DocumentBuilder
+     * @throws ParserConfigurationException if a DocumentBuilder with this configuration cannot be created
+     */
+    public static DocumentBuilder createDocumentBuilder() throws ParserConfigurationException {
+
+        DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
+        docBuilderFactory.setIgnoringComments(true);
+        docBuilderFactory.setIgnoringElementContentWhitespace(true);
+        docBuilderFactory.setCoalescing(true);
+        docBuilderFactory.setExpandEntityReferences(true);
+        docBuilderFactory.setNamespaceAware(true);
+        return docBuilderFactory.newDocumentBuilder();
+    }
+}

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/java/org/apache/camel/dataformat/tagsoup/TidyMarkupTestSupport.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties Thu Dec 11 07:42:19 2008
@@ -0,0 +1,38 @@
+## ------------------------------------------------------------------------
+## Licensed to the Apache Software Foundation (ASF) under one or more
+## contributor license agreements.  See the NOTICE file distributed with
+## this work for additional information regarding copyright ownership.
+## The ASF licenses this file to You under the Apache License, Version 2.0
+## (the "License"); you may not use this file except in compliance with
+## the License.  You may obtain a copy of the License at
+##
+## http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+## ------------------------------------------------------------------------
+
+#
+# The logging properties used when running the tests. By default output goes to the file appender at INFO level.
+#
+log4j.rootLogger=INFO, file
+
+#log4j.logger.org.apache.camel=DEBUG
+#log4j.logger.org.apache.camel.impl.converter=INFO
+#log4j.logger.org.apache.camel.dataformat.tagsoup=DEBUG
+
+# CONSOLE appender not used by default
+log4j.appender.out=org.apache.log4j.ConsoleAppender
+log4j.appender.out.layout=org.apache.log4j.PatternLayout
+log4j.appender.out.layout.ConversionPattern=[%30.30t] %-30.30c{1} %-5p %m%n
+#log4j.appender.out.layout.ConversionPattern=%d [%-15.15t] %-5p %-30.30c{1} - %m%n
+
+# File appender
+log4j.appender.file=org.apache.log4j.FileAppender
+log4j.appender.file.layout=org.apache.log4j.PatternLayout
+log4j.appender.file.layout.ConversionPattern=%d [%-15.15t] %-5p %-30.30c{1} - %m%n
+log4j.appender.file.file=target/camel-atom-test.log
+log4j.appender.file.append=true

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/log4j.properties
------------------------------------------------------------------------------
    svn:eol-style = native

Added: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html Thu Dec 11 07:42:19 2008
@@ -0,0 +1,20 @@
+<html>
+     <head>
+      <title>FooBar</title>
+      <meta name=metatag value=foo>
+     </head>
+     
+     <body onload="foo()"  >
+         <p>
+            Some text
+            
+         <p>
+             Some more Text
+     <p>TidyMarkupNode
+          <img src=filename.jpg>
+          
+          <font color="red">Some red text
+          
+     </body>
+    
+</HTML>
\ No newline at end of file

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile1.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html?rev=725715&view=auto
==============================================================================
--- activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html (added)
+++ activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html Thu Dec 11 07:42:19 2008
@@ -0,0 +1,59 @@
+<html>
+<META-START>
+John Cowan
+<TABLE>
+<ROW>
+<CELL>SOUPE</CELL>
+<CELL>BE EVIL!</CELL></ROW>
+DE BALISES</TABLE>
+<CORR NEW="U" LOC="PI"/>
+<G ID="P1">
+Ecritez une balise ouvrante (sans attributs)
+</G>
+ou fermante HTML ici, s.v.p.</META-START>
+<FONT>X Y <p> ABC </FONT> xyz
+QRS<sup>TUV<sub>WXY</sup>Z</sub>
+
+<p>TidyMarkupNode
+<script language="javascript"><p></script>
+<table><tbody><tr><th>ABC
+</table><nr/>
+<meta><meta><meta><meta>
+<pre xml:space="default">test</pre>
+<test xmlns:xml="http://www.example.org/>
+</test><hr/>
+(add a random HTML tag above)
+<r:r:r:test/>
+
+<b><i></B></I>
+<b>
+  <p>bbb</b></p>
+  <p>bbb</b></p>
+  <p>bbb</b></p>
+<blink>&grec;
+<p xmlns:xqp="http://www.w3.org/1998/XML">
+ <span xqp:space="preserve">~~~</span>
+</p></blink>
+<html:p xmlns:html="http://...."></p>
+<@/><!--Apple logo in PUA-->
+<!--comment--comment-->
+<!--comment--comment>
+
+<P>]]>
+<P id="7" id="8">M</p>
+<p xmlns:a="urn" xmlns:b="urn"
+   a:id="7" b:id="9">~~~</p>
+<p id="a" idref="a"/>  BE EVIL!
+<extreme sID="a" mood="happy"/>
+<extreme eID="a" mood="sad"/>
+<math><mi>2</mi><msup>3
+  </msup></math>  <title>
+<verse><seg>When,</seg><seg>in</line>
+<line>the beginning</line><line>God created
+the heaven and the earth.</line></verse>
+
+<How/><To/><Markup/><Legibly/>
+<Name Name="Name">Name</Name>
+<list 4 text </p>
+<marquee>foo!</marquee>
+		
\ No newline at end of file

Propchange: activemq/camel/trunk/components/camel-tagsoup/src/test/resources/org/apache/camel/dataformat/tagsoup/testfile2-evilHtml.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: activemq/camel/trunk/components/pom.xml
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/components/pom.xml?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/components/pom.xml (original)
+++ activemq/camel/trunk/components/pom.xml Thu Dec 11 07:42:19 2008
@@ -91,5 +91,6 @@
     <module>camel-xmlbeans</module>
     <module>camel-xmpp</module>
     <module>camel-xstream</module>
+    <module>camel-tagsoup</module>
   </modules>
 </project>

Modified: activemq/camel/trunk/pom.xml
URL: http://svn.apache.org/viewvc/activemq/camel/trunk/pom.xml?rev=725715&r1=725714&r2=725715&view=diff
==============================================================================
--- activemq/camel/trunk/pom.xml (original)
+++ activemq/camel/trunk/pom.xml Thu Dec 11 07:42:19 2008
@@ -917,6 +917,13 @@
         <version>1.0.1</version>
       </dependency>
 
+      <dependency>
+        <groupId>org.ccil.cowan.tagsoup</groupId>
+        <artifactId>tagsoup</artifactId>
+        <version>1.2</version>
+      </dependency>
+
+
     </dependencies>
   </dependencyManagement>