You are viewing a plain text version of this content. The canonical link for it is here.
Posted to portalapps-dev@portals.apache.org by wo...@apache.org on 2014/07/06 01:33:22 UTC

svn commit: r1608141 - in /portals/applications/webcontent/trunk: ./ content-rewriter/ content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/ content-rewriter/src/test/java/org/apache/portals/applications/webc...

Author: woonsan
Date: Sat Jul  5 23:33:22 2014
New Revision: 1608141

URL: http://svn.apache.org/r1608141
Log:
APA-59: Initial API definition for content rewriter.
- Verifying the API with an htmlcleaner-based test case

Added:
    portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/
    portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java
    portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java
    portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java
    portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java
    portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html
Modified:
    portals/applications/webcontent/trunk/content-rewriter/pom.xml
    portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java
    portals/applications/webcontent/trunk/pom.xml

Modified: portals/applications/webcontent/trunk/content-rewriter/pom.xml
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/pom.xml?rev=1608141&r1=1608140&r2=1608141&view=diff
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/pom.xml (original)
+++ portals/applications/webcontent/trunk/content-rewriter/pom.xml Sat Jul  5 23:33:22 2014
@@ -47,6 +47,11 @@
     </dependency>
 
     <dependency>
+      <groupId>net.sourceforge.htmlcleaner</groupId>
+      <artifactId>htmlcleaner</artifactId>
+    </dependency>
+
+    <dependency>
       <groupId>commons-lang</groupId>
       <artifactId>commons-lang</artifactId>
     </dependency>

Added: portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java Sat Jul  5 23:33:22 2014
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner;
+
+import org.apache.commons.beanutils.ConstructorUtils;
+import org.htmlcleaner.Serializer;
+
+/** {@link SerializerFactory} that builds each {@link Serializer} by reflectively invoking a constructor of the configured class. */
+public class DefaultSerializerFactory implements SerializerFactory
+{
+    private Class<? extends Serializer> serializerClass; // serializer type to instantiate; must be set before use
+    private Object[] arguments; // constructor arguments handed to ConstructorUtils.invokeConstructor
+
+    public Class<? extends Serializer> getSerializerClass()
+    {
+        return serializerClass;
+    }
+
+    public void setSerializerClass(Class<? extends Serializer> serializerClass)
+    {
+        this.serializerClass = serializerClass;
+    }
+
+    public Object[] getArguments()
+    {
+        return arguments;
+    }
+
+    public void setArguments(Object[] arguments)
+    {
+        this.arguments = arguments;
+    }
+
+    public Serializer createSerializer() throws Exception
+    {
+        if (serializerClass == null) { // report the misconfiguration instead of passing null to the reflective call
+            throw new IllegalStateException("serializerClass must be set before createSerializer() is called.");
+        }
+        return (Serializer) ConstructorUtils.invokeConstructor(serializerClass, arguments);
+    }
+}

Added: portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java Sat Jul  5 23:33:22 2014
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.Writer;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.portals.applications.webcontent2.rewriter2.ContentRewriter;
+import org.apache.portals.applications.webcontent2.rewriter2.ContentRewritingContext;
+import org.apache.portals.applications.webcontent2.rewriter2.ContentRewritingException;
+import org.apache.portals.applications.webcontent2.rewriter2.Sink;
+import org.apache.portals.applications.webcontent2.rewriter2.Source;
+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.Serializer;
+import org.htmlcleaner.TagNode;
+import org.htmlcleaner.TagNodeVisitor;
+
+/** {@link ContentRewriter} that parses HTML with HtmlCleaner, optionally visits the tag nodes and serializes the tree. */
+public class HtmlCleanerContentRewriter implements ContentRewriter
+{
+    private final HtmlCleaner cleaner;
+    private final SerializerFactory serializerFactory;
+    private TagNodeVisitor tagNodeVisitor;
+
+    public HtmlCleanerContentRewriter(final HtmlCleaner cleaner, final SerializerFactory serializerFactory)
+    {
+        this.cleaner = cleaner;
+        this.serializerFactory = serializerFactory;
+    }
+
+    /** Sets the visitor applied to the cleaned tree before serialization; {@code null} disables traversal. */
+    public void setTagNodeVisitor(TagNodeVisitor tagNodeVisitor)
+    {
+        this.tagNodeVisitor = tagNodeVisitor;
+    }
+
+    /** Cleans the markup read from {@code source} and writes the serialized result to the writer of {@code sink}. */
+    public void rewrite(Source source, Sink sink, ContentRewritingContext context) throws ContentRewritingException, IOException
+    {
+        Serializer serializer = null;
+
+        try {
+            serializer = serializerFactory.createSerializer();
+        } catch (Exception e) {
+            throw new ContentRewritingException("Failed to create serializer. " + e, e);
+        }
+
+        Reader reader = null;
+        BufferedReader br = null;
+        Writer writer = null;
+
+        try {
+            reader = source.getReader();
+            br = new BufferedReader(reader);
+            writer = sink.getWriter();
+
+            TagNode tagNode = cleaner.clean(br);
+
+            if (tagNodeVisitor != null) {
+                tagNode.traverse(tagNodeVisitor);
+            }
+
+            serializer.write(tagNode, writer, "UTF-8");
+        } finally {
+            IOUtils.closeQuietly(br);
+            IOUtils.closeQuietly(reader);
+            IOUtils.closeQuietly(writer);
+        }
+    }
+}

Added: portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java Sat Jul  5 23:33:22 2014
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner;
+
+import org.htmlcleaner.Serializer;
+
+/** Creates the HtmlCleaner {@link Serializer} instances used to write out a cleaned document. */
+public interface SerializerFactory {
+    Serializer createSerializer() throws Exception;
+}

Added: portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java Sat Jul  5 23:33:22 2014
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.portals.applications.webcontent2.rewriter2;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+
+import org.apache.commons.io.output.TeeOutputStream;
+import org.apache.commons.lang.StringUtils;
+import org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner.DefaultSerializerFactory;
+import org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner.HtmlCleanerContentRewriter;
+import org.apache.portals.applications.webcontent2.rewriter2.impl.SimpleContentRewritingContext;
+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.HtmlNode;
+import org.htmlcleaner.SimpleHtmlSerializer;
+import org.htmlcleaner.TagNode;
+import org.htmlcleaner.TagNodeVisitor;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Verifies that {@link HtmlCleanerContentRewriter} rewrites links and removes a named anchor in guidelines.html. */
+public class HtmlCleanerContentRewriterTest
+{
+    private static final Logger log = LoggerFactory.getLogger(HtmlCleanerContentRewriterTest.class);
+
+    private Source source;
+    private Sink sink;
+    private ByteArrayOutputStream sinkTeeOut;
+    private HtmlCleanerContentRewriter contentRewriter;
+
+    /** Builds the classpath-backed source, the tee'd sink and the rewriter with a link-rewriting visitor. */
+    @Before
+    public void before() {
+        source = new Source() {
+            public InputStream getInputStream() throws IOException
+            {
+                return HtmlCleanerContentRewriterTest.class.getResourceAsStream("guidelines.html");
+            }
+            public Reader getReader() throws IOException
+            {
+                return new InputStreamReader(getInputStream(), "UTF-8"); // fixture declares charset=UTF-8
+            }
+        };
+
+        sinkTeeOut = new ByteArrayOutputStream();
+        sink = new Sink() {
+            public OutputStream getOutputStream() throws IOException
+            {
+                return new TeeOutputStream(System.out, sinkTeeOut);
+            }
+            public Writer getWriter() throws IOException
+            {
+                return new OutputStreamWriter(getOutputStream(), "UTF-8"); // matches the charset the rewriter serializes with
+            }
+        };
+
+        HtmlCleaner cleaner = new HtmlCleaner();
+        cleaner.getProperties().setOmitXmlDeclaration(true);
+
+        DefaultSerializerFactory serializerFactory = new DefaultSerializerFactory();
+        serializerFactory.setSerializerClass(SimpleHtmlSerializer.class);
+        serializerFactory.setArguments(new Object [] { cleaner.getProperties() });
+
+        contentRewriter = new HtmlCleanerContentRewriter(cleaner, serializerFactory);
+
+        final String siteUrl = "http://www.example.com/";
+        contentRewriter.setTagNodeVisitor(new TagNodeVisitor() {
+            public boolean visit(TagNode tagNode, HtmlNode htmlNode) {
+                if (htmlNode instanceof TagNode) {
+                    TagNode tag = (TagNode) htmlNode;
+                    String tagName = tag.getName();
+
+                    if ("a".equals(tagName) || "link".equals(tagName)) {
+                        String href = tag.getAttributeByName("href");
+
+                        if (href != null) {
+                            tag.addAttribute("href", siteUrl + StringUtils.removeStart(href, "./"));
+                        }
+                    }
+
+                    if ("a".equals(tagName) && "Project_Guidelines".equals(tag.getAttributeByName("name"))) {
+                        tag.removeFromTree();
+                    }
+                }
+
+                // tells visitor to continue traversing the DOM tree
+                return true;
+            }
+        });
+    }
+
+    @Test
+    public void testRewriter() throws Exception {
+        ContentRewritingContext rewritingContext = new SimpleContentRewritingContext();
+        contentRewriter.rewrite(source, sink, rewritingContext);
+        String output = sinkTeeOut.toString("UTF-8");
+        log.debug("OUTPUT: {}", output);
+
+        assertTrue(output.contains("<link rel=\"stylesheet\" href=\"http://www.example.com/css/print.css\" type=\"text/css\" media=\"print\" />"));
+        assertFalse(output.contains("name=\"Project_Guidelines\""));
+        assertTrue(output.contains("<a href=\"http://www.example.com/roles.html\">Roles and Responsibilities</a>"));
+        assertTrue(output.contains("general@portals <a href=\"http://www.example.com/mail-lists.html#general\">"));
+    }
+}

Modified: portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java?rev=1608141&r1=1608140&r2=1608141&view=diff
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java (original)
+++ portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java Sat Jul  5 23:33:22 2014
@@ -79,7 +79,7 @@ public class TextLineContentRewriterTest
     }
 
     @Test
-    public void testA() throws Exception {
+    public void testRewriter() throws Exception {
         ContentRewritingContext rewritingContext = new SimpleContentRewritingContext();
         contentRewriter.rewrite(source, sink, rewritingContext);
         String output = sinkTeeOut.toString();

Added: portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html Sat Jul  5 23:33:22 2014
@@ -0,0 +1,41 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+<title>The Apache Portals Site - Project Guidelines</title>
+<link rel="stylesheet" href="./css/print.css" type="text/css"
+	media="print" />
+<meta name="author" content="Apache Portals Project" />
+</head>
+<body>
+
+	<h2>
+		<a name="Project_Guidelines"></a>Project Guidelines
+	</h2>
+	<p>This document defines the guidelines of the Portals Project. It
+		includes definitions of the various categories of membership, who is
+		able to vote, how conflicts are resolved by voting, and the procedures
+		to follow for proposing and making changes to the codebase of the
+		Project.</p>
+	<ul>
+		<li><a href="./roles.html">Roles and Responsibilities</a><br />
+			Defines the recognized roles in the project.</li>
+		<li><a href="./communication.html">Communication</a><br />
+			Defines how users and developers communicate.</li>
+		<li><a href="./decisions.html">Decision Making</a><br /> Defines
+			how action items are proposed and voted on.</li>
+		<li><a href="./management.html">Project Management</a><br />
+			Defines the roles and responsibilities of the Project Management
+			Committee (PMC).</li>
+		<li><a href="./newproject.html">New Subproject Proposals</a><br />
+			Defines the methodology for proposing new top level Jakarta
+			Subprojects.</li>
+	</ul>
+	<p>
+		This is a living document. Changes can be made by the Project
+		Management Committee. Suggestions for changes should be discussed on
+		the general@portals <a href="./mail-lists.html#general">mailing
+			list</a>
+	</p>
+</body>
+</html>
\ No newline at end of file

Modified: portals/applications/webcontent/trunk/pom.xml
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/pom.xml?rev=1608141&r1=1608140&r2=1608141&view=diff
==============================================================================
--- portals/applications/webcontent/trunk/pom.xml (original)
+++ portals/applications/webcontent/trunk/pom.xml Sat Jul  5 23:33:22 2014
@@ -42,6 +42,7 @@
     <junit.version>4.11</junit.version>
     <easymock.version>3.2</easymock.version>
     <nekohtml.version>0.9.5</nekohtml.version>
+    <htmlcleaner.version>2.8</htmlcleaner.version>
     <castor.version>1.1.1</castor.version>
     <httpcomponents-httpcore.version>4.0.1</httpcomponents-httpcore.version>
     <httpcomponents-httpclient.version>4.0</httpcomponents-httpclient.version>
@@ -159,6 +160,12 @@
       </dependency>
 
       <dependency>
+        <groupId>net.sourceforge.htmlcleaner</groupId>
+        <artifactId>htmlcleaner</artifactId>
+        <version>${htmlcleaner.version}</version>
+      </dependency>
+
+      <dependency>
         <groupId>org.apache.httpcomponents</groupId>
         <artifactId>httpcore</artifactId>
         <version>${httpcomponents-httpcore.version}</version>