You are viewing a plain text version of this content. The canonical link for it is here.
Posted to portalapps-dev@portals.apache.org by wo...@apache.org on 2014/07/06 01:33:22 UTC
svn commit: r1608141 - in /portals/applications/webcontent/trunk: ./
content-rewriter/
content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/
content-rewriter/src/test/java/org/apache/portals/applications/webc...
Author: woonsan
Date: Sat Jul 5 23:33:22 2014
New Revision: 1608141
URL: http://svn.apache.org/r1608141
Log:
APA-59: Initial API definition for content rewriter.
- Verifying the API with an htmlcleaner-based test case
Added:
portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/
portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java
portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java
portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java
portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java
portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html
Modified:
portals/applications/webcontent/trunk/content-rewriter/pom.xml
portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java
portals/applications/webcontent/trunk/pom.xml
Modified: portals/applications/webcontent/trunk/content-rewriter/pom.xml
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/pom.xml?rev=1608141&r1=1608140&r2=1608141&view=diff
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/pom.xml (original)
+++ portals/applications/webcontent/trunk/content-rewriter/pom.xml Sat Jul 5 23:33:22 2014
@@ -47,6 +47,11 @@
</dependency>
<dependency>
+ <groupId>net.sourceforge.htmlcleaner</groupId>
+ <artifactId>htmlcleaner</artifactId>
+ </dependency>
+
+ <dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
</dependency>
Added: portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/DefaultSerializerFactory.java Sat Jul 5 23:33:22 2014
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner;
+
+import org.apache.commons.beanutils.ConstructorUtils;
+import org.htmlcleaner.Serializer;
+
+/**
+ * {@link SerializerFactory} that creates {@link Serializer} instances reflectively from a
+ * configured serializer class and an array of constructor arguments.
+ */
+public class DefaultSerializerFactory implements SerializerFactory
+{
+    /** Serializer type to instantiate; must be set before {@link #createSerializer()} is called. */
+    private Class<? extends Serializer> serializerClass;
+
+    /** Arguments passed unchanged to the reflective constructor invocation. */
+    private Object[] arguments;
+
+    public DefaultSerializerFactory()
+    {
+    }
+
+    /**
+     * @return the serializer class to instantiate, or <code>null</code> if not configured yet
+     */
+    public Class<? extends Serializer> getSerializerClass()
+    {
+        return serializerClass;
+    }
+
+    /**
+     * @param serializerClass the serializer class to instantiate
+     */
+    public void setSerializerClass(Class<? extends Serializer> serializerClass)
+    {
+        this.serializerClass = serializerClass;
+    }
+
+    /**
+     * @return the constructor arguments as configured (the same array instance, not a copy)
+     */
+    public Object[] getArguments()
+    {
+        return arguments;
+    }
+
+    /**
+     * @param arguments the constructor arguments to use when instantiating the serializer
+     */
+    public void setArguments(Object[] arguments)
+    {
+        this.arguments = arguments;
+    }
+
+    /**
+     * Instantiates the configured serializer class with the configured constructor arguments.
+     *
+     * @return a new serializer instance
+     * @throws IllegalStateException if no serializer class has been configured
+     * @throws Exception if the reflective constructor invocation fails
+     */
+    public Serializer createSerializer() throws Exception
+    {
+        // Fail with a descriptive message instead of an opaque error from the reflection helper.
+        if (serializerClass == null) {
+            throw new IllegalStateException("serializerClass must be set before creating a serializer.");
+        }
+
+        return (Serializer) ConstructorUtils.invokeConstructor(serializerClass, arguments);
+    }
+}
Added: portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/HtmlCleanerContentRewriter.java Sat Jul 5 23:33:22 2014
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.Writer;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.portals.applications.webcontent2.rewriter2.ContentRewriter;
+import org.apache.portals.applications.webcontent2.rewriter2.ContentRewritingContext;
+import org.apache.portals.applications.webcontent2.rewriter2.ContentRewritingException;
+import org.apache.portals.applications.webcontent2.rewriter2.Sink;
+import org.apache.portals.applications.webcontent2.rewriter2.Source;
+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.Serializer;
+import org.htmlcleaner.TagNode;
+import org.htmlcleaner.TagNodeVisitor;
+
+/**
+ * {@link ContentRewriter} that parses the source content with an {@link HtmlCleaner},
+ * optionally lets a {@link TagNodeVisitor} inspect or modify the resulting node tree,
+ * and then serializes the tree to the sink.
+ */
+public class HtmlCleanerContentRewriter implements ContentRewriter
+{
+    private final HtmlCleaner cleaner;
+    private final SerializerFactory serializerFactory;
+
+    /** Optional visitor applied to the parsed tree before serialization; may be <code>null</code>. */
+    private TagNodeVisitor tagNodeVisitor;
+
+    /**
+     * @param cleaner the cleaner used to parse the source content
+     * @param serializerFactory the factory asked for a new serializer on every {@link #rewrite} call
+     */
+    public HtmlCleanerContentRewriter(final HtmlCleaner cleaner, final SerializerFactory serializerFactory)
+    {
+        this.cleaner = cleaner;
+        this.serializerFactory = serializerFactory;
+    }
+
+    /**
+     * @param tagNodeVisitor the visitor to traverse the parsed tree with, or <code>null</code> for none
+     */
+    public void setTagNodeVisitor(TagNodeVisitor tagNodeVisitor)
+    {
+        this.tagNodeVisitor = tagNodeVisitor;
+    }
+
+    /**
+     * Reads the content from <code>source</code>, cleans it, applies the visitor if one is set,
+     * and writes the serialized result to <code>sink</code>. The reader and the writer obtained
+     * from the source and the sink are closed before this method returns.
+     *
+     * @param source the content to rewrite
+     * @param sink the destination of the rewritten content
+     * @param context the rewriting context (not used by this implementation)
+     * @throws ContentRewritingException if the serializer cannot be created
+     * @throws IOException if reading, parsing or writing fails
+     */
+    public void rewrite(Source source, Sink sink, ContentRewritingContext context) throws ContentRewritingException, IOException
+    {
+        Serializer serializer = null;
+
+        try {
+            serializer = serializerFactory.createSerializer();
+        } catch (Exception e) {
+            throw new ContentRewritingException("Failed to create serializer. " + e, e);
+        }
+
+        Reader reader = null;
+        BufferedReader br = null;
+        Writer writer = null;
+        BufferedWriter bw = null;
+
+        try {
+            reader = source.getReader();
+            br = new BufferedReader(reader);
+            writer = sink.getWriter();
+            bw = new BufferedWriter(writer);
+
+            TagNode tagNode = cleaner.clean(br);
+
+            if (tagNodeVisitor != null) {
+                tagNode.traverse(tagNodeVisitor);
+            }
+
+            // Write through the buffered wrapper rather than the raw sink writer, so the
+            // buffer that is set up above is actually the one receiving the output.
+            serializer.write(tagNode, bw, "UTF-8");
+        } finally {
+            // Wrappers first, then the underlying streams in case a wrapper was never created.
+            IOUtils.closeQuietly(br);
+            IOUtils.closeQuietly(reader);
+            IOUtils.closeQuietly(bw);
+            IOUtils.closeQuietly(writer);
+        }
+    }
+}
Added: portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/main/java/org/apache/portals/applications/webcontent2/rewriter2/htmlcleaner/SerializerFactory.java Sat Jul 5 23:33:22 2014
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner;
+
+import org.htmlcleaner.Serializer;
+
+/**
+ * Factory abstraction for obtaining {@link Serializer} instances.
+ */
+public interface SerializerFactory
+{
+    /**
+     * Creates a serializer.
+     *
+     * @return the created serializer
+     * @throws Exception if the serializer cannot be created
+     */
+    Serializer createSerializer() throws Exception;
+}
Added: portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/HtmlCleanerContentRewriterTest.java Sat Jul 5 23:33:22 2014
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.portals.applications.webcontent2.rewriter2;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+
+import org.apache.commons.io.output.TeeOutputStream;
+import org.apache.commons.lang.StringUtils;
+import org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner.DefaultSerializerFactory;
+import org.apache.portals.applications.webcontent2.rewriter2.htmlcleaner.HtmlCleanerContentRewriter;
+import org.apache.portals.applications.webcontent2.rewriter2.impl.SimpleContentRewritingContext;
+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.HtmlNode;
+import org.htmlcleaner.SimpleHtmlSerializer;
+import org.htmlcleaner.TagNode;
+import org.htmlcleaner.TagNodeVisitor;
+import org.junit.Before;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Runs {@link HtmlCleanerContentRewriter} over the <code>guidelines.html</code> test resource
+ * with a visitor that prefixes link targets with a site URL and removes one named anchor,
+ * then checks the serialized output.
+ */
+public class HtmlCleanerContentRewriterTest
+{
+
+    private static final Logger log = LoggerFactory.getLogger(HtmlCleanerContentRewriterTest.class);
+
+    /** Encoding declared by the fixture; used explicitly so the test does not depend on the platform default. */
+    private static final String ENCODING = "UTF-8";
+
+    private Source source;
+    private Sink sink;
+    private ByteArrayOutputStream sinkTeeOut;
+    private HtmlCleanerContentRewriter contentRewriter;
+
+    @Before
+    public void before() {
+        source = new Source() {
+            public InputStream getInputStream() throws IOException
+            {
+                return HtmlCleanerContentRewriterTest.class.getResourceAsStream("guidelines.html");
+            }
+            public Reader getReader() throws IOException
+            {
+                return new InputStreamReader(getInputStream(), ENCODING);
+            }
+        };
+
+        sinkTeeOut = new ByteArrayOutputStream();
+        sink = new Sink() {
+            public OutputStream getOutputStream() throws IOException
+            {
+                // The rewriter closes the writer it obtains from the sink, and closing a
+                // TeeOutputStream closes both branches. Only flush here so that System.out
+                // stays usable for whatever runs after this test.
+                return new TeeOutputStream(System.out, sinkTeeOut) {
+                    @Override
+                    public void close() throws IOException
+                    {
+                        flush();
+                    }
+                };
+            }
+            public Writer getWriter() throws IOException
+            {
+                return new OutputStreamWriter(getOutputStream(), ENCODING);
+            }
+        };
+
+        HtmlCleaner cleaner = new HtmlCleaner();
+        cleaner.getProperties().setOmitXmlDeclaration(true);
+
+        DefaultSerializerFactory serializerFactory = new DefaultSerializerFactory();
+        serializerFactory.setSerializerClass(SimpleHtmlSerializer.class);
+        serializerFactory.setArguments(new Object [] { cleaner.getProperties() });
+
+        contentRewriter = new HtmlCleanerContentRewriter(cleaner, serializerFactory);
+
+        final String siteUrl = "http://www.example.com/";
+        contentRewriter.setTagNodeVisitor(new TagNodeVisitor() {
+            public boolean visit(TagNode tagNode, HtmlNode htmlNode) {
+                if (htmlNode instanceof TagNode) {
+                    TagNode tag = (TagNode) htmlNode;
+                    String tagName = tag.getName();
+
+                    // Prefix the href of anchors and stylesheet links with the site URL.
+                    if ("a".equals(tagName) || "link".equals(tagName)) {
+                        String href = tag.getAttributeByName("href");
+
+                        if (href != null) {
+                            tag.addAttribute("href", siteUrl + StringUtils.removeStart(href, "./"));
+                        }
+                    }
+
+                    // Drop the named anchor so the test can verify node removal.
+                    if ("a".equals(tagName) && "Project_Guidelines".equals(tag.getAttributeByName("name"))) {
+                        tag.removeFromTree();
+                    }
+                }
+
+                // tells visitor to continue traversing the DOM tree
+                return true;
+            }
+        });
+    }
+
+    @Test
+    public void testRewriter() throws Exception {
+        ContentRewritingContext rewritingContext = new SimpleContentRewritingContext();
+        contentRewriter.rewrite(source, sink, rewritingContext);
+        String output = sinkTeeOut.toString(ENCODING);
+        log.debug("OUTPUT: {}", output);
+
+        assertTrue(output.contains("<link rel=\"stylesheet\" href=\"http://www.example.com/css/print.css\" type=\"text/css\" media=\"print\" />"));
+        assertFalse(output.contains("name=\"Project_Guidelines\""));
+        assertTrue(output.contains("<a href=\"http://www.example.com/roles.html\">Roles and Responsibilities</a>"));
+        assertTrue(output.contains("general@portals <a href=\"http://www.example.com/mail-lists.html#general\">"));
+    }
+
+}
Modified: portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java?rev=1608141&r1=1608140&r2=1608141&view=diff
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java (original)
+++ portals/applications/webcontent/trunk/content-rewriter/src/test/java/org/apache/portals/applications/webcontent2/rewriter2/TextLineContentRewriterTest.java Sat Jul 5 23:33:22 2014
@@ -79,7 +79,7 @@ public class TextLineContentRewriterTest
}
@Test
- public void testA() throws Exception {
+ public void testRewriter() throws Exception {
ContentRewritingContext rewritingContext = new SimpleContentRewritingContext();
contentRewriter.rewrite(source, sink, rewritingContext);
String output = sinkTeeOut.toString();
Added: portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html?rev=1608141&view=auto
==============================================================================
--- portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html (added)
+++ portals/applications/webcontent/trunk/content-rewriter/src/test/resources/org/apache/portals/applications/webcontent2/rewriter2/guidelines.html Sat Jul 5 23:33:22 2014
@@ -0,0 +1,41 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+<title>The Apache Portals Site - Project Guidelines</title>
+<link rel="stylesheet" href="./css/print.css" type="text/css"
+ media="print" />
+<meta name="author" content="Apache Portals Project" />
+</head>
+<body>
+
+ <h2>
+ <a name="Project_Guidelines"></a>Project Guidelines
+ </h2>
+ <p>This document defines the guidelines of the Portals Project. It
+ includes definitions of the various categories of membership, who is
+ able to vote, how conflicts are resolved by voting, and the procedures
+ to follow for proposing and making changes to the codebase of the
+ Project.</p>
+ <ul>
+ <li><a href="./roles.html">Roles and Responsibilities</a><br />
+ Defines the recognized roles in the project.</li>
+ <li><a href="./communication.html">Communication</a><br />
+ Defines how users and developers communicate.</li>
+ <li><a href="./decisions.html">Decision Making</a><br /> Defines
+ how action items are proposed and voted on.</li>
+ <li><a href="./management.html">Project Management</a><br />
+ Defines the roles and responsibilities of the Project Management
+ Committee (PMC).</li>
+ <li><a href="./newproject.html">New Subproject Proposals</a><br />
+ Defines the methodology for proposing new top level Jakarta
+ Subprojects.</li>
+ </ul>
+ <p>
+ This is a living document. Changes can be made by the Project
+ Management Committee. Suggestions for changes should be discussed on
+ the general@portals <a href="./mail-lists.html#general">mailing
+ list</a>
+ </p>
+</body>
+</html>
\ No newline at end of file
Modified: portals/applications/webcontent/trunk/pom.xml
URL: http://svn.apache.org/viewvc/portals/applications/webcontent/trunk/pom.xml?rev=1608141&r1=1608140&r2=1608141&view=diff
==============================================================================
--- portals/applications/webcontent/trunk/pom.xml (original)
+++ portals/applications/webcontent/trunk/pom.xml Sat Jul 5 23:33:22 2014
@@ -42,6 +42,7 @@
<junit.version>4.11</junit.version>
<easymock.version>3.2</easymock.version>
<nekohtml.version>0.9.5</nekohtml.version>
+ <htmlcleaner.version>2.8</htmlcleaner.version>
<castor.version>1.1.1</castor.version>
<httpcomponents-httpcore.version>4.0.1</httpcomponents-httpcore.version>
<httpcomponents-httpclient.version>4.0</httpcomponents-httpclient.version>
@@ -159,6 +160,12 @@
</dependency>
<dependency>
+ <groupId>net.sourceforge.htmlcleaner</groupId>
+ <artifactId>htmlcleaner</artifactId>
+ <version>${htmlcleaner.version}</version>
+ </dependency>
+
+ <dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>${httpcomponents-httpcore.version}</version>