You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by ol...@apache.org on 2018/04/01 20:11:21 UTC
[sling-org-apache-sling-commons-html] branch master updated:
SLING-6783 Updates for Commons HTML
This is an automated email from the ASF dual-hosted git repository.
olli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-commons-html.git
The following commit(s) were added to refs/heads/master by this push:
new a765dda SLING-6783 Updates for Commons HTML
a765dda is described below
commit a765ddaa13027e3a8d3ba92c4254e88254b469b4
Author: Oliver Lietz <ol...@apache.org>
AuthorDate: Sun Apr 1 22:10:56 2018 +0200
SLING-6783 Updates for Commons HTML
* fix NPE and enable tests
* fix javadoc
* prepare for different parsers by renaming HtmlParserImpl and adding component properties
* improve configuration
* style
---
pom.xml | 15 +---
.../org/apache/sling/commons/html/HtmlParser.java | 33 ++++---
.../html/{impl => internal}/DOMBuilder.java | 99 ++++++++++----------
.../TagsoupHtmlParser.java} | 100 ++++++++++++---------
.../internal/TagsoupHtmlParserConfiguration.java | 36 ++++++++
...gsoupParserIT.java => TagsoupHtmlParserIT.java} | 17 ++--
6 files changed, 174 insertions(+), 126 deletions(-)
diff --git a/pom.xml b/pom.xml
index 8b34cf5..435e43f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -36,6 +36,7 @@
</description>
<properties>
+ <sling.java.version>8</sling.java.version>
<org.ops4j.pax.exam.version>4.11.0</org.ops4j.pax.exam.version>
</properties>
@@ -83,20 +84,6 @@
</plugins>
</build>
- <reporting>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-javadoc-plugin</artifactId>
- <configuration>
- <excludePackageNames>
- org.apache.sling.commons.html.impl
- </excludePackageNames>
- </configuration>
- </plugin>
- </plugins>
- </reporting>
-
<dependencies>
<!-- javax -->
<dependency>
diff --git a/src/main/java/org/apache/sling/commons/html/HtmlParser.java b/src/main/java/org/apache/sling/commons/html/HtmlParser.java
index 1d0887d..725989f 100644
--- a/src/main/java/org/apache/sling/commons/html/HtmlParser.java
+++ b/src/main/java/org/apache/sling/commons/html/HtmlParser.java
@@ -26,26 +26,31 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
- * The html parser is a service to parse html and generate
- * SAX events or a Document out of the html.
- *
+ * The HTML parser is a service to parse HTML and generate
+ * SAX events or a Document out of the HTML.
*/
public interface HtmlParser {
/**
- * Parse html and send sax events.
- * @param stream The input stream
- * @param encoding Encoding of the input stream, <code>null</code>for default encoding.
- * @param ch Content handler receiving the SAX events. The content handler might also
- * implement the lexical handler interface.
+ * Parse HTML and send SAX events.
+ *
+ * @param inputStream The input stream
+ * @param encoding Encoding of the input stream, <code>null</code> for default encoding.
+ * @param contentHandler Content handler receiving the SAX events. The content handler might also
+ * implement the lexical handler interface.
+ * @throws SAXException Exception thrown when parsing fails.
*/
- void parse(InputStream stream, String encoding, ContentHandler ch) throws SAXException;
+ void parse(InputStream inputStream, String encoding, ContentHandler contentHandler) throws SAXException;
/**
- * Parse html and return a DOM Document.
- * @param The system id
- * @param stream The input stream
- * @param encoding Encoding of the input stream, <code>null</code>for default encoding.
+ * Parse HTML and return a DOM Document.
+ *
+ * @param systemId The system id
+ * @param inputStream The input stream
+ * @param encoding Encoding of the input stream, <code>null</code> for default encoding.
+ * @return A DOM Document built from parsed HTML or <code>null</code>
+ * @throws IOException Exception thrown when parsing fails.
*/
- Document parse(String systemId, InputStream stream, String encoding) throws IOException;
+ Document parse(String systemId, InputStream inputStream, String encoding) throws IOException;
+
}
diff --git a/src/main/java/org/apache/sling/commons/html/impl/DOMBuilder.java b/src/main/java/org/apache/sling/commons/html/internal/DOMBuilder.java
similarity index 60%
rename from src/main/java/org/apache/sling/commons/html/impl/DOMBuilder.java
rename to src/main/java/org/apache/sling/commons/html/internal/DOMBuilder.java
index 375ae9d..b4b6868 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/DOMBuilder.java
+++ b/src/main/java/org/apache/sling/commons/html/internal/DOMBuilder.java
@@ -14,10 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.sling.commons.html.impl;
+package org.apache.sling.commons.html.internal;
import java.io.IOException;
+import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.sax.SAXTransformerFactory;
@@ -34,14 +35,14 @@ import org.xml.sax.ext.LexicalHandler;
/**
* The <code>DOMBuilder</code> is a utility class that will generate a W3C
* DOM Document from SAX events.
- *
*/
public class DOMBuilder implements ContentHandler, LexicalHandler {
- /** The default transformer factory shared by all instances */
+ /**
+ * The default transformer factory shared by all instances
+ */
private static final SAXTransformerFactory FACTORY = (SAXTransformerFactory) TransformerFactory.newInstance();
- /** The result */
private final DOMResult result;
private final ContentHandler contentHandler;
@@ -49,21 +50,23 @@ public class DOMBuilder implements ContentHandler, LexicalHandler {
/**
* Construct a new instance of this DOMBuilder.
+ *
+ * @throws IOException If for some reason the <code>TransformerHandler</code> cannot be created.
*/
- public DOMBuilder() throws IOException {
+ DOMBuilder() throws IOException {
try {
final TransformerHandler handler = FACTORY.newTransformerHandler();
this.contentHandler = handler;
this.lexicalHandler = handler;
this.result = new DOMResult();
handler.setResult(this.result);
- } catch (javax.xml.transform.TransformerException local) {
- throw (IOException) new IOException("Fatal-Error: Unable to get transformer handler").initCause(local);
+ } catch (TransformerException te) {
+ throw new IOException("Unable to get transformer handler", te);
}
}
/**
- * Return the newly built Document.
+ * @return The newly built <code>Document</code> or <code>null</code>.
*/
public Document getDocument() {
if (this.result.getNode() == null) {
@@ -75,92 +78,94 @@ public class DOMBuilder implements ContentHandler, LexicalHandler {
}
}
+ @Override
public void setDocumentLocator(Locator locator) {
contentHandler.setDocumentLocator(locator);
}
- public void startDocument()
- throws SAXException {
+ @Override
+ public void startDocument() throws SAXException {
contentHandler.startDocument();
}
- public void endDocument()
- throws SAXException {
+ @Override
+ public void endDocument() throws SAXException {
contentHandler.endDocument();
}
- public void startPrefixMapping(String prefix, String uri)
- throws SAXException {
+ @Override
+ public void startPrefixMapping(String prefix, String uri) throws SAXException {
contentHandler.startPrefixMapping(prefix, uri);
}
- public void endPrefixMapping(String prefix)
- throws SAXException {
+ @Override
+ public void endPrefixMapping(String prefix) throws SAXException {
contentHandler.endPrefixMapping(prefix);
}
- public void startElement(String uri, String loc, String raw, Attributes a)
- throws SAXException {
- contentHandler.startElement(uri, loc, raw, a);
+ @Override
+ public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
+ contentHandler.startElement(uri, localName, qName, atts);
}
- public void endElement(String uri, String loc, String raw)
- throws SAXException {
- contentHandler.endElement(uri, loc, raw);
+ @Override
+ public void endElement(String uri, String localName, String qName) throws SAXException {
+ contentHandler.endElement(uri, localName, qName);
}
- public void characters(char c[], int start, int len)
- throws SAXException {
- contentHandler.characters(c, start, len);
+ @Override
+ public void characters(char[] ch, int start, int length) throws SAXException {
+ contentHandler.characters(ch, start, length);
}
- public void ignorableWhitespace(char c[], int start, int len)
- throws SAXException {
- contentHandler.ignorableWhitespace(c, start, len);
+ @Override
+ public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+ contentHandler.ignorableWhitespace(ch, start, length);
}
- public void processingInstruction(String target, String data)
- throws SAXException {
+ @Override
+ public void processingInstruction(String target, String data) throws SAXException {
contentHandler.processingInstruction(target, data);
}
- public void skippedEntity(String name)
- throws SAXException {
+ @Override
+ public void skippedEntity(String name) throws SAXException {
contentHandler.skippedEntity(name);
}
- public void startDTD(String name, String publicId, String systemId)
- throws SAXException {
+ @Override
+ public void startDTD(String name, String publicId, String systemId) throws SAXException {
lexicalHandler.startDTD(name, publicId, systemId);
}
- public void endDTD()
- throws SAXException {
+ @Override
+ public void endDTD() throws SAXException {
lexicalHandler.endDTD();
}
- public void startEntity(String name)
- throws SAXException {
+ @Override
+ public void startEntity(String name) throws SAXException {
lexicalHandler.startEntity(name);
}
- public void endEntity(String name)
- throws SAXException {
+ @Override
+ public void endEntity(String name) throws SAXException {
lexicalHandler.endEntity(name);
}
- public void startCDATA()
- throws SAXException {
+ @Override
+ public void startCDATA() throws SAXException {
lexicalHandler.startCDATA();
}
- public void endCDATA()
- throws SAXException {
+ @Override
+ public void endCDATA() throws SAXException {
lexicalHandler.endCDATA();
}
- public void comment(char ch[], int start, int len)
- throws SAXException {
- lexicalHandler.comment(ch, start, len);
+ @Override
+ public void comment(char[] ch, int start, int length) throws SAXException {
+ lexicalHandler.comment(ch, start, length);
}
+
}
diff --git a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java b/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParser.java
similarity index 55%
rename from src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
rename to src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParser.java
index 109e411..082f5b0 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
+++ b/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParser.java
@@ -16,10 +16,12 @@
* specific language governing permissions and limitations
* under the License.
*/
-package org.apache.sling.commons.html.impl;
+package org.apache.sling.commons.html.internal;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Collections;
+import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.sling.commons.html.HtmlParser;
@@ -27,43 +29,57 @@ import org.apache.sling.commons.osgi.PropertiesUtil;
import org.ccil.cowan.tagsoup.Parser;
import org.osgi.service.component.annotations.Activate;
import org.osgi.service.component.annotations.Component;
-import org.osgi.service.metatype.annotations.AttributeDefinition;
+import org.osgi.service.component.annotations.Deactivate;
+import org.osgi.service.component.annotations.Modified;
import org.osgi.service.metatype.annotations.Designate;
-import org.osgi.service.metatype.annotations.ObjectClassDefinition;
import org.w3c.dom.Document;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
-@Component
-@Designate(ocd = HtmlParserImpl.Config.class)
-public class HtmlParserImpl implements HtmlParser {
-
- @ObjectClassDefinition(name="Apache Sling HTML Parser", description="Parser configuration")
- static @interface Config {
-
- @AttributeDefinition(name = "Parser Properties",
- description = "Additional properties to be applied to the underlying parser in the format of key=[true|false]")
- String[] properties();
-
+@Component(
+ property = {
+ "dom=tagsoup",
+ "sax=tagsoup"
+ }
+)
+@Designate(
+ ocd = TagsoupHtmlParserConfiguration.class
+)
+public class TagsoupHtmlParser implements HtmlParser {
+
+ private Map<String, Boolean> properties = Collections.synchronizedMap(new LinkedHashMap<>());
+
+ @Activate
+ private void activate(final TagsoupHtmlParserConfiguration configuration) {
+ configure(configuration);
+ }
+
+ @Modified
+ private void modified(final TagsoupHtmlParserConfiguration configuration) {
+ configure(configuration);
+ }
+
+ @Deactivate
+ private void deactivate() {
+ properties.clear();
+ }
+
+ private void configure(final TagsoupHtmlParserConfiguration configuration) {
+ properties.clear();
+ final Map<String, String> map = PropertiesUtil.toMap(configuration.parser_properties(), new String[]{});
+ for (final String key : map.keySet()) {
+ properties.put(key, Boolean.valueOf(map.get(key)));
+ }
}
-
- private Map<String,Boolean> features;
/**
* @see org.apache.sling.commons.html.HtmlParser#parse(java.io.InputStream, java.lang.String, org.xml.sax.ContentHandler)
*/
- public void parse(InputStream stream, String encoding, ContentHandler ch)
- throws SAXException {
- final Parser parser = new Parser();
- if ( ch instanceof LexicalHandler ) {
- parser.setProperty("http://xml.org/sax/properties/lexical-handler", ch);
- }
- for (String feature : features.keySet()){
- parser.setProperty(feature, features.get(feature));
- }
- parser.setContentHandler(ch);
+ @Override
+ public void parse(final InputStream stream, final String encoding, final ContentHandler contentHandler) throws SAXException {
+ final Parser parser = buildParser(properties, contentHandler);
final InputSource source = new InputSource(stream);
source.setEncoding(encoding);
try {
@@ -76,9 +92,8 @@ public class HtmlParserImpl implements HtmlParser {
/**
* @see org.apache.sling.commons.html.HtmlParser#parse(java.lang.String, java.io.InputStream, java.lang.String)
*/
+ @Override
public Document parse(String systemId, InputStream stream, String encoding) throws IOException {
- final Parser parser = new Parser();
-
final DOMBuilder builder = new DOMBuilder();
final InputSource source = new InputSource(stream);
@@ -86,26 +101,27 @@ public class HtmlParserImpl implements HtmlParser {
source.setSystemId(systemId);
try {
- parser.setProperty("http://xml.org/sax/properties/lexical-handler", builder);
- for (String feature : features.keySet()) {
- parser.setProperty(feature, features.get(feature));
- }
- parser.setContentHandler(builder);
+ final Parser parser = buildParser(properties, builder);
parser.parse(source);
} catch (SAXException se) {
- if ( se.getCause() instanceof IOException ) {
+ if (se.getCause() instanceof IOException) {
throw (IOException) se.getCause();
}
- throw (IOException) new IOException("Unable to parse xml.").initCause(se);
+ throw new IOException("Unable to parse HTML", se);
}
return builder.getDocument();
}
-
- @Activate
- private void activate(Config config) {
- Map<String,String> temp = PropertiesUtil.toMap(config.properties(), new String[]{});
- for (String key : temp.keySet()){
- features.put(key, Boolean.valueOf(temp.get(key)));
- }
+
+ private Parser buildParser(final Map<String, Boolean> properties, final ContentHandler contentHandler) throws SAXException {
+ final Parser parser = new Parser();
+ parser.setContentHandler(contentHandler);
+ if (contentHandler instanceof LexicalHandler) {
+ parser.setProperty("http://xml.org/sax/properties/lexical-handler", contentHandler);
+ }
+ for (final String key : properties.keySet()) {
+ parser.setProperty(key, properties.get(key));
+ }
+ return parser;
}
+
}
diff --git a/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParserConfiguration.java b/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParserConfiguration.java
new file mode 100644
index 0000000..95fd232
--- /dev/null
+++ b/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParserConfiguration.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sling.commons.html.internal;
+
+import org.osgi.service.metatype.annotations.AttributeDefinition;
+import org.osgi.service.metatype.annotations.ObjectClassDefinition;
+
+@ObjectClassDefinition(
+ name = "Apache Sling HTML Parser (TagSoup)",
+ description = "TagSoup parser configuration"
+)
+@interface TagsoupHtmlParserConfiguration {
+
+ @AttributeDefinition(
+ name = "parser properties",
+ description = "Additional properties to be applied to the underlying parser in the format of key=[true|false]"
+ )
+ String[] parser_properties() default {};
+
+}
diff --git a/src/test/java/org/apache/sling/commons/html/it/TagsoupParserIT.java b/src/test/java/org/apache/sling/commons/html/it/TagsoupHtmlParserIT.java
similarity index 80%
rename from src/test/java/org/apache/sling/commons/html/it/TagsoupParserIT.java
rename to src/test/java/org/apache/sling/commons/html/it/TagsoupHtmlParserIT.java
index 954e4c6..1f4c585 100644
--- a/src/test/java/org/apache/sling/commons/html/it/TagsoupParserIT.java
+++ b/src/test/java/org/apache/sling/commons/html/it/TagsoupHtmlParserIT.java
@@ -24,7 +24,6 @@ import javax.inject.Inject;
import org.apache.commons.lang3.reflect.FieldUtils;
import org.apache.sling.commons.html.HtmlParser;
-import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.ops4j.pax.exam.Configuration;
@@ -32,6 +31,7 @@ import org.ops4j.pax.exam.Option;
import org.ops4j.pax.exam.junit.PaxExam;
import org.ops4j.pax.exam.spi.reactors.ExamReactorStrategy;
import org.ops4j.pax.exam.spi.reactors.PerClass;
+import org.ops4j.pax.exam.util.Filter;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
@@ -39,33 +39,32 @@ import static org.ops4j.pax.exam.cm.ConfigurationAdminOptions.newConfiguration;
@RunWith(PaxExam.class)
@ExamReactorStrategy(PerClass.class)
-public class TagsoupParserIT extends HtmlTestSupport {
+public class TagsoupHtmlParserIT extends HtmlTestSupport {
@Inject
+ @Filter(value = "(&(dom=tagsoup)(sax=tagsoup))")
private HtmlParser htmlParser;
@Configuration
public Option[] configuration() {
return new Option[]{
this.baseConfiguration(),
- newConfiguration("org.apache.sling.commons.html.impl.HtmlParserImpl")
- .put("properties", "foo=true")
+ newConfiguration("org.apache.sling.commons.html.internal.TagsoupHtmlParser")
+ .put("parser.properties", "foo=true")
.asOption(),
};
}
@Test
- @Ignore
public void testHtmlParser() {
assertNotNull(htmlParser);
}
@Test
- @Ignore
public void testConfiguration() throws IllegalAccessException {
- @SuppressWarnings("unchecked") final Map<String, Boolean> features = (Map<String, Boolean>) FieldUtils.readDeclaredField(htmlParser, "features", true);
- assertNotNull(features);
- final Boolean foo = features.get("foo");
+ @SuppressWarnings("unchecked") final Map<String, Boolean> properties = (Map<String, Boolean>) FieldUtils.readDeclaredField(htmlParser, "properties", true);
+ assertNotNull(properties);
+ final Boolean foo = properties.get("foo");
assertTrue(foo);
}
--
To stop receiving notification emails like this one, please contact
olli@apache.org.