You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by ol...@apache.org on 2018/04/01 20:11:21 UTC

[sling-org-apache-sling-commons-html] branch master updated: SLING-6783 Updates for Commons HTML

This is an automated email from the ASF dual-hosted git repository.

olli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-commons-html.git


The following commit(s) were added to refs/heads/master by this push:
     new a765dda  SLING-6783 Updates for Commons HTML
a765dda is described below

commit a765ddaa13027e3a8d3ba92c4254e88254b469b4
Author: Oliver Lietz <ol...@apache.org>
AuthorDate: Sun Apr 1 22:10:56 2018 +0200

    SLING-6783 Updates for Commons HTML
    
    * fix NPE and enable tests
    * fix javadoc
    * prepare for different parsers by renaming HtmlParserImpl and adding component properties
    * improve configuration
    * style
---
 pom.xml                                            |  15 +---
 .../org/apache/sling/commons/html/HtmlParser.java  |  33 ++++---
 .../html/{impl => internal}/DOMBuilder.java        |  99 ++++++++++----------
 .../TagsoupHtmlParser.java}                        | 100 ++++++++++++---------
 .../internal/TagsoupHtmlParserConfiguration.java   |  36 ++++++++
 ...gsoupParserIT.java => TagsoupHtmlParserIT.java} |  17 ++--
 6 files changed, 174 insertions(+), 126 deletions(-)

diff --git a/pom.xml b/pom.xml
index 8b34cf5..435e43f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -36,6 +36,7 @@
     </description>
 
     <properties>
+        <sling.java.version>8</sling.java.version>
         <org.ops4j.pax.exam.version>4.11.0</org.ops4j.pax.exam.version>
     </properties>
 
@@ -83,20 +84,6 @@
         </plugins>
     </build>
 
-    <reporting>
-        <plugins>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-javadoc-plugin</artifactId>
-                <configuration>
-                    <excludePackageNames>
-                        org.apache.sling.commons.html.impl
-                    </excludePackageNames>
-                </configuration>
-            </plugin>
-        </plugins>
-    </reporting>
-
     <dependencies>
         <!-- javax -->
         <dependency>
diff --git a/src/main/java/org/apache/sling/commons/html/HtmlParser.java b/src/main/java/org/apache/sling/commons/html/HtmlParser.java
index 1d0887d..725989f 100644
--- a/src/main/java/org/apache/sling/commons/html/HtmlParser.java
+++ b/src/main/java/org/apache/sling/commons/html/HtmlParser.java
@@ -26,26 +26,31 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 /**
- * The html parser is a service to parse html and generate
- * SAX events or a Document out of the html.
- *
+ * The HTML parser is a service to parse HTML and generate
+ * SAX events or a Document out of the HTML.
  */
 public interface HtmlParser {
 
     /**
-     * Parse html and send sax events.
-     * @param stream The input stream
-     * @param encoding Encoding of the input stream, <code>null</code>for default encoding.
-     * @param ch Content handler receiving the SAX events. The content handler might also
-     *           implement the lexical handler interface.
+     * Parse HTML and send SAX events.
+     *
+     * @param inputStream    The input stream
+     * @param encoding       Encoding of the input stream, <code>null</code> for default encoding.
+     * @param contentHandler Content handler receiving the SAX events. The content handler might also
+     *                       implement the lexical handler interface.
+     * @throws SAXException Exception thrown when parsing fails.
      */
-    void parse(InputStream stream, String encoding, ContentHandler ch) throws SAXException;
+    void parse(InputStream inputStream, String encoding, ContentHandler contentHandler) throws SAXException;
 
     /**
-     * Parse html and return a DOM Document.
-     * @param The system id
-     * @param stream The input stream
-     * @param encoding Encoding of the input stream, <code>null</code>for default encoding.
+     * Parse HTML and return a DOM Document.
+     *
+     * @param systemId    The system id
+     * @param inputStream The input stream
+     * @param encoding    Encoding of the input stream, <code>null</code> for default encoding.
+     * @return A DOM Document built from parsed HTML or <code>null</code>
+     * @throws IOException Exception thrown when parsing fails.
      */
-    Document parse(String systemId, InputStream stream, String encoding) throws IOException;
+    Document parse(String systemId, InputStream inputStream, String encoding) throws IOException;
+
 }
diff --git a/src/main/java/org/apache/sling/commons/html/impl/DOMBuilder.java b/src/main/java/org/apache/sling/commons/html/internal/DOMBuilder.java
similarity index 60%
rename from src/main/java/org/apache/sling/commons/html/impl/DOMBuilder.java
rename to src/main/java/org/apache/sling/commons/html/internal/DOMBuilder.java
index 375ae9d..b4b6868 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/DOMBuilder.java
+++ b/src/main/java/org/apache/sling/commons/html/internal/DOMBuilder.java
@@ -14,10 +14,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sling.commons.html.impl;
+package org.apache.sling.commons.html.internal;
 
 import java.io.IOException;
 
+import javax.xml.transform.TransformerException;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMResult;
 import javax.xml.transform.sax.SAXTransformerFactory;
@@ -34,14 +35,14 @@ import org.xml.sax.ext.LexicalHandler;
 /**
  * The <code>DOMBuilder</code> is a utility class that will generate a W3C
  * DOM Document from SAX events.
- *
  */
 public class DOMBuilder implements ContentHandler, LexicalHandler {
 
-    /** The default transformer factory shared by all instances */
+    /**
+     * The default transformer factory shared by all instances
+     */
     private static final SAXTransformerFactory FACTORY = (SAXTransformerFactory) TransformerFactory.newInstance();
 
-    /** The result */
     private final DOMResult result;
 
     private final ContentHandler contentHandler;
@@ -49,21 +50,23 @@ public class DOMBuilder implements ContentHandler, LexicalHandler {
 
     /**
      * Construct a new instance of this DOMBuilder.
+     *
+     * @throws IOException If for some reason the <code>TransformerHandler</code> cannot be created.
      */
-    public DOMBuilder() throws IOException {
+    DOMBuilder() throws IOException {
         try {
             final TransformerHandler handler = FACTORY.newTransformerHandler();
             this.contentHandler = handler;
             this.lexicalHandler = handler;
             this.result = new DOMResult();
             handler.setResult(this.result);
-        } catch (javax.xml.transform.TransformerException local) {
-            throw (IOException) new IOException("Fatal-Error: Unable to get transformer handler").initCause(local);
+        } catch (TransformerException te) {
+            throw new IOException("Unable to get transformer handler", te);
         }
     }
 
     /**
-     * Return the newly built Document.
+     * @return The newly built <code>Document</code> or <code>null</code>.
      */
     public Document getDocument() {
         if (this.result.getNode() == null) {
@@ -75,92 +78,94 @@ public class DOMBuilder implements ContentHandler, LexicalHandler {
         }
     }
 
+    @Override
     public void setDocumentLocator(Locator locator) {
         contentHandler.setDocumentLocator(locator);
     }
 
-    public void startDocument()
-    throws SAXException {
+    @Override
+    public void startDocument() throws SAXException {
         contentHandler.startDocument();
     }
 
-    public void endDocument()
-    throws SAXException {
+    @Override
+    public void endDocument() throws SAXException {
         contentHandler.endDocument();
     }
 
-    public void startPrefixMapping(String prefix, String uri)
-    throws SAXException {
+    @Override
+    public void startPrefixMapping(String prefix, String uri) throws SAXException {
         contentHandler.startPrefixMapping(prefix, uri);
     }
 
-    public void endPrefixMapping(String prefix)
-    throws SAXException {
+    @Override
+    public void endPrefixMapping(String prefix) throws SAXException {
         contentHandler.endPrefixMapping(prefix);
     }
 
-    public void startElement(String uri, String loc, String raw, Attributes a)
-    throws SAXException {
-        contentHandler.startElement(uri, loc, raw, a);
+    @Override
+    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
+        contentHandler.startElement(uri, localName, qName, atts);
     }
 
-    public void endElement(String uri, String loc, String raw)
-    throws SAXException {
-        contentHandler.endElement(uri, loc, raw);
+    @Override
+    public void endElement(String uri, String localName, String qName) throws SAXException {
+        contentHandler.endElement(uri, localName, qName);
     }
 
-    public void characters(char c[], int start, int len)
-    throws SAXException {
-        contentHandler.characters(c, start, len);
+    @Override
+    public void characters(char[] ch, int start, int length) throws SAXException {
+        contentHandler.characters(ch, start, length);
     }
 
-    public void ignorableWhitespace(char c[], int start, int len)
-    throws SAXException {
-        contentHandler.ignorableWhitespace(c, start, len);
+    @Override
+    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
+        contentHandler.ignorableWhitespace(ch, start, length);
     }
 
-    public void processingInstruction(String target, String data)
-    throws SAXException {
+    @Override
+    public void processingInstruction(String target, String data) throws SAXException {
         contentHandler.processingInstruction(target, data);
     }
 
-    public void skippedEntity(String name)
-    throws SAXException {
+    @Override
+    public void skippedEntity(String name) throws SAXException {
         contentHandler.skippedEntity(name);
     }
 
-    public void startDTD(String name, String publicId, String systemId)
-    throws SAXException {
+    @Override
+    public void startDTD(String name, String publicId, String systemId) throws SAXException {
         lexicalHandler.startDTD(name, publicId, systemId);
     }
 
-    public void endDTD()
-    throws SAXException {
+    @Override
+    public void endDTD() throws SAXException {
         lexicalHandler.endDTD();
     }
 
-    public void startEntity(String name)
-    throws SAXException {
+    @Override
+    public void startEntity(String name) throws SAXException {
         lexicalHandler.startEntity(name);
     }
 
-    public void endEntity(String name)
-    throws SAXException {
+    @Override
+    public void endEntity(String name) throws SAXException {
         lexicalHandler.endEntity(name);
     }
 
-    public void startCDATA()
-    throws SAXException {
+    @Override
+    public void startCDATA() throws SAXException {
         lexicalHandler.startCDATA();
     }
 
-    public void endCDATA()
-    throws SAXException {
+    @Override
+    public void endCDATA() throws SAXException {
         lexicalHandler.endCDATA();
     }
 
-    public void comment(char ch[], int start, int len)
-    throws SAXException {
-        lexicalHandler.comment(ch, start, len);
+    @Override
+    public void comment(char[] ch, int start, int length) throws SAXException {
+        lexicalHandler.comment(ch, start, length);
     }
+
 }
diff --git a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java b/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParser.java
similarity index 55%
rename from src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
rename to src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParser.java
index 109e411..082f5b0 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
+++ b/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParser.java
@@ -16,10 +16,12 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.apache.sling.commons.html.impl;
+package org.apache.sling.commons.html.internal;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Collections;
+import java.util.LinkedHashMap;
 import java.util.Map;
 
 import org.apache.sling.commons.html.HtmlParser;
@@ -27,43 +29,57 @@ import org.apache.sling.commons.osgi.PropertiesUtil;
 import org.ccil.cowan.tagsoup.Parser;
 import org.osgi.service.component.annotations.Activate;
 import org.osgi.service.component.annotations.Component;
-import org.osgi.service.metatype.annotations.AttributeDefinition;
+import org.osgi.service.component.annotations.Deactivate;
+import org.osgi.service.component.annotations.Modified;
 import org.osgi.service.metatype.annotations.Designate;
-import org.osgi.service.metatype.annotations.ObjectClassDefinition;
 import org.w3c.dom.Document;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.InputSource;
 import org.xml.sax.SAXException;
 import org.xml.sax.ext.LexicalHandler;
 
-@Component
-@Designate(ocd = HtmlParserImpl.Config.class)
-public class HtmlParserImpl implements HtmlParser {
-	
-    @ObjectClassDefinition(name="Apache Sling HTML Parser", description="Parser configuration")
-    static @interface Config {
- 
-        @AttributeDefinition(name = "Parser Properties",
-                description = "Additional properties to be applied to the underlying parser in the format of key=[true|false]")
-        String[] properties();
-    
+@Component(
+    property = {
+        "dom=tagsoup",
+        "sax=tagsoup"
+    }
+)
+@Designate(
+    ocd = TagsoupHtmlParserConfiguration.class
+)
+public class TagsoupHtmlParser implements HtmlParser {
+
+    private Map<String, Boolean> properties = Collections.synchronizedMap(new LinkedHashMap<>());
+
+    @Activate
+    private void activate(final TagsoupHtmlParserConfiguration configuration) {
+        configure(configuration);
+    }
+
+    @Modified
+    private void modified(final TagsoupHtmlParserConfiguration configuration) {
+        configure(configuration);
+    }
+
+    @Deactivate
+    private void deactivate() {
+        properties.clear();
+    }
+
+    private void configure(final TagsoupHtmlParserConfiguration configuration) {
+        properties.clear();
+        final Map<String, String> map = PropertiesUtil.toMap(configuration.parser_properties(), new String[]{});
+        for (final String key : map.keySet()) {
+            properties.put(key, Boolean.valueOf(map.get(key)));
+        }
     }
-    
-    private Map<String,Boolean> features;
 
     /**
      * @see org.apache.sling.commons.html.HtmlParser#parse(java.io.InputStream, java.lang.String, org.xml.sax.ContentHandler)
      */
-    public void parse(InputStream stream, String encoding, ContentHandler ch)
-    throws SAXException {
-        final Parser parser = new Parser();
-        if ( ch instanceof LexicalHandler ) {
-            parser.setProperty("http://xml.org/sax/properties/lexical-handler", ch);
-        }
-        for (String feature : features.keySet()){
-            parser.setProperty(feature, features.get(feature));
-        }
-        parser.setContentHandler(ch);
+    @Override
+    public void parse(final InputStream stream, final String encoding, final ContentHandler contentHandler) throws SAXException {
+        final Parser parser = buildParser(properties, contentHandler);
         final InputSource source = new InputSource(stream);
         source.setEncoding(encoding);
         try {
@@ -76,9 +92,8 @@ public class HtmlParserImpl implements HtmlParser {
     /**
      * @see org.apache.sling.commons.html.HtmlParser#parse(java.lang.String, java.io.InputStream, java.lang.String)
      */
+    @Override
     public Document parse(String systemId, InputStream stream, String encoding) throws IOException {
-        final Parser parser = new Parser();
-
         final DOMBuilder builder = new DOMBuilder();
 
         final InputSource source = new InputSource(stream);
@@ -86,26 +101,27 @@ public class HtmlParserImpl implements HtmlParser {
         source.setSystemId(systemId);
 
         try {
-            parser.setProperty("http://xml.org/sax/properties/lexical-handler", builder);
-            for (String feature : features.keySet()) {
-                parser.setProperty(feature, features.get(feature));
-            }
-            parser.setContentHandler(builder);
+            final Parser parser = buildParser(properties, builder);
             parser.parse(source);
         } catch (SAXException se) {
-            if ( se.getCause() instanceof IOException ) {
+            if (se.getCause() instanceof IOException) {
                 throw (IOException) se.getCause();
             }
-            throw (IOException) new IOException("Unable to parse xml.").initCause(se);
+            throw new IOException("Unable to parse HTML", se);
         }
         return builder.getDocument();
     }
-    
-    @Activate
-    private void activate(Config config) {
-    	Map<String,String> temp = PropertiesUtil.toMap(config.properties(), new String[]{});
-    	for (String key : temp.keySet()){
-    		features.put(key, Boolean.valueOf(temp.get(key)));
-    	}
+
+    private Parser buildParser(final Map<String, Boolean> properties, final ContentHandler contentHandler) throws SAXException {
+        final Parser parser = new Parser();
+        parser.setContentHandler(contentHandler);
+        if (contentHandler instanceof LexicalHandler) {
+            parser.setProperty("http://xml.org/sax/properties/lexical-handler", contentHandler);
+        }
+        for (final String key : properties.keySet()) {
+            parser.setProperty(key, properties.get(key));
+        }
+        return parser;
     }
+
 }
diff --git a/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParserConfiguration.java b/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParserConfiguration.java
new file mode 100644
index 0000000..95fd232
--- /dev/null
+++ b/src/main/java/org/apache/sling/commons/html/internal/TagsoupHtmlParserConfiguration.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sling.commons.html.internal;
+
+import org.osgi.service.metatype.annotations.AttributeDefinition;
+import org.osgi.service.metatype.annotations.ObjectClassDefinition;
+
+@ObjectClassDefinition(
+    name = "Apache Sling HTML Parser (TagSoup)",
+    description = "TagSoup parser configuration"
+)
+@interface TagsoupHtmlParserConfiguration {
+
+    @AttributeDefinition(
+        name = "parser properties",
+        description = "Additional properties to be applied to the underlying parser in the format of key=[true|false]"
+    )
+    String[] parser_properties() default {};
+
+}
diff --git a/src/test/java/org/apache/sling/commons/html/it/TagsoupParserIT.java b/src/test/java/org/apache/sling/commons/html/it/TagsoupHtmlParserIT.java
similarity index 80%
rename from src/test/java/org/apache/sling/commons/html/it/TagsoupParserIT.java
rename to src/test/java/org/apache/sling/commons/html/it/TagsoupHtmlParserIT.java
index 954e4c6..1f4c585 100644
--- a/src/test/java/org/apache/sling/commons/html/it/TagsoupParserIT.java
+++ b/src/test/java/org/apache/sling/commons/html/it/TagsoupHtmlParserIT.java
@@ -24,7 +24,6 @@ import javax.inject.Inject;
 
 import org.apache.commons.lang3.reflect.FieldUtils;
 import org.apache.sling.commons.html.HtmlParser;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.ops4j.pax.exam.Configuration;
@@ -32,6 +31,7 @@ import org.ops4j.pax.exam.Option;
 import org.ops4j.pax.exam.junit.PaxExam;
 import org.ops4j.pax.exam.spi.reactors.ExamReactorStrategy;
 import org.ops4j.pax.exam.spi.reactors.PerClass;
+import org.ops4j.pax.exam.util.Filter;
 
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
@@ -39,33 +39,32 @@ import static org.ops4j.pax.exam.cm.ConfigurationAdminOptions.newConfiguration;
 
 @RunWith(PaxExam.class)
 @ExamReactorStrategy(PerClass.class)
-public class TagsoupParserIT extends HtmlTestSupport {
+public class TagsoupHtmlParserIT extends HtmlTestSupport {
 
     @Inject
+    @Filter(value = "(&(dom=tagsoup)(sax=tagsoup))")
     private HtmlParser htmlParser;
 
     @Configuration
     public Option[] configuration() {
         return new Option[]{
             this.baseConfiguration(),
-            newConfiguration("org.apache.sling.commons.html.impl.HtmlParserImpl")
-                .put("properties", "foo=true")
+            newConfiguration("org.apache.sling.commons.html.internal.TagsoupHtmlParser")
+                .put("parser.properties", "foo=true")
                 .asOption(),
         };
     }
 
     @Test
-    @Ignore
     public void testHtmlParser() {
         assertNotNull(htmlParser);
     }
 
     @Test
-    @Ignore
     public void testConfiguration() throws IllegalAccessException {
-        @SuppressWarnings("unchecked") final Map<String, Boolean> features = (Map<String, Boolean>) FieldUtils.readDeclaredField(htmlParser, "features", true);
-        assertNotNull(features);
-        final Boolean foo = features.get("foo");
+        @SuppressWarnings("unchecked") final Map<String, Boolean> properties = (Map<String, Boolean>) FieldUtils.readDeclaredField(htmlParser, "properties", true);
+        assertNotNull(properties);
+        final Boolean foo = properties.get("foo");
         assertTrue(foo);
     }
 

-- 
To stop receiving notification emails like this one, please contact
olli@apache.org.