You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by ro...@apache.org on 2017/10/18 23:23:21 UTC

[sling-org-apache-sling-commons-html] 35/36: SLING-6783 updates for org.apache.commons.html

This is an automated email from the ASF dual-hosted git repository.

rombert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-commons-html.git

commit fae7ec857766e4a2adb0acdaaa1fad3624c07271
Author: Robert Munteanu <ro...@apache.org>
AuthorDate: Mon Apr 24 07:34:50 2017 +0000

    SLING-6783 updates for org.apache.commons.html
    
    Submitted-By: Jason Bailey
    
    git-svn-id: https://svn.apache.org/repos/asf/sling/trunk@1792430 13f79535-47bb-0310-9956-ffa450edef68
---
 NOTICE                                             |  2 +-
 README.md                                          | 39 ++++++++++++++++++++++
 pom.xml                                            | 14 ++++----
 .../sling/commons/html/impl/HtmlParserImpl.java    | 36 ++++++++++++++++++--
 4 files changed, 79 insertions(+), 12 deletions(-)

diff --git a/NOTICE b/NOTICE
index be0c7d1..92f43bf 100644
--- a/NOTICE
+++ b/NOTICE
@@ -8,4 +8,4 @@ This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
 
 This product includes software developed at
-http://home.ccil.org/~cowan/XML/tagsoup/
\ No newline at end of file
+http://vrici.lojban.org/~cowan/XML/tagsoup/
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..106367a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,39 @@
+# current settings and their default values
+
+* http://xml.org/sax/features/namespaces=true
+* http://xml.org/sax/features/namespace-prefixes=false
+* http://xml.org/sax/features/external-general-entities=false
+* http://xml.org/sax/features/external-parameter-entities=false
+* http://xml.org/sax/features/is-standalone=false
+* http://xml.org/sax/features/lexical-handler/parameter-entities=false
+* http://xml.org/sax/features/resolve-dtd-uris=true
+* http://xml.org/sax/features/string-interning=true
+* http://xml.org/sax/features/use-attributes2=false
+* http://xml.org/sax/features/use-locator2=false
+* http://xml.org/sax/features/use-entity-resolver2=false
+* http://xml.org/sax/features/validation=false
+* http://xml.org/sax/features/xmlns-uris=false
+* http://xml.org/sax/features/xmlns-uris=false
+* http://xml.org/sax/features/xml-1.1=false
+
+The default SAX features are defined here:
+http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html
+
+The TagSoup-specific features are:
+
+*  http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons=false
+   A value of "true" indicates that the parser will ignore unknown elements.
+*  http://www.ccil.org/~cowan/tagsoup/features/bogons-empty=false
+   A value of "true" indicates that the parser will give unknown elements a content model of EMPTY; a value of "false", a content model of ANY.
+*  http://www.ccil.org/~cowan/tagsoup/features/root-bogons=true
+   A value of "true" indicates that the parser will allow unknown elements to be the root of the output document.
+*  http://www.ccil.org/~cowan/tagsoup/features/default-attributes=true
+   A value of "true" indicates that the parser will return default attribute values for missing attributes that have default values.
+* http://www.ccil.org/~cowan/tagsoup/features/translate-colons=false
+  A value of "true" indicates that the parser will translate colons into underscores in names.
+* http://www.ccil.org/~cowan/tagsoup/features/restart-elements=true
+  A value of "true" indicates that the parser will attempt to restart the restartable elements.
+* http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace=false
+  A value of "true" indicates that the parser will transmit whitespace in element-only content via the SAX ignorableWhitespace callback. Normally this is not done, because HTML is an SGML application and SGML suppresses such whitespace.
+* http://www.ccil.org/~cowan/tagsoup/features/cdata-elements=true
+  A value of "true" indicates that the parser will process the script and style elements (or any elements with type='cdata' in the TSSL schema) as SGML CDATA elements (that is, no markup is recognized except the matching end-tag).
diff --git a/pom.xml b/pom.xml
index 97eddd9..f022b61 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,7 +23,7 @@
     <parent>
         <groupId>org.apache.sling</groupId>
         <artifactId>sling</artifactId>
-        <version>26</version>
+        <version>30</version>
         <relativePath/>
     </parent>
 
@@ -46,10 +46,6 @@
         <plugins>
             <plugin>
                 <groupId>org.apache.felix</groupId>
-                <artifactId>maven-scr-plugin</artifactId>
-            </plugin>
-            <plugin>
-                <groupId>org.apache.felix</groupId>
                 <artifactId>maven-bundle-plugin</artifactId>
                 <extensions>true</extensions>
                 <configuration>
@@ -85,11 +81,13 @@
         <dependency>
             <groupId>org.ccil.cowan.tagsoup</groupId>
             <artifactId>tagsoup</artifactId>
-            <version>1.2</version>
+            <version>1.2.1</version>
         </dependency>
         <dependency>
-            <groupId>org.apache.felix</groupId>
-            <artifactId>org.apache.felix.scr.annotations</artifactId>
+        	<groupId>org.apache.sling</groupId>
+        	<artifactId>org.apache.sling.commons.osgi</artifactId>
+        	<version>2.2.0</version>
+        	<scope>provided</scope>
         </dependency>
     </dependencies>
 </project>
diff --git a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java b/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
index d3cd9b8..109e411 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
+++ b/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
@@ -20,11 +20,16 @@ package org.apache.sling.commons.html.impl;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Map;
 
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
 import org.apache.sling.commons.html.HtmlParser;
+import org.apache.sling.commons.osgi.PropertiesUtil;
 import org.ccil.cowan.tagsoup.Parser;
+import org.osgi.service.component.annotations.Activate;
+import org.osgi.service.component.annotations.Component;
+import org.osgi.service.metatype.annotations.AttributeDefinition;
+import org.osgi.service.metatype.annotations.Designate;
+import org.osgi.service.metatype.annotations.ObjectClassDefinition;
 import org.w3c.dom.Document;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.InputSource;
@@ -32,8 +37,19 @@ import org.xml.sax.SAXException;
 import org.xml.sax.ext.LexicalHandler;
 
 @Component
-@Service(value=HtmlParser.class)
+@Designate(ocd = HtmlParserImpl.Config.class)
 public class HtmlParserImpl implements HtmlParser {
+	
+    @ObjectClassDefinition(name="Apache Sling HTML Parser", description="Parser configuration")
+    static @interface Config {
+ 
+        @AttributeDefinition(name = "Parser Properties",
+                description = "Additional properties to be applied to the underlying parser in the format of key=[true|false]")
+        String[] properties();
+    
+    }
+    
+    // Never null: the parsing methods iterate this map and activate() populates it.
+    // Insertion-ordered so features are applied in the order they were configured.
+    private Map<String,Boolean> features = new java.util.LinkedHashMap<String,Boolean>();
 
     /**
      * @see org.apache.sling.commons.html.HtmlParser#parse(java.io.InputStream, java.lang.String, org.xml.sax.ContentHandler)
@@ -44,6 +60,9 @@ public class HtmlParserImpl implements HtmlParser {
         if ( ch instanceof LexicalHandler ) {
             parser.setProperty("http://xml.org/sax/properties/lexical-handler", ch);
         }
+        for (Map.Entry<String, Boolean> feature : features.entrySet()) {
+            // Boolean switches are SAX features; setProperty() rejects names it does not know as properties.
+            parser.setFeature(feature.getKey(), feature.getValue());
+        }
         parser.setContentHandler(ch);
         final InputSource source = new InputSource(stream);
         source.setEncoding(encoding);
@@ -68,6 +87,9 @@ public class HtmlParserImpl implements HtmlParser {
 
         try {
             parser.setProperty("http://xml.org/sax/properties/lexical-handler", builder);
+            for (Map.Entry<String, Boolean> feature : features.entrySet()) {
+                // Boolean switches are SAX features; setProperty() rejects names it does not know as properties.
+                parser.setFeature(feature.getKey(), feature.getValue());
+            }
             parser.setContentHandler(builder);
             parser.parse(source);
         } catch (SAXException se) {
@@ -78,4 +100,12 @@ public class HtmlParserImpl implements HtmlParser {
         }
         return builder.getDocument();
     }
+    
+    /**
+     * Reads the configured {@code key=[true|false]} entries and stores them as the
+     * feature map that the parsing methods apply to each parser.
+     *
+     * @param config the component configuration
+     */
+    @Activate
+    private void activate(Config config) {
+        // Build a fresh map and assign it, rather than relying on the field already holding one.
+        Map<String, Boolean> parsed = new java.util.LinkedHashMap<String, Boolean>();
+        Map<String, String> raw = PropertiesUtil.toMap(config.properties(), new String[0]);
+        for (Map.Entry<String, String> entry : raw.entrySet()) {
+            // Boolean.valueOf yields false for anything other than "true" (case-insensitive).
+            parsed.put(entry.getKey(), Boolean.valueOf(entry.getValue()));
+        }
+        features = parsed;
+    }
 }

-- 
To stop receiving notification emails like this one, please contact
"commits@sling.apache.org" <co...@sling.apache.org>.