You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sling.apache.org by ro...@apache.org on 2017/10/18 23:23:21 UTC
[sling-org-apache-sling-commons-html] 35/36: SLING-6783 updates for
org.apache.commons.html
This is an automated email from the ASF dual-hosted git repository.
rombert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sling-org-apache-sling-commons-html.git
commit fae7ec857766e4a2adb0acdaaa1fad3624c07271
Author: Robert Munteanu <ro...@apache.org>
AuthorDate: Mon Apr 24 07:34:50 2017 +0000
SLING-6783 updates for org.apache.commons.html
Submitted-By: Jason Bailey
git-svn-id: https://svn.apache.org/repos/asf/sling/trunk@1792430 13f79535-47bb-0310-9956-ffa450edef68
---
NOTICE | 2 +-
README.md | 39 ++++++++++++++++++++++
pom.xml | 14 ++++----
.../sling/commons/html/impl/HtmlParserImpl.java | 36 ++++++++++++++++++--
4 files changed, 79 insertions(+), 12 deletions(-)
diff --git a/NOTICE b/NOTICE
index be0c7d1..92f43bf 100644
--- a/NOTICE
+++ b/NOTICE
@@ -8,4 +8,4 @@ This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
This product includes software developed at
-http://home.ccil.org/~cowan/XML/tagsoup/
\ No newline at end of file
+http://vrici.lojban.org/~cowan/XML/tagsoup/
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..106367a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,39 @@
+# current settings and their default values
+
+* http://xml.org/sax/features/namespaces=true
+* http://xml.org/sax/features/namespace-prefixes=false
+* http://xml.org/sax/features/external-general-entities=false
+* http://xml.org/sax/features/external-parameter-entities=false
+* http://xml.org/sax/features/is-standalone=false
+* http://xml.org/sax/features/lexical-handler/parameter-entities=false
+* http://xml.org/sax/features/resolve-dtd-uris=true
+* http://xml.org/sax/features/string-interning=true
+* http://xml.org/sax/features/use-attributes2=false
+* http://xml.org/sax/features/use-locator2=false
+* http://xml.org/sax/features/use-entity-resolver2=false
+* http://xml.org/sax/features/validation=false
+* http://xml.org/sax/features/unicode-normalization-checking=false
+* http://xml.org/sax/features/xmlns-uris=false
+* http://xml.org/sax/features/xml-1.1=false
+
+default SAX features are defined here
+http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html
+
+TagSoup-specific features are:
+
+* http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons=false
+ A value of "true" indicates that the parser will ignore unknown elements.
+* http://www.ccil.org/~cowan/tagsoup/features/bogons-empty=false
+ A value of "true" indicates that the parser will give unknown elements a content model of EMPTY; a value of "false", a content model of ANY.
+* http://www.ccil.org/~cowan/tagsoup/features/root-bogons=true
+ A value of "true" indicates that the parser will allow unknown elements to be the root of the output document.
+* http://www.ccil.org/~cowan/tagsoup/features/default-attributes=true
+ A value of "true" indicates that the parser will return default attribute values for missing attributes that have default values.
+* http://www.ccil.org/~cowan/tagsoup/features/translate-colons=false
+ A value of "true" indicates that the parser will translate colons into underscores in names.
+* http://www.ccil.org/~cowan/tagsoup/features/restart-elements=true
+ A value of "true" indicates that the parser will attempt to restart the restartable elements.
+* http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace=false
+ A value of "true" indicates that the parser will transmit whitespace in element-only content via the SAX ignorableWhitespace callback. Normally this is not done, because HTML is an SGML application and SGML suppresses such whitespace.
+* http://www.ccil.org/~cowan/tagsoup/features/cdata-elements=true
+ A value of "true" indicates that the parser will process the script and style elements (or any elements with type='cdata' in the TSSL schema) as SGML CDATA elements (that is, no markup is recognized except the matching end-tag).
diff --git a/pom.xml b/pom.xml
index 97eddd9..f022b61 100644
--- a/pom.xml
+++ b/pom.xml
@@ -23,7 +23,7 @@
<parent>
<groupId>org.apache.sling</groupId>
<artifactId>sling</artifactId>
- <version>26</version>
+ <version>30</version>
<relativePath/>
</parent>
@@ -46,10 +46,6 @@
<plugins>
<plugin>
<groupId>org.apache.felix</groupId>
- <artifactId>maven-scr-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.felix</groupId>
<artifactId>maven-bundle-plugin</artifactId>
<extensions>true</extensions>
<configuration>
@@ -85,11 +81,13 @@
<dependency>
<groupId>org.ccil.cowan.tagsoup</groupId>
<artifactId>tagsoup</artifactId>
- <version>1.2</version>
+ <version>1.2.1</version>
</dependency>
<dependency>
- <groupId>org.apache.felix</groupId>
- <artifactId>org.apache.felix.scr.annotations</artifactId>
+ <groupId>org.apache.sling</groupId>
+ <artifactId>org.apache.sling.commons.osgi</artifactId>
+ <version>2.2.0</version>
+ <scope>provided</scope>
</dependency>
</dependencies>
</project>
diff --git a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java b/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
index d3cd9b8..109e411 100644
--- a/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
+++ b/src/main/java/org/apache/sling/commons/html/impl/HtmlParserImpl.java
@@ -20,11 +20,16 @@ package org.apache.sling.commons.html.impl;
import java.io.IOException;
import java.io.InputStream;
+import java.util.Map;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.sling.commons.html.HtmlParser;
+import org.apache.sling.commons.osgi.PropertiesUtil;
import org.ccil.cowan.tagsoup.Parser;
+import org.osgi.service.component.annotations.Activate;
+import org.osgi.service.component.annotations.Component;
+import org.osgi.service.metatype.annotations.AttributeDefinition;
+import org.osgi.service.metatype.annotations.Designate;
+import org.osgi.service.metatype.annotations.ObjectClassDefinition;
import org.w3c.dom.Document;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
@@ -32,8 +37,19 @@ import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
@Component
-@Service(value=HtmlParser.class)
+@Designate(ocd = HtmlParserImpl.Config.class)
public class HtmlParserImpl implements HtmlParser {
+
+ @ObjectClassDefinition(description="Parser configuration", name="Apache Sling HTML Parser")
+ static @interface Config { // configuration type referenced by the @Designate annotation on HtmlParserImpl
+
+ @AttributeDefinition(description = "Additional properties to be applied to the underlying parser in the format of key=[true|false]",
+ name = "Parser Properties")
+ String[] properties(); // raw "key=value" entries, turned into the features map by activate()
+
+ }
+
+ private Map<String,Boolean> features;
/**
* @see org.apache.sling.commons.html.HtmlParser#parse(java.io.InputStream, java.lang.String, org.xml.sax.ContentHandler)
@@ -44,6 +60,9 @@ public class HtmlParserImpl implements HtmlParser {
if ( ch instanceof LexicalHandler ) {
parser.setProperty("http://xml.org/sax/properties/lexical-handler", ch);
}
+ for (String feature : features.keySet()){
+ parser.setProperty(feature, features.get(feature));
+ }
parser.setContentHandler(ch);
final InputSource source = new InputSource(stream);
source.setEncoding(encoding);
@@ -68,6 +87,9 @@ public class HtmlParserImpl implements HtmlParser {
try {
parser.setProperty("http://xml.org/sax/properties/lexical-handler", builder);
+ for (String feature : features.keySet()) {
+ parser.setProperty(feature, features.get(feature));
+ }
parser.setContentHandler(builder);
parser.parse(source);
} catch (SAXException se) {
@@ -78,4 +100,12 @@ public class HtmlParserImpl implements HtmlParser {
}
return builder.getDocument();
}
+
+ @Activate // activation callback: rebuilds the feature map from the configured "key=[true|false]" entries
+ private void activate(Config config) {
+ features = new java.util.LinkedHashMap<String,Boolean>(); // the field is declared without an initializer, so create the map before it is filled or read
+ for (Map.Entry<String,String> entry : PropertiesUtil.toMap(config.properties(), new String[]{}).entrySet()) {
+ features.put(entry.getKey(), Boolean.valueOf(entry.getValue())); // any value other than "true" (case-insensitive) is stored as false
+ }
+ }
}
--
To stop receiving notification emails like this one, please contact
"commits@sling.apache.org" <co...@sling.apache.org>.