You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@any23.apache.org by GitBox <gi...@apache.org> on 2019/09/20 16:18:43 UTC

[GitHub] [any23] lewismc commented on a change in pull request #141: ANY23-443 improve speed & stability of RDFa extractors

lewismc commented on a change in pull request #141: ANY23-443 improve speed & stability of RDFa extractors
URL: https://github.com/apache/any23/pull/141#discussion_r326693171
 
 

 ##########
 File path: core/src/main/java/org/apache/any23/extractor/rdfa/JsoupScanner.java
 ##########
 @@ -0,0 +1,159 @@
+package org.apache.any23.extractor.rdfa;
+
+import org.jsoup.nodes.CDataNode;
+import org.jsoup.nodes.Comment;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+import org.jsoup.select.NodeVisitor;
+import org.semarglproject.sink.XmlSink;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+import org.xml.sax.helpers.NamespaceSupport;
+
+import java.util.ArrayList;
+
+class JsoupScanner implements NodeVisitor {
+
+    private final NamespaceSupport ns = new NamespaceSupport();
+    private final AttributesImpl attrs = new AttributesImpl();
+    private final String[] nameParts = new String[3];
+
+    private final XmlSink handler;
+
+    JsoupScanner(XmlSink handler) {
+        this.handler = handler;
+    }
+
+    private static String orEmpty(String str) {
+        return str == null ? "" : str;
+    }
+
+//    private static String orNull(String str) {
 
 Review comment:
   Can this commented-out `orNull` method just be removed?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services