You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2021/08/20 15:39:24 UTC
[jena] branch main updated: Centralize XML reading setup.
This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new 66cae90 Centralize XML reading setup.
new 73ef978 Merge pull request #1052 from afs/xml-open
66cae90 is described below
commit 66cae9052edef6dd08a172191faad3c50bdbe376
Author: Andy Seaborne <an...@apache.org>
AuthorDate: Mon Aug 9 14:44:56 2021 +0100
Centralize XML reading setup.
---
.../java/org/apache/jena/riot/lang/ReaderTriX.java | 30 ++++---
.../apache/jena/riot/resultset/rw/ResultsStAX.java | 17 ++--
.../jena/rdfxml/xmlinput/impl/RDFXMLParser.java | 20 ++---
.../java/org/apache/jena/util/JenaXMLInput.java | 94 ++++++++++++++++++++++
.../apache/jena/rdfxml/xmlinput/DOM2RDFTest.java | 23 +++---
.../jena/rdfxml/xmlinput/MoreDOM2RDFTest.java | 44 +++++-----
6 files changed, 161 insertions(+), 67 deletions(-)
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/ReaderTriX.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/ReaderTriX.java
index e30edad..bb99365 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/lang/ReaderTriX.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/ReaderTriX.java
@@ -22,24 +22,36 @@ import static org.apache.jena.riot.lang.ReaderTriX.State.*;
import java.io.InputStream;
import java.io.Reader;
-import java.util.*;
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.NoSuchElementException;
+import java.util.Objects;
import javax.xml.namespace.QName;
-import javax.xml.stream.*;
+import javax.xml.stream.Location;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
import org.apache.jena.atlas.web.ContentType;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.XSDDatatype;
-import org.apache.jena.graph.*;
-import org.apache.jena.riot.*;
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
+import org.apache.jena.graph.Node_Marker;
+import org.apache.jena.graph.Triple;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.ReaderRIOT;
+import org.apache.jena.riot.ReaderRIOTFactory;
+import org.apache.jena.riot.RiotException;
import org.apache.jena.riot.system.ErrorHandler;
import org.apache.jena.riot.system.ParserProfile;
import org.apache.jena.riot.system.StreamRDF;
import org.apache.jena.riot.writer.StreamWriterTriX;
import org.apache.jena.riot.writer.WriterTriX;
import org.apache.jena.sparql.core.Quad;
-import org.apache.jena.sparql.resultset.ResultSetException;
import org.apache.jena.sparql.util.Context;
+import org.apache.jena.util.JenaXMLInput;
import org.apache.jena.vocabulary.RDF;
/** Read TriX.
@@ -82,21 +94,19 @@ public class ReaderTriX implements ReaderRIOT {
@Override
public void read(InputStream in, String baseURI, ContentType ct, StreamRDF output, Context context) {
- XMLInputFactory xf = XMLInputFactory.newInstance();
XMLStreamReader xReader;
try {
- xReader = xf.createXMLStreamReader(in);
+ xReader = JenaXMLInput.newXMLStreamReader(in);
} catch (XMLStreamException e) { throw new RiotException("Can't initialize StAX parsing engine", e); }
read(xReader, baseURI, output);
}
@Override
public void read(Reader reader, String baseURI, ContentType ct, StreamRDF output, Context context) {
- XMLInputFactory xf = XMLInputFactory.newInstance();
XMLStreamReader xReader;
try {
- xReader = xf.createXMLStreamReader(reader);
- } catch (XMLStreamException e) { throw new ResultSetException("Can't initialize StAX parsing engine", e); }
+ xReader = JenaXMLInput.newXMLStreamReader(reader);
+ } catch (XMLStreamException e) { throw new RiotException("Can't initialize StAX parsing engine", e); }
read(xReader, baseURI, output);
}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/resultset/rw/ResultsStAX.java b/jena-arq/src/main/java/org/apache/jena/riot/resultset/rw/ResultsStAX.java
index 295c8cc..7af4cf1 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/resultset/rw/ResultsStAX.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/resultset/rw/ResultsStAX.java
@@ -26,7 +26,6 @@ import java.util.NoSuchElementException;
import java.util.Objects;
import javax.xml.namespace.QName;
-import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
@@ -52,31 +51,32 @@ import org.apache.jena.sparql.graph.GraphFactory;
import org.apache.jena.sparql.resultset.ResultSetException;
import org.apache.jena.sparql.resultset.SPARQLResult;
import org.apache.jena.sparql.util.Context;
+import org.apache.jena.util.JenaXMLInput;
/** Public only for use by XMLOutput (legacy) */
public class ResultsStAX implements ResultSet, Closeable {
public static SPARQLResult read(InputStream in, Model model, Context context) {
- XMLInputFactory xf = XMLInputFactory.newInstance() ;
+ XMLStreamReader xReader;
try {
- XMLStreamReader xReader = xf.createXMLStreamReader(in) ;
- return worker(xReader, model, context);
+ xReader = JenaXMLInput.newXMLStreamReader(in);
} catch (XMLStreamException e) {
throw new ResultSetException("Can't initialize StAX parsing engine", e) ;
} catch (Exception ex) {
throw new ResultSetException("Failed when initializing the StAX parsing engine", ex) ;
}
+ return worker(xReader, model, context);
}
public static SPARQLResult read(Reader in, Model model, Context context) {
- XMLInputFactory xf = XMLInputFactory.newInstance() ;
+ XMLStreamReader xReader;
try {
- XMLStreamReader xReader = xf.createXMLStreamReader(in) ;
- return worker(xReader, model, context) ;
+ xReader = JenaXMLInput.newXMLStreamReader(in);
} catch (XMLStreamException e) {
throw new ResultSetException("Can't initialize StAX parsing engine", e) ;
} catch (Exception ex) {
throw new ResultSetException("Failed when initializing the StAX parsing engine", ex) ;
}
+ return worker(xReader, model, context);
}
private static SPARQLResult worker(XMLStreamReader xReader, Model model, Context context) {
@@ -108,9 +108,6 @@ public class ResultsStAX implements ResultSet, Closeable {
private boolean askResult = false;
private ResultsStAX(XMLStreamReader reader, Model model, Context context) {
-
-
-
parser = reader ;
this.model = model ;
boolean inputGraphBNodeLabels = (context != null) && context.isTrue(ARQ.inputGraphBNodeLabels);
diff --git a/jena-core/src/main/java/org/apache/jena/rdfxml/xmlinput/impl/RDFXMLParser.java b/jena-core/src/main/java/org/apache/jena/rdfxml/xmlinput/impl/RDFXMLParser.java
index 77cd22c..9d11fbb 100644
--- a/jena-core/src/main/java/org/apache/jena/rdfxml/xmlinput/impl/RDFXMLParser.java
+++ b/jena-core/src/main/java/org/apache/jena/rdfxml/xmlinput/impl/RDFXMLParser.java
@@ -23,21 +23,18 @@ import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UTFDataFormatException;
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
import org.apache.jena.rdfxml.xmlinput.FatalParsingErrorException ;
import org.apache.jena.rdfxml.xmlinput.SAX2RDF ;
import org.apache.jena.shared.JenaException;
import org.apache.jena.util.CharEncoding ;
+import org.apache.jena.util.JenaXMLInput;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
/**
- *
- * The main parser, other variants of XMLHandler are for more specialized purposes.
+ * The main RDF/XML parser; other variants of XMLHandler are for more specialized purposes.
*/
public class RDFXMLParser extends XMLHandler {
@@ -72,12 +69,9 @@ public class RDFXMLParser extends XMLHandler {
return saxParser;
}
- private static SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
public static RDFXMLParser create() {
- try {
- SAXParser saxParser = saxParserFactory.newSAXParser();
- // Get the encapsulated SAX XMLReader
- XMLReader xmlreader = saxParser.getXMLReader();
+ try {
+ XMLReader xmlreader = JenaXMLInput.createXMLReader();
RDFXMLParser a = new RDFXMLParser(xmlreader);
// Default.
a.setEncoding("UTF");
@@ -99,13 +93,13 @@ public class RDFXMLParser extends XMLHandler {
initEncodingChecks(input);
try {
saxParser.parse(input);
- }
+ }
catch (UTFDataFormatException e) {
generalError(ERR_UTF_ENCODING, e);
- }
+ }
catch (IOException e) {
generalError(ERR_GENERIC_IO, e);
- }
+ }
catch (WrappedException wrapped) {
wrapped.throwMe();
}
diff --git a/jena-core/src/main/java/org/apache/jena/util/JenaXMLInput.java b/jena-core/src/main/java/org/apache/jena/util/JenaXMLInput.java
new file mode 100644
index 0000000..298d693
--- /dev/null
+++ b/jena-core/src/main/java/org/apache/jena/util/JenaXMLInput.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.util;
+
+import java.io.InputStream;
+import java.io.Reader;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+import org.apache.jena.atlas.logging.Log;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+/**
+ * Factory methods for XML input in Jena: SAX XMLReader, StAX XMLStreamReader and DOM DocumentBuilderFactory.
+ * External entity processing is switched off (silently ignored for SAX and StAX) to prevent
+ * <a href="https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing">XXE Processing</a>
+ * problems.
+ */
+public class JenaXMLInput {
+ // ---- SAX
+ // Used by RDFXMLParser. The factory is created once and shared by all calls.
+ private static SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
+
+ public static XMLReader createXMLReader() throws ParserConfigurationException, SAXException {
+ SAXParser saxParser = saxParserFactory.newSAXParser();
+ XMLReader xmlreader = saxParser.getXMLReader();
+
+ // XXE : option 1 (not enabled, kept for reference) - disable all DTD processing ...
+// // EFFECT: RIOT Error if DTD.
+// xmlreader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+// // This may not be strictly required as DTDs shouldn't be allowed at all, per previous line.
+// xmlreader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+
+ // ... option 2 (the choice made here) - just ignore external entities/DTDs (silently ignored).
+ xmlreader.setFeature("http://xml.org/sax/features/external-general-entities", false);
+ xmlreader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
+ return xmlreader;
+ }
+
+ // ---- StAX
+ // Used by the TriX reader (ReaderTriX) and result set reading (ResultsStAX).
+ private static XMLInputFactory xf = XMLInputFactory.newInstance() ;
+ static {
+ try {
+ // // This disables DTDs entirely for that factory
+ // xf.setProperty(XMLInputFactory.SUPPORT_DTD, false);
+ // Disable external entities (silently ignored); applies to every reader made from this shared factory.
+ xf.setProperty("javax.xml.stream.isSupportingExternalEntities", false);
+ } catch(IllegalArgumentException ex){
+ Log.error(JenaXMLInput.class, "Problem setting StAX property", ex);
+ }
+ }
+
+ public static XMLStreamReader newXMLStreamReader(InputStream in) throws XMLStreamException {
+ return xf.createXMLStreamReader(in) ;
+ }
+
+ public static XMLStreamReader newXMLStreamReader(Reader in) throws XMLStreamException {
+ return xf.createXMLStreamReader(in) ;
+ }
+
+ // ---- DocumentBuilder
+ // For reference - not used in Jena src/main, but it is used in src/test by DOM2RDFTest and MoreDOM2RDFTest.
+ public static DocumentBuilderFactory newDocumentBuilderFactory() throws ParserConfigurationException {
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ // Causes SAXParseException if there is an external entity.
+ factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ return factory;
+ }
+}
diff --git a/jena-core/src/test/java/org/apache/jena/rdfxml/xmlinput/DOM2RDFTest.java b/jena-core/src/test/java/org/apache/jena/rdfxml/xmlinput/DOM2RDFTest.java
index de63fc9..fa00b81 100644
--- a/jena-core/src/test/java/org/apache/jena/rdfxml/xmlinput/DOM2RDFTest.java
+++ b/jena-core/src/test/java/org/apache/jena/rdfxml/xmlinput/DOM2RDFTest.java
@@ -27,6 +27,7 @@ import javax.xml.parsers.ParserConfigurationException;
import org.apache.jena.rdf.model.Model ;
import org.apache.jena.shared.JenaException ;
+import org.apache.jena.util.JenaXMLInput;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
@@ -40,34 +41,32 @@ class DOM2RDFTest extends SAX2RDFTest {
public DOM2RDFTest(String dir, String base0, String file) {
super(dir, base0, file);
}
-
- static private DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
- // DOM must have namespace information inside it!
- static { factory.setNamespaceAware(true);}
+
static private DocumentBuilder domParser;
-
+
static {
try {
- domParser = factory.newDocumentBuilder();
+ DocumentBuilderFactory factory = JenaXMLInput.newDocumentBuilderFactory();
+ factory.setNamespaceAware(true);
+ domParser = factory.newDocumentBuilder();
}
catch (ParserConfigurationException rte){
throw new JenaException(rte);
}
}
-
@Override
void loadXMLModel(Model m2, InputStream in, RDFEHArray eh2) throws SAXException, IOException {
-
+
Document document = domParser
.parse(in,base);
-
+
// Make DOM into transformer input
// Source input = new DOMSource(document);
- DOM2Model d2m = DOM2Model.createD2M(base,m2);
+ DOM2Model d2m = DOM2Model.createD2M(base,m2);
d2m.setErrorHandler(eh2);
-
+
// try {
try {
d2m.load(document);
@@ -77,7 +76,7 @@ class DOM2RDFTest extends SAX2RDFTest {
// } catch (SAXParseException e) {
// // already reported, leave it be.
// }
-
+
}
diff --git a/jena-core/src/test/java/org/apache/jena/rdfxml/xmlinput/MoreDOM2RDFTest.java b/jena-core/src/test/java/org/apache/jena/rdfxml/xmlinput/MoreDOM2RDFTest.java
index 93ef1a7..4379a96 100644
--- a/jena-core/src/test/java/org/apache/jena/rdfxml/xmlinput/MoreDOM2RDFTest.java
+++ b/jena-core/src/test/java/org/apache/jena/rdfxml/xmlinput/MoreDOM2RDFTest.java
@@ -27,6 +27,8 @@ import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import junit.framework.TestCase;
+import org.apache.jena.shared.JenaException;
+import org.apache.jena.util.JenaXMLInput;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
@@ -37,38 +39,36 @@ public class MoreDOM2RDFTest extends TestCase implements StatementHandler {
public MoreDOM2RDFTest(String name) {
super(name);
}
-
- static private DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
- // DOM must have namespace information inside it!
- static { factory.setNamespaceAware(true);}
- static private DocumentBuilder domParser;
-
- static {
- try {
- domParser = factory.newDocumentBuilder();
- }
- catch (ParserConfigurationException rte){
- throw new RuntimeException(rte);
- }
- }
-
+
+ static private DocumentBuilder domParser;
+
+ static {
+ try {
+ DocumentBuilderFactory factory = JenaXMLInput.newDocumentBuilderFactory();
+ factory.setNamespaceAware(true);
+ domParser = factory.newDocumentBuilder();
+ }
+ catch (ParserConfigurationException rte){
+ throw new JenaException(rte);
+ }
+ }
public void testDOMwithARP() throws SAXException, IOException {
-
+
InputStream in = new FileInputStream("testing/wg/Class/conclusions001.rdf");
Document document = domParser
.parse(in,"http://www.example.org/");
-
- DOM2Model d2m = DOM2Model.createD2M("http://www.example.org/",null);
+
+ DOM2Model d2m = DOM2Model.createD2M("http://www.example.org/",null);
d2m.getHandlers().setStatementHandler(this);
-
+
try {
d2m.load(document);
} finally {
d2m.close();
}
-
+
assertEquals("Incorrect number of triples",3,count);
}
@@ -77,14 +77,14 @@ public class MoreDOM2RDFTest extends TestCase implements StatementHandler {
@Override
public void statement(AResource subj, AResource pred, AResource obj) {
count++;
-
+
}
@Override
public void statement(AResource subj, AResource pred, ALiteral lit) {
count++;
-
+
}
}