You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/06/16 16:54:41 UTC
[tika] branch master updated: Disable external DTD + Stylesheets with the TransformerFactory (#276)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new cd90587 Disable external DTD + Stylesheets with the TransformerFactory (#276)
cd90587 is described below
commit cd90587f5d194c1f0e9b5cd5eeb8a995ecb3868f
Author: Colm O hEigeartaigh <co...@users.noreply.github.com>
AuthorDate: Tue Jun 16 17:54:34 2020 +0100
Disable external DTD + Stylesheets with the TransformerFactory (#276)
Sorry for our delay. Thank you!
---
.../java/org/apache/tika/config/TikaConfigSerializer.java | 4 +---
.../main/java/org/apache/tika/mime/MimeTypesReader.java | 8 +++-----
.../main/java/org/apache/tika/utils/XMLReaderUtils.java | 14 ++++++++++++++
3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
index dda1675..2e16567 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
@@ -27,7 +27,6 @@ import java.util.concurrent.ExecutorService;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
@@ -93,8 +92,7 @@ public class TikaConfigSerializer {
// TODO Service Loader section
// now write
- TransformerFactory transformerFactory = TransformerFactory.newInstance();
- Transformer transformer = transformerFactory.newTransformer();
+ Transformer transformer = XMLReaderUtils.getTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
transformer.setOutputProperty(OutputKeys.ENCODING, charset.name());
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
index f255ce4..201cfb2 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
@@ -22,7 +22,6 @@ import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
-import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import java.io.ByteArrayInputStream;
@@ -38,6 +37,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.utils.XMLReaderUtils;
import org.w3c.dom.Document;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
@@ -150,11 +150,9 @@ public class MimeTypesReader extends DefaultHandler implements MimeTypesReaderMe
public void read(Document document) throws MimeTypeException {
try {
- TransformerFactory factory = TransformerFactory.newInstance();
- factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
- Transformer transformer = factory.newTransformer();
+ Transformer transformer = XMLReaderUtils.getTransformer();
transformer.transform(new DOMSource(document), new SAXResult(this));
- } catch (TransformerException e) {
+ } catch (TransformerException | TikaException e) {
throw new MimeTypeException("Failed to parse type registry", e);
}
}
diff --git a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
index 676eb67..e30374c 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
@@ -333,6 +333,18 @@ public class XMLReaderUtils implements Serializable {
return factory;
}
+ private static void trySetTransformerAttribute(TransformerFactory transformerFactory, String attribute, String value) {
+ try {
+ transformerFactory.setAttribute(attribute, value);
+ } catch (SecurityException e) {
+ throw e;
+ } catch (Exception e) {
+ LOG.warn("Transformer Attribute unsupported: {}", attribute, e);
+ } catch (AbstractMethodError ame) {
+ LOG.warn("Cannot set Transformer attribute because outdated XML parser in classpath: {}", attribute, ame);
+ }
+ }
+
private static void trySetSAXFeature(SAXParserFactory saxParserFactory, String feature, boolean enabled) {
try {
saxParserFactory.setFeature(feature, enabled);
@@ -377,6 +389,8 @@ public class XMLReaderUtils implements Serializable {
try {
TransformerFactory transformerFactory = TransformerFactory.newInstance();
transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ trySetTransformerAttribute(transformerFactory, XMLConstants.ACCESS_EXTERNAL_DTD, "");
+ trySetTransformerAttribute(transformerFactory, XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
return transformerFactory.newTransformer();
} catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
throw new TikaException("Transformer not available", e);