You are viewing a plain-text version of this content. The canonical link to the original message is not included in this plain-text copy.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/06/16 16:54:41 UTC

[tika] branch master updated: Disable external DTD + Stylesheets with the TransformerFactory (#276)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new cd90587  Disable external DTD + Stylesheets with the TransformerFactory (#276)
cd90587 is described below

commit cd90587f5d194c1f0e9b5cd5eeb8a995ecb3868f
Author: Colm O hEigeartaigh <co...@users.noreply.github.com>
AuthorDate: Tue Jun 16 17:54:34 2020 +0100

    Disable external DTD + Stylesheets with the TransformerFactory (#276)
    
    Sorry for our delay.  Thank you!
---
 .../java/org/apache/tika/config/TikaConfigSerializer.java  |  4 +---
 .../main/java/org/apache/tika/mime/MimeTypesReader.java    |  8 +++-----
 .../main/java/org/apache/tika/utils/XMLReaderUtils.java    | 14 ++++++++++++++
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
index dda1675..2e16567 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
@@ -27,7 +27,6 @@ import java.util.concurrent.ExecutorService;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
@@ -93,8 +92,7 @@ public class TikaConfigSerializer {
         // TODO Service Loader section
 
         // now write
-        TransformerFactory transformerFactory = TransformerFactory.newInstance();
-        Transformer transformer = transformerFactory.newTransformer();
+        Transformer transformer = XMLReaderUtils.getTransformer();
         transformer.setOutputProperty(OutputKeys.INDENT, "yes");
         transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
         transformer.setOutputProperty(OutputKeys.ENCODING, charset.name());
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
index f255ce4..201cfb2 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypesReader.java
@@ -22,7 +22,6 @@ import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerException;
-import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.sax.SAXResult;
 import java.io.ByteArrayInputStream;
@@ -38,6 +37,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.utils.XMLReaderUtils;
 import org.w3c.dom.Document;
 import org.xml.sax.Attributes;
 import org.xml.sax.InputSource;
@@ -150,11 +150,9 @@ public class MimeTypesReader extends DefaultHandler implements MimeTypesReaderMe
 
     public void read(Document document) throws MimeTypeException {
         try {
-            TransformerFactory factory = TransformerFactory.newInstance();
-            factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
-            Transformer transformer = factory.newTransformer();
+            Transformer transformer = XMLReaderUtils.getTransformer();
             transformer.transform(new DOMSource(document), new SAXResult(this));
-        } catch (TransformerException e) {
+        } catch (TransformerException | TikaException e) {
             throw new MimeTypeException("Failed to parse type registry", e);
         }
     }
diff --git a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
index 676eb67..e30374c 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/XMLReaderUtils.java
@@ -333,6 +333,18 @@ public class XMLReaderUtils implements Serializable {
         return factory;
     }
 
+    private static void trySetTransformerAttribute(TransformerFactory transformerFactory, String attribute, String value) {
+        try {
+            transformerFactory.setAttribute(attribute, value);
+        } catch (SecurityException e) {
+            throw e;
+        } catch (Exception e) {
+            LOG.warn("Transformer Attribute unsupported: {}", attribute, e);
+        } catch (AbstractMethodError ame) {
+            LOG.warn("Cannot set Transformer attribute because outdated XML parser in classpath: {}", attribute, ame);
+        }
+    }
+
     private static void trySetSAXFeature(SAXParserFactory saxParserFactory, String feature, boolean enabled) {
         try {
             saxParserFactory.setFeature(feature, enabled);
@@ -377,6 +389,8 @@ public class XMLReaderUtils implements Serializable {
         try {
             TransformerFactory transformerFactory = TransformerFactory.newInstance();
             transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+            trySetTransformerAttribute(transformerFactory, XMLConstants.ACCESS_EXTERNAL_DTD, "");
+            trySetTransformerAttribute(transformerFactory, XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
             return transformerFactory.newTransformer();
         } catch (TransformerConfigurationException | TransformerFactoryConfigurationError e) {
             throw new TikaException("Transformer not available", e);