You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2021/01/13 18:56:14 UTC

[nutch] branch master updated: NUTCH-2841 Upgrade xercesImpl dependency (#563)

This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 59c63c7  NUTCH-2841 Upgrade xercesImpl dependency (#563)
59c63c7 is described below

commit 59c63c7d8a13b0de1fd1da6aa4a1ab6e20fa478d
Author: Lewis John McGibbney <le...@gmail.com>
AuthorDate: Wed Jan 13 10:56:07 2021 -0800

    NUTCH-2841 Upgrade xercesImpl dependency (#563)
    
    * NUTCH-2841 Upgrade xercesImpl dependency
---
 ivy/ivy.xml                                     | 2 +-
 src/java/org/apache/nutch/tools/DmozParser.java | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index ad1e65f..3f1faf3 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -66,7 +66,7 @@
 		<dependency org="org.apache.tika" name="tika-core" rev="1.25" />
 
 		<dependency org="xml-apis" name="xml-apis" rev="1.4.01"/><!-- force this version as it is required by Tika -->
-		<dependency org="xerces" name="xercesImpl" rev="2.12.0" />
+		<dependency org="xerces" name="xercesImpl" rev="2.12.1" />
 
 		<dependency org="com.ibm.icu" name="icu4j" rev="68.2" />
 
diff --git a/src/java/org/apache/nutch/tools/DmozParser.java b/src/java/org/apache/nutch/tools/DmozParser.java
index 63dbde8..a447646 100644
--- a/src/java/org/apache/nutch/tools/DmozParser.java
+++ b/src/java/org/apache/nutch/tools/DmozParser.java
@@ -276,8 +276,11 @@ public class DmozParser {
               throws IOException, SAXException, ParserConfigurationException {
 
     SAXParserFactory parserFactory = SAXParserFactory.newInstance();
+    parserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
+    parserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
     SAXParser parser = parserFactory.newSAXParser();
     XMLReader reader = parser.getXMLReader();
+    reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
 
     // Create our own processor to receive SAX events
     RDFProcessor rp = new RDFProcessor(reader, subsetDenom, includeAdult, skew,