You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2023/03/09 14:11:24 UTC
[opennlp] branch main updated: OPENNLP-1476 Modernize DictionaryEntryPersistor to create XMLReader via javax.xml.parsers.SAXParserFactory (#512)
This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new 8535da98 OPENNLP-1476 Modernize DictionaryEntryPersistor to create XMLReader via javax.xml.parsers.SAXParserFactory (#512)
8535da98 is described below
commit 8535da98c4027003a719666952f09ab5c20511af
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Thu Mar 9 15:10:34 2023 +0100
OPENNLP-1476 Modernize DictionaryEntryPersistor to create XMLReader via javax.xml.parsers.SAXParserFactory (#512)
---
.../serializer/DictionaryEntryPersistor.java | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
index 3394604c..af210726 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
@@ -24,6 +24,8 @@ import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
@@ -37,7 +39,6 @@ import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;
-import org.xml.sax.helpers.XMLReaderFactory;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.InvalidFormatException;
@@ -51,6 +52,9 @@ import opennlp.tools.util.model.UncloseableInputStream;
* @see Dictionary
*/
public class DictionaryEntryPersistor {
+
+ private static final SAXParserFactory SAX_PARSER_FACTORY = SAXParserFactory.newInstance();
+ private static final String SAX_FEATURE_NAMESPACES = "http://xml.org/sax/features/namespaces";
// TODO: should check for invalid format, make it safe
private static class DictionaryContenthandler implements ContentHandler {
@@ -199,14 +203,11 @@ public class DictionaryEntryPersistor {
}
}
- private static final String CHARSET = StandardCharsets.UTF_8.name();
-
private static final String DICTIONARY_ELEMENT = "dictionary";
private static final String ENTRY_ELEMENT = "entry";
private static final String TOKEN_ELEMENT = "token";
private static final String ATTRIBUTE_CASE_SENSITIVE = "case_sensitive";
-
/**
* Creates {@link Entry}s from the given {@link InputStream} and
* forwards these {@link Entry}s to the {@link EntryInserter}.
@@ -225,16 +226,19 @@ public class DictionaryEntryPersistor {
public static boolean create(InputStream in, EntryInserter inserter)
throws IOException {
- DictionaryContenthandler profileContentHandler =
- new DictionaryContenthandler(inserter);
+ DictionaryContenthandler profileContentHandler = new DictionaryContenthandler(inserter);
XMLReader xmlReader;
try {
- xmlReader = XMLReaderFactory.createXMLReader();
+ xmlReader = SAX_PARSER_FACTORY.newSAXParser().getXMLReader();
+ // Note:
+ // There is a compatibility problem here: the JAXP default for the namespaces feature is false, while the SAX 2 default is true!
+ // OpenNLP requires this feature to be activated!
+ xmlReader.setFeature(SAX_FEATURE_NAMESPACES, true);
xmlReader.setContentHandler(profileContentHandler);
xmlReader.parse(new InputSource(new UncloseableInputStream(in)));
}
- catch (SAXException e) {
+ catch (ParserConfigurationException | SAXException e) {
throw new InvalidFormatException("The profile data stream has " +
"an invalid format!", e);
}
@@ -290,7 +294,7 @@ public class DictionaryEntryPersistor {
}
Transformer serializer = hd.getTransformer();
- serializer.setOutputProperty(OutputKeys.ENCODING, CHARSET);
+ serializer.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
hd.setResult(streamResult);