You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2017/04/27 14:46:12 UTC
[tika] 01/04: TIKA-2345 Tika Config Serialisation of
EncodingDetector details
This is an automated email from the ASF dual-hosted git repository.
nick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4d3a43c1682ca88f8c5a88ea1b34cd6fb105f997
Author: Nick Burch <ni...@gagravarr.org>
AuthorDate: Thu Apr 27 15:31:16 2017 +0100
TIKA-2345 Tika Config Serialisation of EncodingDetector details
---
.../apache/tika/config/TikaConfigSerializer.java | 36 +++++++++++++++++++++-
1 file changed, 35 insertions(+), 1 deletion(-)
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
index 79e20ca..0701955 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
@@ -22,6 +22,7 @@ import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
+
import java.io.Writer;
import java.nio.charset.Charset;
import java.util.Collections;
@@ -30,8 +31,11 @@ import java.util.Set;
import java.util.TreeSet;
import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.detect.CompositeEncodingDetector;
import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.DefaultEncodingDetector;
import org.apache.tika.detect.Detector;
+import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.language.translate.DefaultTranslator;
import org.apache.tika.language.translate.Translator;
import org.apache.tika.mime.MediaType;
@@ -79,6 +83,7 @@ public class TikaConfigSerializer {
addMimeComment(mode, rootElement, doc);
addServiceLoader(mode, rootElement, doc, config);
addExecutorService(mode, rootElement, doc, config);
+ addEncodingDetectors(mode, rootElement, doc, config);
addTranslator(mode, rootElement, doc, config);
addDetectors(mode, rootElement, doc, config);
addParsers(mode, rootElement, doc, config);
@@ -97,7 +102,7 @@ public class TikaConfigSerializer {
}
private static void addExecutorService(Mode mode, Element rootElement, Document doc, TikaConfig config) {
- //TODO
+ // TODO Implement the reverse of ExecutorServiceXmlLoader
}
private static void addServiceLoader(Mode mode, Element rootElement, Document doc, TikaConfig config) {
@@ -146,6 +151,35 @@ public class TikaConfigSerializer {
rootElement.appendChild(mimeComment);
}
+    /**
+     * Writes the configured {@link EncodingDetector} details under the root element.
+     * <p>
+     * In {@code Mode.MINIMAL} with a {@link DefaultEncodingDetector} only an XML
+     * comment containing an example is appended. Otherwise an
+     * {@code encodingDetectors} element is appended, holding one
+     * {@code encodingDetector} child (with a {@code class} attribute) per detector:
+     * the configured detector itself when it is the default one in
+     * {@code Mode.CURRENT} or is not a {@link CompositeEncodingDetector}, or else
+     * each detector returned by the composite's {@code getDetectors()}.
+     *
+     * @param mode        serialisation mode controlling how much is written
+     * @param rootElement element the comment or new element is appended to
+     * @param doc         document used to create the new nodes
+     * @param config      configuration the encoding detector is read from
+     * @throws Exception if the encoding detector details cannot be serialised
+     */
+    private static void addEncodingDetectors(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception {
+        EncodingDetector encDetector = config.getEncodingDetector();
+
+        if (mode == Mode.MINIMAL && encDetector instanceof DefaultEncodingDetector) {
+            // All defaults: emit no element, only a comment showing a well-formed example
+            Node detComment = doc.createComment(
+                    "for example: <encodingDetectors><encodingDetector class=\""
+                    + "org.apache.tika.detect.DefaultEncodingDetector\"/></encodingDetectors>");
+            rootElement.appendChild(detComment);
+            return;
+        }
+
+        // Decide whether to list the detector itself or expand a composite's children.
+        // Parentheses make the intended grouping of the && / || mix explicit.
+        boolean listDetectorItself =
+                (mode == Mode.CURRENT && encDetector instanceof DefaultEncodingDetector)
+                || !(encDetector instanceof CompositeEncodingDetector);
+
+        List<EncodingDetector> toSerialize;
+        if (listDetectorItself) {
+            toSerialize = Collections.singletonList(encDetector);
+        } else {
+            toSerialize = ((CompositeEncodingDetector) encDetector).getDetectors();
+        }
+
+        Element encDetectorsElement = doc.createElement("encodingDetectors");
+        for (EncodingDetector d : toSerialize) {
+            Element encDetectorElement = doc.createElement("encodingDetector");
+            // NOTE(review): getCanonicalName() is null for anonymous/local classes and
+            // uses '.' for nested ones - confirm the config loader accepts that form
+            encDetectorElement.setAttribute("class", d.getClass().getCanonicalName());
+            encDetectorsElement.appendChild(encDetectorElement);
+        }
+        rootElement.appendChild(encDetectorsElement);
+    }
+
private static void addDetectors(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception {
Detector detector = config.getDetector();
--
To stop receiving notification emails like this one, please contact
"commits@tika.apache.org" <co...@tika.apache.org>.