You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@syncope.apache.org by il...@apache.org on 2021/11/10 13:56:29 UTC

[syncope] 02/02: [SYNCOPE-1649] Using Tika's ToTextContentHandler for CSV output

This is an automated email from the ASF dual-hosted git repository.

ilgrosso pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/syncope.git

commit f62714915fc141118b043bd56087468b64b470a7
Author: Francesco Chicchiriccò <il...@apache.org>
AuthorDate: Wed Nov 10 14:45:47 2021 +0100

    [SYNCOPE-1649] Using Tika's ToTextContentHandler for CSV output
---
 core/idrepo/logic/pom.xml                          |  5 ++
 .../syncope/core/logic/cocoon/TextSerializer.java  | 69 +++++-----------------
 2 files changed, 19 insertions(+), 55 deletions(-)

diff --git a/core/idrepo/logic/pom.xml b/core/idrepo/logic/pom.xml
index 6e43888..dd1fe37 100644
--- a/core/idrepo/logic/pom.xml
+++ b/core/idrepo/logic/pom.xml
@@ -67,6 +67,11 @@ under the License.
     </dependency>
 
     <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+    </dependency>
+
+    <dependency>
       <groupId>org.apache.logging.log4j</groupId>
       <artifactId>log4j-core</artifactId>
     </dependency>
diff --git a/core/idrepo/logic/src/main/java/org/apache/syncope/core/logic/cocoon/TextSerializer.java b/core/idrepo/logic/src/main/java/org/apache/syncope/core/logic/cocoon/TextSerializer.java
index 8d60855..8a86649 100644
--- a/core/idrepo/logic/src/main/java/org/apache/syncope/core/logic/cocoon/TextSerializer.java
+++ b/core/idrepo/logic/src/main/java/org/apache/syncope/core/logic/cocoon/TextSerializer.java
@@ -18,66 +18,25 @@
  */
 package org.apache.syncope.core.logic.cocoon;
 
-import org.apache.cocoon.sax.component.XMLSerializer;
-import org.xml.sax.Attributes;
-import org.xml.sax.Locator;
-import org.xml.sax.SAXException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
+import org.apache.cocoon.pipeline.caching.CacheKey;
+import org.apache.cocoon.pipeline.caching.SimpleCacheKey;
+import org.apache.cocoon.pipeline.component.CachingPipelineComponent;
+import org.apache.cocoon.sax.AbstractSAXSerializer;
+import org.apache.tika.sax.ToTextContentHandler;
 
-/**
- * Converts XML into plain text. It omits all XML tags and writes only character events to the output. Input document
- * must have at least one element - root element - which should wrap all the text inside it.
- *
- */
-public class TextSerializer extends XMLSerializer {
-
-    private static final String UTF_8 = "UTF-8";
-
-    private static final String TXT = "text";
-
-    public TextSerializer() {
-        super();
-        super.setOmitXmlDeclaration(true);
-    }
-
-    @Override
-    public void setDocumentLocator(final Locator locator) {
-        // nothing
-    }
-
-    @Override
-    public void processingInstruction(final String target, final String data)
-            throws SAXException {
-        // nothing
-    }
-
-    @Override
-    public void startDTD(final String name, final String publicId, final String systemId)
-            throws SAXException {
-        // nothing
-    }
+public class TextSerializer extends AbstractSAXSerializer implements CachingPipelineComponent {
 
     @Override
-    public void endDTD() throws SAXException {
-        // nothing
+    public void setOutputStream(final OutputStream outputStream) {
+        super.setOutputStream(outputStream);
+        setContentHandler(new ToTextContentHandler(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)));
     }
 
     @Override
-    public void startElement(final String uri, final String loc, final String raw, final Attributes atts)
-            throws SAXException {
-        // nothing
-    }
-
-    @Override
-    public void endElement(final String uri, final String name, final String raw)
-            throws SAXException {
-        // nothing
-    }
-
-    public static TextSerializer createPlainSerializer() {
-        final TextSerializer serializer = new TextSerializer();
-        serializer.setContentType("text/plain; charset=" + UTF_8);
-        serializer.setEncoding(UTF_8);
-        serializer.setMethod(TXT);
-        return serializer;
+    public CacheKey constructCacheKey() {
+        return new SimpleCacheKey();
     }
 }