You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@syncope.apache.org by il...@apache.org on 2021/11/10 13:56:29 UTC
[syncope] 02/02: [SYNCOPE-1649] Using Tika's ToTextContentHandler for CSV output
This is an automated email from the ASF dual-hosted git repository.
ilgrosso pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/syncope.git
commit f62714915fc141118b043bd56087468b64b470a7
Author: Francesco Chicchiriccò <il...@apache.org>
AuthorDate: Wed Nov 10 14:45:47 2021 +0100
[SYNCOPE-1649] Using Tika's ToTextContentHandler for CSV output
---
core/idrepo/logic/pom.xml | 5 ++
.../syncope/core/logic/cocoon/TextSerializer.java | 69 +++++-----------------
2 files changed, 19 insertions(+), 55 deletions(-)
diff --git a/core/idrepo/logic/pom.xml b/core/idrepo/logic/pom.xml
index 6e43888..dd1fe37 100644
--- a/core/idrepo/logic/pom.xml
+++ b/core/idrepo/logic/pom.xml
@@ -67,6 +67,11 @@ under the License.
</dependency>
<dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ </dependency>
+
+ <dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
</dependency>
diff --git a/core/idrepo/logic/src/main/java/org/apache/syncope/core/logic/cocoon/TextSerializer.java b/core/idrepo/logic/src/main/java/org/apache/syncope/core/logic/cocoon/TextSerializer.java
index 8d60855..8a86649 100644
--- a/core/idrepo/logic/src/main/java/org/apache/syncope/core/logic/cocoon/TextSerializer.java
+++ b/core/idrepo/logic/src/main/java/org/apache/syncope/core/logic/cocoon/TextSerializer.java
@@ -18,66 +18,25 @@
*/
package org.apache.syncope.core.logic.cocoon;
-import org.apache.cocoon.sax.component.XMLSerializer;
-import org.xml.sax.Attributes;
-import org.xml.sax.Locator;
-import org.xml.sax.SAXException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.nio.charset.StandardCharsets;
+import org.apache.cocoon.pipeline.caching.CacheKey;
+import org.apache.cocoon.pipeline.caching.SimpleCacheKey;
+import org.apache.cocoon.pipeline.component.CachingPipelineComponent;
+import org.apache.cocoon.sax.AbstractSAXSerializer;
+import org.apache.tika.sax.ToTextContentHandler;
-/**
- * Converts XML into plain text. It omits all XML tags and writes only character events to the output. Input document
- * must have at least one element - root element - which should wrap all the text inside it.
- *
- */
-public class TextSerializer extends XMLSerializer {
-
- private static final String UTF_8 = "UTF-8";
-
- private static final String TXT = "text";
-
- public TextSerializer() {
- super();
- super.setOmitXmlDeclaration(true);
- }
-
- @Override
- public void setDocumentLocator(final Locator locator) {
- // nothing
- }
-
- @Override
- public void processingInstruction(final String target, final String data)
- throws SAXException {
- // nothing
- }
-
- @Override
- public void startDTD(final String name, final String publicId, final String systemId)
- throws SAXException {
- // nothing
- }
+public class TextSerializer extends AbstractSAXSerializer implements CachingPipelineComponent {
@Override
- public void endDTD() throws SAXException {
- // nothing
+ public void setOutputStream(final OutputStream outputStream) {
+ super.setOutputStream(outputStream);
+ setContentHandler(new ToTextContentHandler(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)));
}
@Override
- public void startElement(final String uri, final String loc, final String raw, final Attributes atts)
- throws SAXException {
- // nothing
- }
-
- @Override
- public void endElement(final String uri, final String name, final String raw)
- throws SAXException {
- // nothing
- }
-
- public static TextSerializer createPlainSerializer() {
- final TextSerializer serializer = new TextSerializer();
- serializer.setContentType("text/plain; charset=" + UTF_8);
- serializer.setEncoding(UTF_8);
- serializer.setMethod(TXT);
- return serializer;
+ public CacheKey constructCacheKey() {
+ return new SimpleCacheKey();
}
}