You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cxf.apache.org by re...@apache.org on 2016/09/10 14:41:30 UTC
[22/37] cxf git commit: Updating Spark demo to accept PDF and ODT
Updating Spark demo to accept PDF and ODT
Project: http://git-wip-us.apache.org/repos/asf/cxf/repo
Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/f2db2250
Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/f2db2250
Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/f2db2250
Branch: refs/heads/master-jaxrs-2.1
Commit: f2db225034f82e93617aa8aece8bf059b8758626
Parents: f5a1c14
Author: Sergey Beryozkin <sb...@gmail.com>
Authored: Thu Sep 8 18:27:52 2016 +0100
Committer: Sergey Beryozkin <sb...@gmail.com>
Committed: Thu Sep 8 18:27:52 2016 +0100
----------------------------------------------------------------------
.../release/samples/jax_rs/spark/README.txt | 2 +-
.../main/release/samples/jax_rs/spark/pom.xml | 5 ++++
.../demo/jaxrs/server/StreamingService.java | 27 ++++++++++++++++++--
3 files changed, 31 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/cxf/blob/f2db2250/distribution/src/main/release/samples/jax_rs/spark/README.txt
----------------------------------------------------------------------
diff --git a/distribution/src/main/release/samples/jax_rs/spark/README.txt b/distribution/src/main/release/samples/jax_rs/spark/README.txt
index 8a7b292..b10a44b 100644
--- a/distribution/src/main/release/samples/jax_rs/spark/README.txt
+++ b/distribution/src/main/release/samples/jax_rs/spark/README.txt
@@ -11,7 +11,7 @@ Next do:
1. Simple text processing:
-curl -X POST -H "Accept: text/plain" -H "Content-Type: text/plain" -d "Hello Spark" http://localhost:9000/stream
+curl -X POST -H "Accept: text/plain" -H "Content-Type: text/plain" -d "Hello Spark" http://localhost:9000/spark/stream
2. PDF processing:
http://git-wip-us.apache.org/repos/asf/cxf/blob/f2db2250/distribution/src/main/release/samples/jax_rs/spark/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/src/main/release/samples/jax_rs/spark/pom.xml b/distribution/src/main/release/samples/jax_rs/spark/pom.xml
index 10a00da..0ba37c1 100644
--- a/distribution/src/main/release/samples/jax_rs/spark/pom.xml
+++ b/distribution/src/main/release/samples/jax_rs/spark/pom.xml
@@ -74,6 +74,11 @@
<artifactId>tika-parser-pdf-module</artifactId>
<version>2.0-SNAPSHOT</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parser-office-module</artifactId>
+ <version>2.0-SNAPSHOT</version>
+ </dependency>
</dependencies>
<repositories>
http://git-wip-us.apache.org/repos/asf/cxf/blob/f2db2250/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java
----------------------------------------------------------------------
diff --git a/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java b/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java
index 5e059fc..4f82b5e 100644
--- a/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java
+++ b/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java
@@ -20,6 +20,7 @@ package demo.jaxrs.server;
import java.io.InputStream;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
@@ -35,6 +36,7 @@ import javax.ws.rs.Produces;
import javax.ws.rs.WebApplicationException;
import javax.ws.rs.container.AsyncResponse;
import javax.ws.rs.container.Suspended;
+import javax.ws.rs.core.MediaType;
import org.apache.cxf.common.util.Base64Utility;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
@@ -54,6 +56,7 @@ import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.receiver.Receiver;
+import org.apache.tika.parser.odf.OpenDocumentParser;
import org.apache.tika.parser.pdf.PDFParser;
import scala.Tuple2;
@@ -61,6 +64,12 @@ import scala.Tuple2;
@Path("/")
public class StreamingService {
+ private static final Map<String, MediaType> MEDIA_TYPE_TABLE;
+ static {
+ MEDIA_TYPE_TABLE = new HashMap<String, MediaType>();
+ MEDIA_TYPE_TABLE.put("pdf", MediaType.valueOf("application/pdf"));
+ MEDIA_TYPE_TABLE.put("odt", MediaType.valueOf("application/vnd.oasis.opendocument.text"));
+ }
private Executor executor = new ThreadPoolExecutor(5, 5, 0, TimeUnit.SECONDS,
new ArrayBlockingQueue<Runnable>(10));
public StreamingService() {
@@ -72,8 +81,22 @@ public class StreamingService {
@Produces("text/plain")
public void processMultipartStream(@Suspended AsyncResponse async,
@Multipart("file") Attachment att) {
- TikaContentExtractor tika = new TikaContentExtractor(new PDFParser());
- TikaContent tikaContent = tika.extract(att.getObject(InputStream.class));
+ TikaContentExtractor tika = new TikaContentExtractor(
+ Arrays.asList(new PDFParser(), new OpenDocumentParser()));
+
+ MediaType mediaType = att.getContentType();
+ if (mediaType == null) {
+ String fileName = att.getContentDisposition().getFilename();
+ if (fileName != null) {
+ int extDot = fileName.lastIndexOf('.');
+ if (extDot > 0) {
+ mediaType = MEDIA_TYPE_TABLE.get(fileName.substring(extDot + 1));
+ }
+ }
+ }
+
+ TikaContent tikaContent = tika.extract(att.getObject(InputStream.class),
+ mediaType);
processStream(async, new TikaReceiver(tikaContent));
}