You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cxf.apache.org by re...@apache.org on 2016/09/10 14:41:30 UTC

[22/37] cxf git commit: Updating Spark demo to accept PDF and ODT

Updating Spark demo to accept PDF and ODT


Project: http://git-wip-us.apache.org/repos/asf/cxf/repo
Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/f2db2250
Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/f2db2250
Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/f2db2250

Branch: refs/heads/master-jaxrs-2.1
Commit: f2db225034f82e93617aa8aece8bf059b8758626
Parents: f5a1c14
Author: Sergey Beryozkin <sb...@gmail.com>
Authored: Thu Sep 8 18:27:52 2016 +0100
Committer: Sergey Beryozkin <sb...@gmail.com>
Committed: Thu Sep 8 18:27:52 2016 +0100

----------------------------------------------------------------------
 .../release/samples/jax_rs/spark/README.txt     |  2 +-
 .../main/release/samples/jax_rs/spark/pom.xml   |  5 ++++
 .../demo/jaxrs/server/StreamingService.java     | 27 ++++++++++++++++++--
 3 files changed, 31 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cxf/blob/f2db2250/distribution/src/main/release/samples/jax_rs/spark/README.txt
----------------------------------------------------------------------
diff --git a/distribution/src/main/release/samples/jax_rs/spark/README.txt b/distribution/src/main/release/samples/jax_rs/spark/README.txt
index 8a7b292..b10a44b 100644
--- a/distribution/src/main/release/samples/jax_rs/spark/README.txt
+++ b/distribution/src/main/release/samples/jax_rs/spark/README.txt
@@ -11,7 +11,7 @@ Next do:
 
 1. Simple text processing:
 
-curl -X POST -H "Accept: text/plain" -H "Content-Type: text/plain" -d "Hello Spark" http://localhost:9000/stream
+curl -X POST -H "Accept: text/plain" -H "Content-Type: text/plain" -d "Hello Spark" http://localhost:9000/spark/stream
 
 2. PDF processing:
 

http://git-wip-us.apache.org/repos/asf/cxf/blob/f2db2250/distribution/src/main/release/samples/jax_rs/spark/pom.xml
----------------------------------------------------------------------
diff --git a/distribution/src/main/release/samples/jax_rs/spark/pom.xml b/distribution/src/main/release/samples/jax_rs/spark/pom.xml
index 10a00da..0ba37c1 100644
--- a/distribution/src/main/release/samples/jax_rs/spark/pom.xml
+++ b/distribution/src/main/release/samples/jax_rs/spark/pom.xml
@@ -74,6 +74,11 @@
             <artifactId>tika-parser-pdf-module</artifactId>
             <version>2.0-SNAPSHOT</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-parser-office-module</artifactId>
+            <version>2.0-SNAPSHOT</version>
+        </dependency>
     </dependencies>
 
     <repositories>

http://git-wip-us.apache.org/repos/asf/cxf/blob/f2db2250/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java
----------------------------------------------------------------------
diff --git a/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java b/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java
index 5e059fc..4f82b5e 100644
--- a/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java
+++ b/distribution/src/main/release/samples/jax_rs/spark/src/main/java/demo/jaxrs/server/StreamingService.java
@@ -20,6 +20,7 @@ package demo.jaxrs.server;
 
 import java.io.InputStream;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Random;
@@ -35,6 +36,7 @@ import javax.ws.rs.Produces;
 import javax.ws.rs.WebApplicationException;
 import javax.ws.rs.container.AsyncResponse;
 import javax.ws.rs.container.Suspended;
+import javax.ws.rs.core.MediaType;
 
 import org.apache.cxf.common.util.Base64Utility;
 import org.apache.cxf.jaxrs.ext.multipart.Attachment;
@@ -54,6 +56,7 @@ import org.apache.spark.streaming.api.java.JavaPairDStream;
 import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
 import org.apache.spark.streaming.api.java.JavaStreamingContext;
 import org.apache.spark.streaming.receiver.Receiver;
+import org.apache.tika.parser.odf.OpenDocumentParser;
 import org.apache.tika.parser.pdf.PDFParser;
 
 import scala.Tuple2;
@@ -61,6 +64,12 @@ import scala.Tuple2;
 
 @Path("/")
 public class StreamingService {
+    private static final Map<String, MediaType> MEDIA_TYPE_TABLE;
+    static {
+        MEDIA_TYPE_TABLE = new HashMap<String, MediaType>();
+        MEDIA_TYPE_TABLE.put("pdf", MediaType.valueOf("application/pdf"));
+        MEDIA_TYPE_TABLE.put("odt", MediaType.valueOf("application/vnd.oasis.opendocument.text"));
+    }
     private Executor executor = new ThreadPoolExecutor(5, 5, 0, TimeUnit.SECONDS,
                                                        new ArrayBlockingQueue<Runnable>(10));
     public StreamingService() {
@@ -72,8 +81,22 @@ public class StreamingService {
     @Produces("text/plain")
     public void processMultipartStream(@Suspended AsyncResponse async, 
                                        @Multipart("file") Attachment att) {
-        TikaContentExtractor tika = new TikaContentExtractor(new PDFParser());
-        TikaContent tikaContent = tika.extract(att.getObject(InputStream.class));
+        TikaContentExtractor tika = new TikaContentExtractor(
+            Arrays.asList(new PDFParser(), new OpenDocumentParser()));
+        
+        MediaType mediaType = att.getContentType();
+        if (mediaType == null) {
+            String fileName = att.getContentDisposition().getFilename();
+            if (fileName != null) {
+                int extDot = fileName.lastIndexOf('.');
+                if (extDot > 0) {
+                    mediaType = MEDIA_TYPE_TABLE.get(fileName.substring(extDot + 1));
+                }
+            }
+        }
+        
+        TikaContent tikaContent = tika.extract(att.getObject(InputStream.class),
+                                               mediaType);
         processStream(async, new TikaReceiver(tikaContent));
     }