You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/01/18 16:21:40 UTC

svn commit: r1060412 - in /tika/trunk/tika-core/src/main/java/org/apache/tika/fork: ForkClient.java ForkParser.java ForkServer.java

Author: jukka
Date: Tue Jan 18 15:21:40 2011
New Revision: 1060412

URL: http://svn.apache.org/viewvc?rev=1060412&view=rev
Log:
TIKA-416: Out-of-process text extraction

Send the class loader and the server object only once per forked server process, rather than on every call, for a massive speed improvement.

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java?rev=1060412&r1=1060411&r2=1060412&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java Tue Jan 18 15:21:40 2011
@@ -35,7 +35,7 @@ class ForkClient {
 
     private final String java = "java"; // TODO: Make configurable
 
-    private final ClassLoader loader;
+    private final List<ForkResource> resources = new ArrayList<ForkResource>();
 
     private final File directory;
 
@@ -47,9 +47,7 @@ class ForkClient {
 
     private final InputStream error;
 
-    public ForkClient(ClassLoader loader) throws IOException {
-        this.loader = loader;
-
+    public ForkClient(ClassLoader loader, Object object) throws IOException {
         this.directory = File.createTempFile("apache-tika-", "-fork");
         directory.delete();
         directory.mkdir();
@@ -69,6 +67,9 @@ class ForkClient {
             this.input = new DataInputStream(process.getInputStream());
             this.error = process.getErrorStream();
 
+            sendObject(loader, resources);
+            sendObject(object, resources);
+
             ok = true;
         } finally {
             if (!ok) {
@@ -104,17 +105,14 @@ class ForkClient {
         }
     }
 
-    public synchronized void call(
-            Object object, String method, Object... args)
+    public synchronized void call(String method, Object... args)
             throws IOException {
-        List<ForkResource> resources = new ArrayList<ForkResource>();
-        sendObject(loader, resources);
-        sendObject(object, resources);
+        List<ForkResource> r = new ArrayList<ForkResource>(resources);
         output.writeUTF("parse");
         for (int i = 0; i < args.length; i++) {
-            sendObject(args[i], resources);
+            sendObject(args[i], r);
         }
-        waitForResponse(resources);
+        waitForResponse(r);
     }
 
     /**

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java?rev=1060412&r1=1060411&r2=1060412&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java Tue Jan 18 15:21:40 2011
@@ -75,7 +75,7 @@ public class ForkParser implements Parse
             throws IOException, SAXException, TikaException {
         ForkClient client = acquireClient();
         try {
-            client.call(parser, "parse", stream, handler, metadata, context);
+            client.call("parse", stream, handler, metadata, context);
         } finally {
             releaseClient(client);
         }
@@ -99,7 +99,7 @@ public class ForkParser implements Parse
             throws IOException {
         ForkClient client = pool.poll();
         if (client == null) {
-            client = new ForkClient(loader);
+            client = new ForkClient(loader, parser);
         }
         return client;
     }

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java?rev=1060412&r1=1060411&r2=1060412&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java Tue Jan 18 15:21:40 2011
@@ -73,12 +73,12 @@ class ForkServer implements Runnable {
 
     public void run() {
         try {
-            while (true) {
-                ClassLoader loader = (ClassLoader) readObject(
-                        ForkServer.class.getClassLoader());
-                Thread.currentThread().setContextClassLoader(loader);
+            ClassLoader loader = (ClassLoader) readObject(
+                    ForkServer.class.getClassLoader());
+            Thread.currentThread().setContextClassLoader(loader);
 
-                Object object = readObject(loader);
+            Object object = readObject(loader);
+            while (true) {
                 Method method = getMethod(object, input.readUTF());
                 Object[] args = new Object[method.getParameterTypes().length];
                 for (int i = 0; i < args.length; i++) {