You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/05 00:59:46 UTC

svn commit: r992698 - in /tika/trunk/tika-core/src/main/java/org/apache/tika: fork/ parser/

Author: jukka
Date: Sat Sep  4 22:59:46 2010
New Revision: 992698

URL: http://svn.apache.org/viewvc?rev=992698&view=rev
Log:
TIKA-416: Out-of-process text extraction

Move the OutOfProcess classes to a separate org.apache.tika.fork package (renaming them from OutOfProcess* to Fork*), as they will need a bit more supporting code.

Added:
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
      - copied, changed from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
      - copied, changed from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java
      - copied, changed from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
      - copied, changed from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java
Removed:
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java

Copied: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java (from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java)
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java?p2=tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java&p1=tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java&r1=992107&r2=992698&rev=992698&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java Sat Sep  4 22:59:46 2010
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser;
+package org.apache.tika.fork;
 
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
@@ -30,7 +30,7 @@ import java.util.Enumeration;
 import org.apache.tika.io.IOExceptionWithCause;
 import org.apache.tika.io.IOUtils;
 
-class OutOfProcessClient {
+class ForkClient {
 
     private final ClassLoader loader;
 
@@ -44,7 +44,7 @@ class OutOfProcessClient {
 
     private final InputStream error;
 
-    public OutOfProcessClient(ClassLoader loader) throws IOException {
+    public ForkClient(ClassLoader loader) throws IOException {
         this.loader = loader;
 
         this.directory = File.createTempFile("apache-tika-", "-oop");
@@ -53,12 +53,12 @@ class OutOfProcessClient {
 
         boolean ok = false;
         try {
-            copyClassToDirectory(OutOfProcessServer.class);
-            copyClassToDirectory(OutOfProcessSerializer.class);
+            copyClassToDirectory(ForkServer.class);
+            copyClassToDirectory(ForkSerializer.class);
 
             ProcessBuilder builder = new ProcessBuilder();
             builder.directory(directory);
-            builder.command("java", OutOfProcessServer.class.getName());
+            builder.command("java", ForkServer.class.getName());
             this.process = builder.start();
             this.output = new DataOutputStream(process.getOutputStream());
             this.input = new DataInputStream(process.getInputStream());
@@ -92,13 +92,13 @@ class OutOfProcessClient {
 
     public synchronized Object echo(Object message) throws IOException {
         consumeErrors();
-        output.write(OutOfProcessServer.ECHO);
-        OutOfProcessSerializer.serialize(output, message);
+        output.write(ForkServer.ECHO);
+        ForkSerializer.serialize(output, message);
         output.flush();
 
         readResponseType();
         try {
-            return OutOfProcessSerializer.deserialize(input, loader).toString();
+            return ForkSerializer.deserialize(input, loader).toString();
         } catch (ClassNotFoundException e) {
             throw new IOExceptionWithCause("Unable to read echo response", e);
         }
@@ -122,9 +122,9 @@ class OutOfProcessClient {
             int type = input.read();
             if (type == -1) {
                 throw new IOException("Unexpected end of stream encountered");
-            } else if (type == OutOfProcessServer.FIND_RESOURCE) {
+            } else if (type == ForkServer.FIND_RESOURCE) {
                 findResource(input.readUTF());
-            } else if (type == OutOfProcessServer.FIND_RESOURCES) {
+            } else if (type == ForkServer.FIND_RESOURCES) {
                 findResources(input.readUTF());
             } else {
                 return (byte) type;

Copied: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java (from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java)
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java?p2=tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java&p1=tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java&r1=992107&r2=992698&rev=992698&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java Sat Sep  4 22:59:46 2010
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser;
+package org.apache.tika.fork;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -23,20 +23,24 @@ import java.util.Queue;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.DelegatingParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-public class OutOfProcessParser extends DelegatingParser {
+public class ForkParser extends DelegatingParser {
 
     private final ClassLoader loader;
 
-    private final Queue<OutOfProcessClient> pool =
-        new LinkedList<OutOfProcessClient>();
+    private final Queue<ForkClient> pool =
+        new LinkedList<ForkClient>();
 
     private int poolSize = 5;
 
     public static void main(String[] args) throws Exception {
-        OutOfProcessParser parser = new OutOfProcessParser(
+        ForkParser parser = new ForkParser(
                 Thread.currentThread().getContextClassLoader());
         try {
             ParseContext context = new ParseContext();
@@ -47,7 +51,7 @@ public class OutOfProcessParser extends 
         }
     }
 
-    public OutOfProcessParser(ClassLoader loader) {
+    public ForkParser(ClassLoader loader) {
         this.loader = loader;
     }
 
@@ -59,7 +63,7 @@ public class OutOfProcessParser extends 
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
-        OutOfProcessClient client = acquireClient();
+        ForkClient client = acquireClient();
         try {
             System.out.println(client.echo(getDelegateParser(context)));
         } finally {
@@ -68,23 +72,23 @@ public class OutOfProcessParser extends 
     }
 
     public synchronized void close() {
-        for (OutOfProcessClient client : pool) {
+        for (ForkClient client : pool) {
             client.close();
         }
         pool.clear();
         poolSize = 0;
     }
 
-    private synchronized OutOfProcessClient acquireClient()
+    private synchronized ForkClient acquireClient()
             throws IOException {
-        OutOfProcessClient client = pool.poll();
+        ForkClient client = pool.poll();
         if (client == null) {
-            client = new OutOfProcessClient(loader);
+            client = new ForkClient(loader);
         }
         return client;
     }
 
-    private synchronized void releaseClient(OutOfProcessClient client) {
+    private synchronized void releaseClient(ForkClient client) {
         if (pool.size() < poolSize) {
             pool.offer(client);
         } else {

Copied: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java (from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java)
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java?p2=tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java&p1=tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java&r1=992107&r2=992698&rev=992698&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java Sat Sep  4 22:59:46 2010
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser;
+package org.apache.tika.fork;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -26,11 +26,11 @@ import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.io.ObjectStreamClass;
 
-class OutOfProcessSerializer extends ObjectInputStream {
+class ForkSerializer extends ObjectInputStream {
 
     private final ClassLoader loader;
 
-    public OutOfProcessSerializer(InputStream input, ClassLoader loader)
+    public ForkSerializer(InputStream input, ClassLoader loader)
             throws IOException {
         super(input);
         this.loader = loader;
@@ -62,7 +62,7 @@ class OutOfProcessSerializer extends Obj
         input.readFully(data);
 
         ObjectInputStream deserializer =
-            new OutOfProcessSerializer(new ByteArrayInputStream(data), loader);
+            new ForkSerializer(new ByteArrayInputStream(data), loader);
         return deserializer.readObject();
     }
 

Copied: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java (from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java)
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java?p2=tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java&p1=tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java&r1=992107&r2=992698&rev=992698&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java Sat Sep  4 22:59:46 2010
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.tika.parser;
+package org.apache.tika.fork;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -31,7 +31,7 @@ import java.util.Collections;
 import java.util.Enumeration;
 import java.util.List;
 
-class OutOfProcessServer extends ClassLoader {
+class ForkServer extends ClassLoader {
 
     public static final byte ERROR = -1;
 
@@ -44,8 +44,8 @@ class OutOfProcessServer extends ClassLo
     public static final byte FIND_RESOURCES = 3;
 
     public static void main(String[] args) throws Exception {
-        OutOfProcessServer server =
-            new OutOfProcessServer(System.in, System.out);
+        ForkServer server =
+            new ForkServer(System.in, System.out);
         Thread.currentThread().setContextClassLoader(server);
 
         // Redirect standard input and output streams to prevent
@@ -62,7 +62,7 @@ class OutOfProcessServer extends ClassLo
 
     private int count = 0;
 
-    public OutOfProcessServer(InputStream input, OutputStream output)
+    public ForkServer(InputStream input, OutputStream output)
             throws IOException {
         this.input = new DataInputStream(input);
         this.output = new DataOutputStream(output);
@@ -74,12 +74,12 @@ class OutOfProcessServer extends ClassLo
             if (b == ECHO) {
                 try {
                     Object message =
-                        OutOfProcessSerializer.deserialize(input, this);
+                        ForkSerializer.deserialize(input, this);
                     output.write(ECHO);
-                    OutOfProcessSerializer.serialize(output, "echo: " + message);
+                    ForkSerializer.serialize(output, "echo: " + message);
                 } catch (ClassNotFoundException e) {
                     output.write(ERROR);
-                    OutOfProcessSerializer.serialize(output, e);
+                    ForkSerializer.serialize(output, e);
                 }
                 output.flush();
             }