You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/01/14 17:35:01 UTC

svn commit: r1059061 - in /tika/trunk/tika-core/src/main/java/org/apache/tika: fork/ForkClient.java mime/MediaTypeRegistry.java

Author: jukka
Date: Fri Jan 14 16:35:00 2011
New Revision: 1059061

URL: http://svn.apache.org/viewvc?rev=1059061&view=rev
Log:
TIKA-416: Out-of-process text extraction

Some javadocs. Make MediaTypeRegistry serializable.

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java?rev=1059061&r1=1059060&r2=1059061&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java Fri Jan 14 16:35:00 2011
@@ -19,7 +19,6 @@ package org.apache.tika.fork;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -72,8 +71,16 @@ class ForkClient {
         }
     }
 
-    private void copyClassToDirectory(Class<?> klass)
-            throws FileNotFoundException, IOException {
+    /**
+     * Copies the <code>.class</code> file of the given class to the
+     * directory from where the forked server process can load it
+     * during startup before setting up the stdin/out communication
+     * channel with the parent process.
+     *
+     * @param klass the class to be copied
+     * @throws IOException if the class could not be copied
+     */
+    private void copyClassToDirectory(Class<?> klass) throws IOException {
         String path = klass.getName().replace('.', '/') + ".class";
         InputStream input = loader.getResourceAsStream(path);
         try {
@@ -123,16 +130,25 @@ class ForkClient {
             if (type == -1) {
                 throw new IOException("Unexpected end of stream encountered");
             } else if (type == ForkServer.FIND_RESOURCE) {
-                findResource(input.readUTF());
+                sendResource(input.readUTF());
             } else if (type == ForkServer.FIND_RESOURCES) {
-                findResources(input.readUTF());
+                sendResources(input.readUTF());
             } else {
                 return (byte) type;
             }
         }
     }
 
-    private void findResource(String name) throws IOException {
+    /**
+     * Sends the named resource to the forked server process over the
+     * stdin/out communication channel. The resource stream is preceded
+     * by a boolean <code>true</code> value if the resource was found,
+     * otherwise just a boolean <code>false</code> value is written.
+     *
+     * @param name resource name
+     * @throws IOException if the resource could not be sent
+     */
+    private void sendResource(String name) throws IOException {
         InputStream stream = loader.getResourceAsStream(name);
         if (stream != null) {
             output.writeBoolean(true);
@@ -143,7 +159,17 @@ class ForkClient {
         output.flush();
     }
 
-    private void findResources(String name) throws IOException {
+    /**
+     * Sends all the named resources to the forked server process over the
+     * stdin/out communication channel. Each resource stream is preceded
+     * by a boolean <code>true</code> value, and a single boolean
+     * <code>false</code> value is written when no more resources
+     * are available.
+     *
+     * @param name resource name
+     * @throws IOException if the resources could not be sent
+     */
+    private void sendResources(String name) throws IOException {
         Enumeration<URL> resources = loader.getResources(name);
         while (resources.hasMoreElements()) {
             output.writeBoolean(true);
@@ -153,6 +179,19 @@ class ForkClient {
         output.flush();
     }
 
+    /**
+     * Sends the given byte stream to the forked server process over the
+     * stdin/out communication channel. The stream is sent in chunks of
+     * less than 64kB, each preceded by a short value that indicates the
+     * length of the following chunk. A zero short value is sent at the
+     * end to signify the end of the stream.
+     * <p>
+     * The stream is guaranteed to be closed by this method, regardless of
+     * the way it returns.
+     *
+     * @param stream the stream to be sent
+     * @throws IOException if the stream could not be sent
+     */
     private void writeAndCloseStream(InputStream stream) throws IOException {
         try {
             byte[] buffer = new byte[0xffff];

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java?rev=1059061&r1=1059060&r2=1059061&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java Fri Jan 14 16:35:00 2011
@@ -16,6 +16,7 @@
  */
 package org.apache.tika.mime;
 
+import java.io.Serializable;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.SortedSet;
@@ -26,7 +27,10 @@ import org.apache.tika.config.TikaConfig
 /**
  * Registry of known Internet media types.
  */
-public class MediaTypeRegistry {
+public class MediaTypeRegistry implements Serializable {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = 4710974869988895410L;
 
     /**
      * Returns the built-in media type registry included in Tika.