You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/01/14 17:35:01 UTC
svn commit: r1059061 - in
/tika/trunk/tika-core/src/main/java/org/apache/tika: fork/ForkClient.java
mime/MediaTypeRegistry.java
Author: jukka
Date: Fri Jan 14 16:35:00 2011
New Revision: 1059061
URL: http://svn.apache.org/viewvc?rev=1059061&view=rev
Log:
TIKA-416: Out-of-process text extraction
Some javadocs. Make MediaTypeRegistry serializable.
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java?rev=1059061&r1=1059060&r2=1059061&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java Fri Jan 14 16:35:00 2011
@@ -19,7 +19,6 @@ package org.apache.tika.fork;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -72,8 +71,16 @@ class ForkClient {
}
}
- private void copyClassToDirectory(Class<?> klass)
- throws FileNotFoundException, IOException {
+ /**
+ * Copies the <code>.class</code> file of the given class to the
+ * directory from where the forked server process can load it
+ * during startup before setting up the stdin/out communication
+ * channel with the parent process.
+ *
+ * @param klass the class to be copied
+ * @throws IOException if the class could not be copied
+ */
+ private void copyClassToDirectory(Class<?> klass) throws IOException {
String path = klass.getName().replace('.', '/') + ".class";
InputStream input = loader.getResourceAsStream(path);
try {
@@ -123,16 +130,25 @@ class ForkClient {
if (type == -1) {
throw new IOException("Unexpected end of stream encountered");
} else if (type == ForkServer.FIND_RESOURCE) {
- findResource(input.readUTF());
+ sendResource(input.readUTF());
} else if (type == ForkServer.FIND_RESOURCES) {
- findResources(input.readUTF());
+ sendResources(input.readUTF());
} else {
return (byte) type;
}
}
}
- private void findResource(String name) throws IOException {
+ /**
+ * Sends the named resource to the forked server process over the
+ * stdin/out communication channel. The resource stream is preceded
+ * by a boolean <code>true</code> value if the resource was found,
+ * otherwise just a boolean <code>false</code> value is written.
+ *
+ * @param name resource name
+ * @throws IOException if the resource could not be sent
+ */
+ private void sendResource(String name) throws IOException {
InputStream stream = loader.getResourceAsStream(name);
if (stream != null) {
output.writeBoolean(true);
@@ -143,7 +159,17 @@ class ForkClient {
output.flush();
}
- private void findResources(String name) throws IOException {
+ /**
+ * Sends all the named resources to the forked server process over the
+ * stdin/out communication channel. Each resource stream is preceded
+ * by a boolean <code>true</code> value, and a single boolean
+ * <code>false</code> value is written when no more resources
+ * are available.
+ *
+ * @param name resource name
+ * @throws IOException if the resources could not be sent
+ */
+ private void sendResources(String name) throws IOException {
Enumeration<URL> resources = loader.getResources(name);
while (resources.hasMoreElements()) {
output.writeBoolean(true);
@@ -153,6 +179,19 @@ class ForkClient {
output.flush();
}
+ /**
+ * Sends the given byte stream to the forked server process over the
+ * stdin/out communication channel. The stream is sent in chunks of
+ * less than 64kB, each preceded by a short value that indicates the
+ * length of the following chunk. A zero short value is sent at the
+ * end to signify the end of the stream.
+ * <p>
+ * The stream is guaranteed to be closed by this method, regardless of
+ * the way it returns.
+ *
+ * @param stream the stream to be sent
+ * @throws IOException if the stream could not be sent
+ */
private void writeAndCloseStream(InputStream stream) throws IOException {
try {
byte[] buffer = new byte[0xffff];
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java?rev=1059061&r1=1059060&r2=1059061&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MediaTypeRegistry.java Fri Jan 14 16:35:00 2011
@@ -16,6 +16,7 @@
*/
package org.apache.tika.mime;
+import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedSet;
@@ -26,7 +27,10 @@ import org.apache.tika.config.TikaConfig
/**
* Registry of known Internet media types.
*/
-public class MediaTypeRegistry {
+public class MediaTypeRegistry implements Serializable {
+
+ /** Serial version UID */
+ private static final long serialVersionUID = 4710974869988895410L;
/**
* Returns the built-in media type registry included in Tika.