You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/01/18 16:21:40 UTC
svn commit: r1060412 - in /tika/trunk/tika-core/src/main/java/org/apache/tika/fork: ForkClient.java ForkParser.java ForkServer.java
Author: jukka
Date: Tue Jan 18 15:21:40 2011
New Revision: 1060412
URL: http://svn.apache.org/viewvc?rev=1060412&view=rev
Log:
TIKA-416: Out-of-process text extraction
Send the class loader and server object only once per forked server process, instead of on every call, for a massive speed improvement.
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java?rev=1060412&r1=1060411&r2=1060412&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java Tue Jan 18 15:21:40 2011
@@ -35,7 +35,7 @@ class ForkClient {
private final String java = "java"; // TODO: Make configurable
- private final ClassLoader loader;
+ private final List<ForkResource> resources = new ArrayList<ForkResource>();
private final File directory;
@@ -47,9 +47,7 @@ class ForkClient {
private final InputStream error;
- public ForkClient(ClassLoader loader) throws IOException {
- this.loader = loader;
-
+ public ForkClient(ClassLoader loader, Object object) throws IOException {
this.directory = File.createTempFile("apache-tika-", "-fork");
directory.delete();
directory.mkdir();
@@ -69,6 +67,9 @@ class ForkClient {
this.input = new DataInputStream(process.getInputStream());
this.error = process.getErrorStream();
+ sendObject(loader, resources);
+ sendObject(object, resources);
+
ok = true;
} finally {
if (!ok) {
@@ -104,17 +105,14 @@ class ForkClient {
}
}
- public synchronized void call(
- Object object, String method, Object... args)
+ public synchronized void call(String method, Object... args)
throws IOException {
- List<ForkResource> resources = new ArrayList<ForkResource>();
- sendObject(loader, resources);
- sendObject(object, resources);
+ List<ForkResource> r = new ArrayList<ForkResource>(resources);
output.writeUTF("parse");
for (int i = 0; i < args.length; i++) {
- sendObject(args[i], resources);
+ sendObject(args[i], r);
}
- waitForResponse(resources);
+ waitForResponse(r);
}
/**
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java?rev=1060412&r1=1060411&r2=1060412&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java Tue Jan 18 15:21:40 2011
@@ -75,7 +75,7 @@ public class ForkParser implements Parse
throws IOException, SAXException, TikaException {
ForkClient client = acquireClient();
try {
- client.call(parser, "parse", stream, handler, metadata, context);
+ client.call("parse", stream, handler, metadata, context);
} finally {
releaseClient(client);
}
@@ -99,7 +99,7 @@ public class ForkParser implements Parse
throws IOException {
ForkClient client = pool.poll();
if (client == null) {
- client = new ForkClient(loader);
+ client = new ForkClient(loader, parser);
}
return client;
}
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java?rev=1060412&r1=1060411&r2=1060412&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java Tue Jan 18 15:21:40 2011
@@ -73,12 +73,12 @@ class ForkServer implements Runnable {
public void run() {
try {
- while (true) {
- ClassLoader loader = (ClassLoader) readObject(
- ForkServer.class.getClassLoader());
- Thread.currentThread().setContextClassLoader(loader);
+ ClassLoader loader = (ClassLoader) readObject(
+ ForkServer.class.getClassLoader());
+ Thread.currentThread().setContextClassLoader(loader);
- Object object = readObject(loader);
+ Object object = readObject(loader);
+ while (true) {
Method method = getMethod(object, input.readUTF());
Object[] args = new Object[method.getParameterTypes().length];
for (int i = 0; i < args.length; i++) {