You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/05 00:59:46 UTC
svn commit: r992698 - in /tika/trunk/tika-core/src/main/java/org/apache/tika: fork/ parser/
Author: jukka
Date: Sat Sep 4 22:59:46 2010
New Revision: 992698
URL: http://svn.apache.org/viewvc?rev=992698&view=rev
Log:
TIKA-416: Out-of-process text extraction
Move the OutOfProcess classes to a separate org.apache.tika.fork package, as they will need a bit more supporting code.
Added:
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
- copied, changed from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
- copied, changed from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java
- copied, changed from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java
- copied, changed from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java
Removed:
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java
Copied: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java (from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java)
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java?p2=tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java&p1=tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java&r1=992107&r2=992698&rev=992698&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessClient.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java Sat Sep 4 22:59:46 2010
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser;
+package org.apache.tika.fork;
import java.io.DataInputStream;
import java.io.DataOutputStream;
@@ -30,7 +30,7 @@ import java.util.Enumeration;
import org.apache.tika.io.IOExceptionWithCause;
import org.apache.tika.io.IOUtils;
-class OutOfProcessClient {
+class ForkClient {
private final ClassLoader loader;
@@ -44,7 +44,7 @@ class OutOfProcessClient {
private final InputStream error;
- public OutOfProcessClient(ClassLoader loader) throws IOException {
+ public ForkClient(ClassLoader loader) throws IOException {
this.loader = loader;
this.directory = File.createTempFile("apache-tika-", "-oop");
@@ -53,12 +53,12 @@ class OutOfProcessClient {
boolean ok = false;
try {
- copyClassToDirectory(OutOfProcessServer.class);
- copyClassToDirectory(OutOfProcessSerializer.class);
+ copyClassToDirectory(ForkServer.class);
+ copyClassToDirectory(ForkSerializer.class);
ProcessBuilder builder = new ProcessBuilder();
builder.directory(directory);
- builder.command("java", OutOfProcessServer.class.getName());
+ builder.command("java", ForkServer.class.getName());
this.process = builder.start();
this.output = new DataOutputStream(process.getOutputStream());
this.input = new DataInputStream(process.getInputStream());
@@ -92,13 +92,13 @@ class OutOfProcessClient {
public synchronized Object echo(Object message) throws IOException {
consumeErrors();
- output.write(OutOfProcessServer.ECHO);
- OutOfProcessSerializer.serialize(output, message);
+ output.write(ForkServer.ECHO);
+ ForkSerializer.serialize(output, message);
output.flush();
readResponseType();
try {
- return OutOfProcessSerializer.deserialize(input, loader).toString();
+ return ForkSerializer.deserialize(input, loader).toString();
} catch (ClassNotFoundException e) {
throw new IOExceptionWithCause("Unable to read echo response", e);
}
@@ -122,9 +122,9 @@ class OutOfProcessClient {
int type = input.read();
if (type == -1) {
throw new IOException("Unexpected end of stream encountered");
- } else if (type == OutOfProcessServer.FIND_RESOURCE) {
+ } else if (type == ForkServer.FIND_RESOURCE) {
findResource(input.readUTF());
- } else if (type == OutOfProcessServer.FIND_RESOURCES) {
+ } else if (type == ForkServer.FIND_RESOURCES) {
findResources(input.readUTF());
} else {
return (byte) type;
Copied: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java (from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java)
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java?p2=tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java&p1=tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java&r1=992107&r2=992698&rev=992698&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java Sat Sep 4 22:59:46 2010
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser;
+package org.apache.tika.fork;
import java.io.IOException;
import java.io.InputStream;
@@ -23,20 +23,24 @@ import java.util.Queue;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.DelegatingParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-public class OutOfProcessParser extends DelegatingParser {
+public class ForkParser extends DelegatingParser {
private final ClassLoader loader;
- private final Queue<OutOfProcessClient> pool =
- new LinkedList<OutOfProcessClient>();
+ private final Queue<ForkClient> pool =
+ new LinkedList<ForkClient>();
private int poolSize = 5;
public static void main(String[] args) throws Exception {
- OutOfProcessParser parser = new OutOfProcessParser(
+ ForkParser parser = new ForkParser(
Thread.currentThread().getContextClassLoader());
try {
ParseContext context = new ParseContext();
@@ -47,7 +51,7 @@ public class OutOfProcessParser extends
}
}
- public OutOfProcessParser(ClassLoader loader) {
+ public ForkParser(ClassLoader loader) {
this.loader = loader;
}
@@ -59,7 +63,7 @@ public class OutOfProcessParser extends
InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
- OutOfProcessClient client = acquireClient();
+ ForkClient client = acquireClient();
try {
System.out.println(client.echo(getDelegateParser(context)));
} finally {
@@ -68,23 +72,23 @@ public class OutOfProcessParser extends
}
public synchronized void close() {
- for (OutOfProcessClient client : pool) {
+ for (ForkClient client : pool) {
client.close();
}
pool.clear();
poolSize = 0;
}
- private synchronized OutOfProcessClient acquireClient()
+ private synchronized ForkClient acquireClient()
throws IOException {
- OutOfProcessClient client = pool.poll();
+ ForkClient client = pool.poll();
if (client == null) {
- client = new OutOfProcessClient(loader);
+ client = new ForkClient(loader);
}
return client;
}
- private synchronized void releaseClient(OutOfProcessClient client) {
+ private synchronized void releaseClient(ForkClient client) {
if (pool.size() < poolSize) {
pool.offer(client);
} else {
Copied: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java (from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java)
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java?p2=tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java&p1=tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java&r1=992107&r2=992698&rev=992698&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessSerializer.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkSerializer.java Sat Sep 4 22:59:46 2010
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser;
+package org.apache.tika.fork;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@@ -26,11 +26,11 @@ import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.ObjectStreamClass;
-class OutOfProcessSerializer extends ObjectInputStream {
+class ForkSerializer extends ObjectInputStream {
private final ClassLoader loader;
- public OutOfProcessSerializer(InputStream input, ClassLoader loader)
+ public ForkSerializer(InputStream input, ClassLoader loader)
throws IOException {
super(input);
this.loader = loader;
@@ -62,7 +62,7 @@ class OutOfProcessSerializer extends Obj
input.readFully(data);
ObjectInputStream deserializer =
- new OutOfProcessSerializer(new ByteArrayInputStream(data), loader);
+ new ForkSerializer(new ByteArrayInputStream(data), loader);
return deserializer.readObject();
}
Copied: tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java (from r992107, tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java)
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java?p2=tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java&p1=tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java&r1=992107&r2=992698&rev=992698&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/OutOfProcessServer.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkServer.java Sat Sep 4 22:59:46 2010
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser;
+package org.apache.tika.fork;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@@ -31,7 +31,7 @@ import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
-class OutOfProcessServer extends ClassLoader {
+class ForkServer extends ClassLoader {
public static final byte ERROR = -1;
@@ -44,8 +44,8 @@ class OutOfProcessServer extends ClassLo
public static final byte FIND_RESOURCES = 3;
public static void main(String[] args) throws Exception {
- OutOfProcessServer server =
- new OutOfProcessServer(System.in, System.out);
+ ForkServer server =
+ new ForkServer(System.in, System.out);
Thread.currentThread().setContextClassLoader(server);
// Redirect standard input and output streams to prevent
@@ -62,7 +62,7 @@ class OutOfProcessServer extends ClassLo
private int count = 0;
- public OutOfProcessServer(InputStream input, OutputStream output)
+ public ForkServer(InputStream input, OutputStream output)
throws IOException {
this.input = new DataInputStream(input);
this.output = new DataOutputStream(output);
@@ -74,12 +74,12 @@ class OutOfProcessServer extends ClassLo
if (b == ECHO) {
try {
Object message =
- OutOfProcessSerializer.deserialize(input, this);
+ ForkSerializer.deserialize(input, this);
output.write(ECHO);
- OutOfProcessSerializer.serialize(output, "echo: " + message);
+ ForkSerializer.serialize(output, "echo: " + message);
} catch (ClassNotFoundException e) {
output.write(ERROR);
- OutOfProcessSerializer.serialize(output, e);
+ ForkSerializer.serialize(output, e);
}
output.flush();
}