You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by mr...@apache.org on 2007/01/18 15:42:42 UTC
svn commit: r497449 -
/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java
Author: mreutegg
Date: Thu Jan 18 06:42:41 2007
New Revision: 497449
URL: http://svn.apache.org/viewvc?view=rev&rev=497449
Log:
JCR-390: Move text extraction into a background thread
- minimize memory usage of text extractors
Modified:
jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java
Modified: jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java?view=diff&rev=497449&r1=497448&r2=497449
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/java/org/apache/jackrabbit/core/query/lucene/TextExtractorJob.java Thu Jan 18 06:42:41 2007
@@ -19,12 +19,20 @@
import EDU.oswego.cs.dl.util.concurrent.FutureResult;
import EDU.oswego.cs.dl.util.concurrent.Callable;
import org.apache.jackrabbit.extractor.TextExtractor;
+import org.apache.jackrabbit.util.LazyFileInputStream;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import java.io.InputStream;
import java.io.Reader;
import java.io.IOException;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.io.BufferedWriter;
+import java.io.InputStreamReader;
+import java.io.StringReader;
import java.lang.reflect.InvocationTargetException;
/**
@@ -34,6 +42,11 @@
public class TextExtractorJob extends FutureResult implements Runnable {
/**
+ * UTF-8 encoding.
+ */
+ private static final String ENCODING_UTF8 = "UTF-8";
+
+ /**
* The logger instance for this class.
*/
private static final Logger log = LoggerFactory.getLogger(TextExtractorJob.class);
@@ -49,6 +62,11 @@
private final String type;
/**
+ * Set to <code>true</code> if this job timed out.
+ */
+ private transient boolean timedOut = false;
+
+ /**
* <code>true</code> if this extractor job has been flaged as discarded.
*/
private transient boolean discarded = false;
@@ -71,9 +89,14 @@
this.cmd = setter(new Callable() {
public Object call() throws Exception {
Reader r = extractor.extractText(stream, type, encoding);
- if (discarded && r != null) {
- r.close();
- r = null;
+ if (r != null) {
+ if (discarded) {
+ r.close();
+ r = null;
+ } else if (timedOut) {
+ // spool a temp file to save memory
+ r = getSwappedOutReader(r);
+ }
}
return r;
}
@@ -100,6 +123,7 @@
if (timeout > 0) {
log.info("Text extraction for {} timed out (>{}ms).",
type, new Long(timeout));
+ timedOut = true;
}
} catch (InvocationTargetException e) {
// extraction failed
@@ -144,5 +168,82 @@
public void run() {
// forward to command
cmd.run();
+ }
+
+ //----------------------------< internal >----------------------------------
+
+ /**
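+ * Spools the content of <code>r</code> into a UTF-8 encoded temp file and returns a <code>Reader</code> on that file.
+ *
+ * @param r the reader to swap out into a temp file.
+ * @return a reader to the temp file; <code>r</code> itself if the temp file cannot be created or opened; an empty reader if spooling fails.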
+ */
+ private Reader getSwappedOutReader(Reader r) {
+ final File temp;
+ try {
+ temp = File.createTempFile("extractor", null);
+ } catch (IOException e) {
+ // unable to create temp file:
+ // fall back to the original (unspooled) reader
+ return r;
+ }
+ Writer out;
+ try {
+ out = new BufferedWriter(new OutputStreamWriter(
+ new FileOutputStream(temp), ENCODING_UTF8));
+ } catch (IOException e) {
+ // should never happen actually; drop the unused temp file and return r as is
+ if (!temp.delete()) {
+ temp.deleteOnExit();
+ }
+ return r;
+ }
+
+ // spool into temp file, up to 1024 chars per read
+ char[] buffer = new char[1024];
+ int len;
+ InputStream in = null;
+ try {
+ try {
+ while ((len = r.read(buffer)) >= 0) {
+ out.write(buffer, 0, len);
+ }
+ out.close(); // flushes buffered chars before the temp file is read back
+ } finally {
+ r.close(); // the source reader is closed whether or not spooling succeeded
+ }
+ in = new LazyFileInputStream(temp);
+
+ return new InputStreamReader(in, ENCODING_UTF8) {
+ public void close() throws IOException {
+ super.close();
+ // delete the temp file, or schedule deletion at JVM exit if that fails
+ if (!temp.delete()) {
+ temp.deleteOnExit();
+ }
+ }
+ };
+ } catch (IOException e) {
+ // spooling failed: release the writer and the stream, then remove the temp file
+ try {
+ out.close();
+ } catch (IOException e1) {
+ // ignore, best-effort clean up
+ }
+
+ if (in != null) {
+ try {
+ in.close();
+ } catch (IOException e1) {
+ // ignore, best-effort clean up
+ }
+ }
+
+ if (!temp.delete()) {
+ temp.deleteOnExit();
+ }
+ // use empty string reader as fallback; r cannot be returned since closing it was already attempted
+ return new StringReader("");
+ }
+ }
}