You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/01 00:37:03 UTC
svn commit: r1606942 - in
/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces:
TempFileCharacterInput.java TempFileInput.java
Author: kwright
Date: Mon Jun 30 22:37:03 2014
New Revision: 1606942
URL: http://svn.apache.org/r1606942
Log:
Performance improvement: cache in memory if small enough
Modified:
manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java
manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java
Modified: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java?rev=1606942&r1=1606941&r2=1606942&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java (original)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java Mon Jun 30 22:37:03 2014
@@ -26,19 +26,23 @@ import org.apache.manifoldcf.core.system
/** This class represents a temporary file character input
* stream. Call the "done" method to clean up the
* file when done.
-* NOTE: The implied flow of this method is to be handled
+* NOTE: The implied flow of this method is to be handed
* a file that has already been created by some means. The
* file must be a dedicated temporary file, which can be
-* destroyed when the data has been used.
+* destroyed when the data has been used. However, this class can also
+* buffer data in memory if the data is not too large (that is, less than a
+* supplied cutoff value).
*/
public class TempFileCharacterInput extends CharacterInput
{
public static final String _rcsid = "@(#)$Id: TempFileCharacterInput.java 988245 2010-08-23 18:39:35Z kwright $";
protected File file;
+ protected byte[] inMemoryBuffer;
protected final static int CHUNK_SIZE = 65536;
-
+ protected final static int DEFAULT_MAX_MEM_SIZE = 8192;
+
/** Construct from a non-length-delimited reader.
*@param is is a reader to transfer from, to the end of the data. This will, as a side effect, also calculate the character length
* and hash value for the data.
@@ -57,81 +61,165 @@ public class TempFileCharacterInput exte
public TempFileCharacterInput(Reader is, long length)
throws ManifoldCFException
{
+ this(is,length,DEFAULT_MAX_MEM_SIZE);
+ }
+
+ /** Construct from a length-delimited reader.
+ *@param is is a reader to transfer from, to the end of the data. This will, as a side effect, also calculate the character length
+ * and hash value for the data.
+ *@param length is the length limit to transfer, or -1 if no limit
+ *@param maxInMemoryLength is the maximum size to keep in memory, before using a backing File object. The amount
+ * saved in memory, measured in UTF-8 encoded bytes, is guaranteed not to exceed this size.
+ */
+ public TempFileCharacterInput(Reader is, long length, int maxInMemoryLength)
+ throws ManifoldCFException
+ {
super();
+
+
+ // Before we do anything else, we read the first chunk. This will allow
+ // us to determine if we're going to buffer the data in memory or not. However,
+ // it may need to be read in chunks, since there's no guarantee it will come in
+ // in the size requested.
+ int chunkSize = CHUNK_SIZE;
+
+ char[] buffer = new char[chunkSize];
+ int chunkTotal = 0;
+ boolean eofSeen = false;
try
{
- // Create a temporary file to put the stuff in
- File outfile = File.createTempFile("_MC_","");
- try
+ while (true)
{
- // Register the file for autodeletion, using our infrastructure.
- ManifoldCF.addFile(outfile);
- // deleteOnExit() causes memory leakage!
- // outfile.deleteOnExit();
-
- // Set up hash digest and character length counter before we start anything.
- java.security.MessageDigest md = ManifoldCF.startHash();
+ int chunkAmount;
+ if (length == -1L || length > chunkSize)
+ chunkAmount = chunkSize-chunkTotal;
+ else
+ {
+ chunkAmount = (int)(length-chunkTotal);
+ eofSeen = true;
+ }
+ if (chunkAmount == 0)
+ break;
+ int readsize = is.read(buffer,chunkTotal,chunkAmount);
+ if (readsize == -1)
+ {
+ eofSeen = true;
+ break;
+ }
+ chunkTotal += readsize;
+ }
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (IOException e)
+ {
+ throw new ManifoldCFException("Cannot read character stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+ }
+
+ // Set up hash digest, and calculate the initial hash.
+ java.security.MessageDigest md = ManifoldCF.startHash();
+ String chunkString = new String(buffer,0,chunkTotal);
+ ManifoldCF.addToHash(md,chunkString);
+
+ // In order to compute the byte length, we need to convert to a byte array, which is
+ // also our final form for in-memory storage. But we don't want to do the work
+ // unless there's a chance it will be needed.
+ byte[] byteBuffer;
+ if (eofSeen)
+ byteBuffer = chunkString.getBytes(StandardCharsets.UTF_8);
+ else
+ byteBuffer = null;
- FileOutputStream outStream = new FileOutputStream(outfile);
- // Create a Writer corresponding to the file output stream, and encode using utf-8
- OutputStreamWriter outWriter = new OutputStreamWriter(outStream,StandardCharsets.UTF_8);
+ if (eofSeen && byteBuffer.length <= maxInMemoryLength)
+ {
+ // Buffer locally; don't create a temp file
+ file = null;
+ inMemoryBuffer = byteBuffer;
+ charLength = chunkTotal;
+ hashValue = ManifoldCF.getHashValue(md);
+ }
+ else
+ {
+ inMemoryBuffer = null;
+ // Create a temporary file!
+ long totalMoved = 0;
+ try
+ {
+ // Create a temporary file to put the stuff in
+ File outfile = File.createTempFile("_MC_","");
try
{
- char[] buffer = new char[CHUNK_SIZE];
- long totalMoved = 0;
- while (true)
+ // Register the file for autodeletion, using our infrastructure.
+ ManifoldCF.addFile(outfile);
+ // deleteOnExit() causes memory leakage!
+ // outfile.deleteOnExit();
+
+ FileOutputStream outStream = new FileOutputStream(outfile);
+ // Create a Writer corresponding to the file output stream, and encode using utf-8
+ OutputStreamWriter outWriter = new OutputStreamWriter(outStream,StandardCharsets.UTF_8);
+ try
{
- int moveAmount;
- if (length == -1L || length-totalMoved > CHUNK_SIZE)
- moveAmount = CHUNK_SIZE;
- else
- moveAmount = (int)(length-totalMoved);
- if (moveAmount == 0)
- break;
- // Read character data in 64K chunks
- int readsize = is.read(buffer,0,moveAmount);
- if (readsize == -1)
- break;
- outWriter.write(buffer,0,readsize);
- ManifoldCF.addToHash(md,new String(buffer,0,readsize));
- totalMoved += readsize;
+ // Transfer what we've already read.
+ outWriter.write(buffer,0,chunkTotal);
+ totalMoved += chunkTotal;
+ // Now, transfer the remainder
+ while (true)
+ {
+ int moveAmount;
+ if (length == -1L || length-totalMoved > chunkSize)
+ moveAmount = chunkSize;
+ else
+ moveAmount = (int)(length-totalMoved);
+ if (moveAmount == 0)
+ break;
+ // Read character data in 64K chunks
+ int readsize = is.read(buffer,0,moveAmount);
+ if (readsize == -1)
+ break;
+ outWriter.write(buffer,0,readsize);
+ ManifoldCF.addToHash(md,new String(buffer,0,readsize));
+ totalMoved += readsize;
+ }
+
+ }
+ finally
+ {
+ outWriter.close();
}
+ // Now, create the input stream.
+ // Save the file name
+ file = outfile;
charLength = totalMoved;
hashValue = ManifoldCF.getHashValue(md);
+
}
- finally
+ catch (Throwable e)
{
- outWriter.close();
+ // Delete the temp file we created on any error condition
+ // outfile.delete();
+ ManifoldCF.deleteFile(outfile);
+ if (e instanceof Error)
+ throw (Error)e;
+ if (e instanceof RuntimeException)
+ throw (RuntimeException)e;
+ if (e instanceof Exception)
+ throw (Exception)e;
+ throw new Exception("Unexpected throwable: "+e.getMessage(),e);
}
-
- // Now, create the input stream.
- // Save the file name
- file = outfile;
-
}
- catch (Throwable e)
+ catch (InterruptedIOException e)
{
- // Delete the temp file we created on any error condition
- // outfile.delete();
- ManifoldCF.deleteFile(outfile);
- if (e instanceof Error)
- throw (Error)e;
- if (e instanceof RuntimeException)
- throw (RuntimeException)e;
- if (e instanceof Exception)
- throw (Exception)e;
- throw new Exception("Unexpected throwable: "+e.getMessage(),e);
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (Exception e)
+ {
+ throw new ManifoldCFException("Cannot write temporary file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
}
}
- catch (InterruptedIOException e)
- {
- throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
- }
- catch (Exception e)
- {
- throw new ManifoldCFException("Cannot write temporary file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
- }
+
}
@@ -141,6 +229,7 @@ public class TempFileCharacterInput exte
public TempFileCharacterInput(File tempFile)
{
super();
+ inMemoryBuffer = null;
file = tempFile;
ManifoldCF.addFile(file);
// deleteOnExit() causes memory leakage; better to leak files on hard shutdown than memory.
@@ -168,6 +257,10 @@ public class TempFileCharacterInput exte
throw new ManifoldCFException("No such file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
}
}
+ else if (inMemoryBuffer != null)
+ {
+ return new ByteArrayInputStream(inMemoryBuffer);
+ }
return null;
}
@@ -178,6 +271,8 @@ public class TempFileCharacterInput exte
{
if (file != null)
return file.length();
+ else if (inMemoryBuffer != null)
+ return inMemoryBuffer.length;
return 0L;
}
@@ -185,15 +280,22 @@ public class TempFileCharacterInput exte
protected void openStream()
throws ManifoldCFException
{
- try
+ if (file != null)
{
- // Open the file and create a stream.
- InputStream binaryStream = new FileInputStream(file);
- stream = new InputStreamReader(binaryStream, StandardCharsets.UTF_8);
+ try
+ {
+ // Open the file and create a stream.
+ InputStream binaryStream = new FileInputStream(file);
+ stream = new InputStreamReader(binaryStream, StandardCharsets.UTF_8);
+ }
+ catch (FileNotFoundException e)
+ {
+ throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+ }
}
- catch (FileNotFoundException e)
+ else if (inMemoryBuffer != null)
{
- throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+ stream = new InputStreamReader(new ByteArrayInputStream(inMemoryBuffer),StandardCharsets.UTF_8);
}
}
@@ -204,10 +306,12 @@ public class TempFileCharacterInput exte
// Create a new TempFileCharacterInput object, and fill it with our current stuff
TempFileCharacterInput rval = new TempFileCharacterInput();
rval.file = file;
+ rval.inMemoryBuffer = inMemoryBuffer;
rval.stream = stream;
rval.charLength = charLength;
rval.hashValue = hashValue;
file = null;
+ inMemoryBuffer = null;
stream = null;
charLength = -1L;
hashValue = null;
@@ -250,7 +354,13 @@ public class TempFileCharacterInput exte
try
{
// Open the file and create a stream.
- InputStream binaryStream = new FileInputStream(file);
+ InputStream binaryStream;
+ if (file != null)
+ binaryStream = new FileInputStream(file);
+ else if (inMemoryBuffer != null)
+ binaryStream = new ByteArrayInputStream(inMemoryBuffer);
+ else
+ binaryStream = null;
Reader reader = new InputStreamReader(binaryStream,StandardCharsets.UTF_8);
try
{
Modified: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java?rev=1606942&r1=1606941&r2=1606942&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java (original)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java Mon Jun 30 22:37:03 2014
@@ -34,8 +34,10 @@ public class TempFileInput extends Binar
public static final String _rcsid = "@(#)$Id: TempFileInput.java 988245 2010-08-23 18:39:35Z kwright $";
protected File file;
+ protected byte[] inMemoryBuffer;
protected final static int CHUNK_SIZE = 65536;
+ protected final static int DEFAULT_MAX_MEM_SIZE = 8192;
/** Construct from an input stream.
* This will also create a temporary, backing file.
@@ -54,74 +56,145 @@ public class TempFileInput extends Binar
public TempFileInput(InputStream is, long length)
throws ManifoldCFException
{
+ this(is,length,DEFAULT_MAX_MEM_SIZE);
+ }
+
+ /** Construct from a length-delimited input stream.
+ *@param is is the input stream.
+ *@param length is the maximum number of bytes to transfer, or -1 if no limit.
+ *@param maxMemSize is the size cutoff in bytes; only data strictly smaller than this may be kept in memory in lieu of using a file.
+ */
+ public TempFileInput(InputStream is, long length, int maxMemSize)
+ throws ManifoldCFException
+ {
super();
+
+ // Before we do anything else, we read the first chunk. This will allow
+ // us to determine if we're going to buffer the data in memory or not. However,
+ // it may need to be read in chunks, since there's no guarantee it will come in
+ // in the size requested.
+ int chunkSize = CHUNK_SIZE;
+
+ byte[] buffer = new byte[chunkSize];
+ int chunkTotal = 0;
+ boolean eofSeen = false;
try
{
- // Create a temporary file to put the stuff in
- File outfile = File.createTempFile("_MC_","");
- try
+ while (true)
{
- // Register the file for autodeletion, using our infrastructure.
- ManifoldCF.addFile(outfile);
- // deleteOnExit() causes memory leakage!
- // outfile.deleteOnExit();
- FileOutputStream outStream = new FileOutputStream(outfile);
- try
+ int chunkAmount;
+ if (length == -1L || length > chunkSize)
+ chunkAmount = chunkSize-chunkTotal;
+ else
{
- byte[] buffer = new byte[CHUNK_SIZE];
- long totalMoved = 0;
- while (true)
- {
- int moveAmount;
- if (length == -1L || length-totalMoved > CHUNK_SIZE)
- moveAmount = CHUNK_SIZE;
- else
- moveAmount = (int)(length-totalMoved);
- if (moveAmount == 0)
- break;
- // Read binary data in 64K chunks
- int readsize = is.read(buffer,0,moveAmount);
- if (readsize == -1)
- break;
- outStream.write(buffer,0,readsize);
- totalMoved += readsize;
- }
- // System.out.println(" Moved "+Long.toString(totalMoved));
+ chunkAmount = (int)(length-chunkTotal);
+ eofSeen = true;
}
- finally
+ if (chunkAmount == 0)
+ break;
+ int readsize = is.read(buffer,chunkTotal,chunkAmount);
+ if (readsize == -1)
{
- outStream.close();
+ eofSeen = true;
+ break;
}
-
- // Now, create the input stream.
- // Save the file name
- file = outfile;
- this.length = file.length();
-
- }
- catch (Throwable e)
- {
- // Delete the temp file we created on any error condition
- // outfile.delete();
- ManifoldCF.deleteFile(outfile);
- if (e instanceof Error)
- throw (Error)e;
- if (e instanceof RuntimeException)
- throw (RuntimeException)e;
- if (e instanceof Exception)
- throw (Exception)e;
- throw new Exception("Unexpected throwable: "+e.getMessage(),e);
+ chunkTotal += readsize;
}
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
- catch (Exception e)
+ catch (IOException e)
{
- throw new ManifoldCFException("Cannot write temporary file",e,ManifoldCFException.GENERAL_ERROR);
+ throw new ManifoldCFException("Cannot read byte stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
}
+ if (eofSeen && chunkTotal < maxMemSize)
+ {
+ // In memory!!
+ file = null;
+ inMemoryBuffer = new byte[chunkTotal];
+ for (int i = 0; i < inMemoryBuffer.length; i++)
+ {
+ inMemoryBuffer[i] = buffer[i];
+ }
+ this.length = chunkTotal;
+ }
+ else
+ {
+ inMemoryBuffer = null;
+ try
+ {
+ // Create a temporary file to put the stuff in
+ File outfile = File.createTempFile("_MC_","");
+ try
+ {
+ // Register the file for autodeletion, using our infrastructure.
+ ManifoldCF.addFile(outfile);
+ // deleteOnExit() causes memory leakage!
+ // outfile.deleteOnExit();
+ FileOutputStream outStream = new FileOutputStream(outfile);
+ try
+ {
+ long totalMoved = 0;
+
+ // Transfer what we've already read.
+ outStream.write(buffer,0,chunkTotal);
+ totalMoved += chunkTotal;
+
+ while (true)
+ {
+ int moveAmount;
+ if (length == -1L || length-totalMoved > chunkSize)
+ moveAmount = chunkSize;
+ else
+ moveAmount = (int)(length-totalMoved);
+ if (moveAmount == 0)
+ break;
+ // Read binary data in 64K chunks
+ int readsize = is.read(buffer,0,moveAmount);
+ if (readsize == -1)
+ break;
+ outStream.write(buffer,0,readsize);
+ totalMoved += readsize;
+ }
+ // System.out.println(" Moved "+Long.toString(totalMoved));
+ }
+ finally
+ {
+ outStream.close();
+ }
+
+ // Now, create the input stream.
+ // Save the file name
+ file = outfile;
+ this.length = file.length();
+
+ }
+ catch (Throwable e)
+ {
+ // Delete the temp file we created on any error condition
+ // outfile.delete();
+ ManifoldCF.deleteFile(outfile);
+ if (e instanceof Error)
+ throw (Error)e;
+ if (e instanceof RuntimeException)
+ throw (RuntimeException)e;
+ if (e instanceof Exception)
+ throw (Exception)e;
+ throw new Exception("Unexpected throwable: "+e.getMessage(),e);
+ }
+ }
+ catch (InterruptedIOException e)
+ {
+ throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+ }
+ catch (Exception e)
+ {
+ throw new ManifoldCFException("Cannot write temporary file",e,ManifoldCFException.GENERAL_ERROR);
+ }
+ }
}
/** Construct from an existing temporary file.
@@ -130,6 +203,7 @@ public class TempFileInput extends Binar
public TempFileInput(File tempFile)
{
super();
+ inMemoryBuffer = null;
file = tempFile;
ManifoldCF.addFile(file);
// deleteOnExit() causes memory leakage; better to leak files on hard shutdown than memory.
@@ -146,9 +220,11 @@ public class TempFileInput extends Binar
{
TempFileInput rval = new TempFileInput();
rval.file = file;
+ rval.inMemoryBuffer = inMemoryBuffer;
rval.stream = stream;
rval.length = length;
file = null;
+ inMemoryBuffer = null;
stream = null;
length = -1L;
return rval;
@@ -168,21 +244,31 @@ public class TempFileInput extends Binar
protected void openStream()
throws ManifoldCFException
{
- try
+ if (file != null)
{
- // Open the file and create a stream.
- stream = new FileInputStream(file);
+ try
+ {
+ // Open the file and create a stream.
+ stream = new FileInputStream(file);
+ }
+ catch (FileNotFoundException e)
+ {
+ throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+ }
}
- catch (FileNotFoundException e)
+ else if (inMemoryBuffer != null)
{
- throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+ stream = new ByteArrayInputStream(inMemoryBuffer);
}
}
protected void calculateLength()
throws ManifoldCFException
{
- this.length = file.length();
+ if (file != null)
+ this.length = file.length();
+ else if (inMemoryBuffer != null)
+ this.length = inMemoryBuffer.length;
}
}