You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/07/01 00:37:03 UTC

svn commit: r1606942 - in /manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces: TempFileCharacterInput.java TempFileInput.java

Author: kwright
Date: Mon Jun 30 22:37:03 2014
New Revision: 1606942

URL: http://svn.apache.org/r1606942
Log:
Performance improvement: cache in memory if small enough

Modified:
    manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java
    manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java

Modified: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java?rev=1606942&r1=1606941&r2=1606942&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java (original)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java Mon Jun 30 22:37:03 2014
@@ -26,19 +26,23 @@ import org.apache.manifoldcf.core.system
 /** This class represents a temporary file character input
 * stream.  Call the "done" method to clean up the
 * file when done.
-* NOTE: The implied flow of this method is to be handled
+* NOTE: The implied flow of this method is to be handed
 * a file that has already been created by some means.  The
 * file must be a dedicated temporary file, which can be
-* destroyed when the data has been used.
+* destroyed when the data has been used.  However, this class can also
+* buffer data in memory if the data is not too large (that is, less than a
+* supplied cutoff value).
 */
 public class TempFileCharacterInput extends CharacterInput
 {
   public static final String _rcsid = "@(#)$Id: TempFileCharacterInput.java 988245 2010-08-23 18:39:35Z kwright $";
 
   protected File file;
+  protected byte[] inMemoryBuffer;
 
   protected final static int CHUNK_SIZE = 65536;
-
+  protected final static int DEFAULT_MAX_MEM_SIZE = 8192;
+  
   /** Construct from a non-length-delimited reader.
   *@param is is a reader to transfer from, to the end of the data.  This will, as a side effect, also calculate the character length
   *          and hash value for the data.
@@ -57,81 +61,165 @@ public class TempFileCharacterInput exte
   public TempFileCharacterInput(Reader is, long length)
     throws ManifoldCFException
   {
+    this(is,length,DEFAULT_MAX_MEM_SIZE);
+  }
+
+  /** Construct from a length-delimited reader.
+  *@param is is a reader to transfer from, to the end of the data.  This will, as a side effect, also calculate the character length
+  *          and hash value for the data.
+  *@param length is the length limit to transfer, or -1 if no limit
+  *@param maxInMemoryLength is the maximum size, in UTF-8 encoded bytes, to keep in memory before using a backing File object.
+  *        The amount saved in memory is guaranteed not to exceed this size.
+  */
+  public TempFileCharacterInput(Reader is, long length, int maxInMemoryLength)
+    throws ManifoldCFException
+  {
     super();
+    
+
+    // Before we do anything else, we read the first chunk.  This will allow
+    // us to determine if we're going to buffer the data in memory or not.  However,
+    // it may need to be read in chunks, since there's no guarantee it will arrive
+    // in the size requested.
+    int chunkSize = CHUNK_SIZE;
+
+    char[] buffer = new char[chunkSize];
+    int chunkTotal = 0;
+    boolean eofSeen = false;
     try
     {
-      // Create a temporary file to put the stuff in
-      File outfile = File.createTempFile("_MC_","");
-      try
+      while (true)
       {
-        // Register the file for autodeletion, using our infrastructure.
-        ManifoldCF.addFile(outfile);
-        // deleteOnExit() causes memory leakage!
-        // outfile.deleteOnExit();
-
-        // Set up hash digest and character length counter before we start anything.
-        java.security.MessageDigest md = ManifoldCF.startHash();
+        int chunkAmount;
+        if (length == -1L || length > chunkSize)
+          chunkAmount = chunkSize-chunkTotal;
+        else
+        {
+          chunkAmount = (int)(length-chunkTotal);
+          eofSeen = true;
+        }
+        if (chunkAmount == 0)
+          break;
+        int readsize = is.read(buffer,chunkTotal,chunkAmount);
+        if (readsize == -1)
+        {
+          eofSeen = true;
+          break;
+        }
+        chunkTotal += readsize;
+      }
+    }
+    catch (InterruptedIOException e)
+    {
+      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    }
+    catch (IOException e)
+    {
+      throw new ManifoldCFException("Cannot read character stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+    }
+    
+    // Set up hash digest, and calculate the initial hash.
+    java.security.MessageDigest md = ManifoldCF.startHash();
+    String chunkString = new String(buffer,0,chunkTotal);
+    ManifoldCF.addToHash(md,chunkString);
+
+    // In order to compute the byte length, we need to convert to a byte array, which is
+    // also our final form for in-memory storage.  But we don't want to do the work
+    // unless there's a chance it will be needed.
+    byte[] byteBuffer;
+    if (eofSeen)
+      byteBuffer = chunkString.getBytes(StandardCharsets.UTF_8);
+    else
+      byteBuffer = null;
 
-        FileOutputStream outStream = new FileOutputStream(outfile);
-        // Create a Writer corresponding to the file output stream, and encode using utf-8
-        OutputStreamWriter outWriter = new OutputStreamWriter(outStream,StandardCharsets.UTF_8);
+    if (eofSeen && byteBuffer.length <= maxInMemoryLength)
+    {
+      // Buffer locally; don't create a temp file
+      file = null;
+      inMemoryBuffer = byteBuffer;
+      charLength = chunkTotal;
+      hashValue = ManifoldCF.getHashValue(md);
+    }
+    else
+    {
+      inMemoryBuffer = null;
+      // Create a temporary file!
+      long totalMoved = 0;
+      try
+      {
+        // Create a temporary file to put the stuff in
+        File outfile = File.createTempFile("_MC_","");
         try
         {
-          char[] buffer = new char[CHUNK_SIZE];
-          long totalMoved = 0;
-          while (true)
+          // Register the file for autodeletion, using our infrastructure.
+          ManifoldCF.addFile(outfile);
+          // deleteOnExit() causes memory leakage!
+          // outfile.deleteOnExit();
+
+          FileOutputStream outStream = new FileOutputStream(outfile);
+          // Create a Writer corresponding to the file output stream, and encode using utf-8
+          OutputStreamWriter outWriter = new OutputStreamWriter(outStream,StandardCharsets.UTF_8);
+          try
           {
-            int moveAmount;
-            if (length == -1L || length-totalMoved > CHUNK_SIZE)
-              moveAmount = CHUNK_SIZE;
-            else
-              moveAmount = (int)(length-totalMoved);
-            if (moveAmount == 0)
-              break;
-            // Read character data in 64K chunks
-            int readsize = is.read(buffer,0,moveAmount);
-            if (readsize == -1)
-              break;
-            outWriter.write(buffer,0,readsize);
-            ManifoldCF.addToHash(md,new String(buffer,0,readsize));
-            totalMoved += readsize;
+            //  Transfer what we've already read.
+            outWriter.write(buffer,0,chunkTotal);
+            totalMoved += chunkTotal;
+            // Now, transfer the remainder
+            while (true)
+            {
+              int moveAmount;
+              if (length == -1L || length-totalMoved > chunkSize)
+                moveAmount = chunkSize;
+              else
+                moveAmount = (int)(length-totalMoved);
+              if (moveAmount == 0)
+                break;
+              // Read character data in 64K chunks
+              int readsize = is.read(buffer,0,moveAmount);
+              if (readsize == -1)
+                break;
+              outWriter.write(buffer,0,readsize);
+              ManifoldCF.addToHash(md,new String(buffer,0,readsize));
+              totalMoved += readsize;
+            }
+
+          }
+          finally
+          {
+            outWriter.close();
           }
 
+          // Now, create the input stream.
+          // Save the file name
+          file = outfile;
           charLength = totalMoved;
           hashValue = ManifoldCF.getHashValue(md);
+
         }
-        finally
+        catch (Throwable e)
         {
-          outWriter.close();
+          // Delete the temp file we created on any error condition
+          // outfile.delete();
+          ManifoldCF.deleteFile(outfile);
+          if (e instanceof Error)
+            throw (Error)e;
+          if (e instanceof RuntimeException)
+            throw (RuntimeException)e;
+          if (e instanceof Exception)
+            throw (Exception)e;
+          throw new Exception("Unexpected throwable: "+e.getMessage(),e);
         }
-
-        // Now, create the input stream.
-        // Save the file name
-        file = outfile;
-
       }
-      catch (Throwable e)
+      catch (InterruptedIOException e)
       {
-        // Delete the temp file we created on any error condition
-        // outfile.delete();
-        ManifoldCF.deleteFile(outfile);
-        if (e instanceof Error)
-          throw (Error)e;
-        if (e instanceof RuntimeException)
-          throw (RuntimeException)e;
-        if (e instanceof Exception)
-          throw (Exception)e;
-        throw new Exception("Unexpected throwable: "+e.getMessage(),e);
+        throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+      }
+      catch (Exception e)
+      {
+        throw new ManifoldCFException("Cannot write temporary file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
       }
     }
-    catch (InterruptedIOException e)
-    {
-      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-    }
-    catch (Exception e)
-    {
-      throw new ManifoldCFException("Cannot write temporary file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
-    }
+    
 
   }
 
@@ -141,6 +229,7 @@ public class TempFileCharacterInput exte
   public TempFileCharacterInput(File tempFile)
   {
     super();
+    inMemoryBuffer = null;
     file = tempFile;
     ManifoldCF.addFile(file);
     // deleteOnExit() causes memory leakage; better to leak files on hard shutdown than memory.
@@ -168,6 +257,10 @@ public class TempFileCharacterInput exte
         throw new ManifoldCFException("No such file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
       }
     }
+    else if (inMemoryBuffer != null)
+    {
+      return new ByteArrayInputStream(inMemoryBuffer);
+    }
     return null;
   }
 
@@ -178,6 +271,8 @@ public class TempFileCharacterInput exte
   {
     if (file != null)
       return file.length();
+    else if (inMemoryBuffer != null)
+      return inMemoryBuffer.length;
     return 0L;
   }
 
@@ -185,15 +280,22 @@ public class TempFileCharacterInput exte
   protected void openStream()
     throws ManifoldCFException
   {
-    try
+    if (file != null)
     {
-      // Open the file and create a stream.
-      InputStream binaryStream = new FileInputStream(file);
-      stream = new InputStreamReader(binaryStream, StandardCharsets.UTF_8);
+      try
+      {
+        // Open the file and create a stream.
+        InputStream binaryStream = new FileInputStream(file);
+        stream = new InputStreamReader(binaryStream, StandardCharsets.UTF_8);
+      }
+      catch (FileNotFoundException e)
+      {
+        throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      }
     }
-    catch (FileNotFoundException e)
+    else if (inMemoryBuffer != null)
     {
-      throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      stream = new InputStreamReader(new ByteArrayInputStream(inMemoryBuffer),StandardCharsets.UTF_8);
     }
   }
 
@@ -204,10 +306,12 @@ public class TempFileCharacterInput exte
     // Create a new TempFileCharacterInput object, and fill it with our current stuff
     TempFileCharacterInput rval = new TempFileCharacterInput();
     rval.file = file;
+    rval.inMemoryBuffer = inMemoryBuffer;
     rval.stream = stream;
     rval.charLength = charLength;
     rval.hashValue = hashValue;
     file = null;
+    inMemoryBuffer = null;
     stream = null;
     charLength = -1L;
     hashValue = null;
@@ -250,7 +354,13 @@ public class TempFileCharacterInput exte
     try
     {
       // Open the file and create a stream.
-      InputStream binaryStream = new FileInputStream(file);
+      InputStream binaryStream;
+      if (file != null)
+        binaryStream = new FileInputStream(file);
+      else if (inMemoryBuffer != null)
+        binaryStream = new ByteArrayInputStream(inMemoryBuffer);
+      else
+        binaryStream = null;
       Reader reader = new InputStreamReader(binaryStream,StandardCharsets.UTF_8);
       try
       {

Modified: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java?rev=1606942&r1=1606941&r2=1606942&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java (original)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java Mon Jun 30 22:37:03 2014
@@ -34,8 +34,10 @@ public class TempFileInput extends Binar
   public static final String _rcsid = "@(#)$Id: TempFileInput.java 988245 2010-08-23 18:39:35Z kwright $";
 
   protected File file;
+  protected byte[] inMemoryBuffer;
 
   protected final static int CHUNK_SIZE = 65536;
+  protected final static int DEFAULT_MAX_MEM_SIZE = 8192;
 
   /** Construct from an input stream.
   * This will also create a temporary, backing file.
@@ -54,74 +56,145 @@ public class TempFileInput extends Binar
   public TempFileInput(InputStream is, long length)
     throws ManifoldCFException
   {
+    this(is,length,DEFAULT_MAX_MEM_SIZE);
+  }
+  
+  /** Construct from a length-delimited input stream.
+  *@param is is the input stream.
+  *@param length is the maximum number of bytes to transfer, or -1 if no limit.
+  *@param maxMemSize is the maximum bytes we keep in memory in lieu of using a file.
+  */
+  public TempFileInput(InputStream is, long length, int maxMemSize)
+    throws ManifoldCFException
+  {
     super();
+    
+    // Before we do anything else, we read the first chunk.  This will allow
+    // us to determine if we're going to buffer the data in memory or not.  However,
+    // it may need to be read in chunks, since there's no guarantee it will arrive
+    // in the size requested.
+    int chunkSize = CHUNK_SIZE;
+
+    byte[] buffer = new byte[chunkSize];
+    int chunkTotal = 0;
+    boolean eofSeen = false;
     try
     {
-      // Create a temporary file to put the stuff in
-      File outfile = File.createTempFile("_MC_","");
-      try
+      while (true)
       {
-        // Register the file for autodeletion, using our infrastructure.
-        ManifoldCF.addFile(outfile);
-        // deleteOnExit() causes memory leakage!
-        // outfile.deleteOnExit();
-        FileOutputStream outStream = new FileOutputStream(outfile);
-        try
+        int chunkAmount;
+        if (length == -1L || length > chunkSize)
+          chunkAmount = chunkSize-chunkTotal;
+        else
         {
-          byte[] buffer = new byte[CHUNK_SIZE];
-          long totalMoved = 0;
-          while (true)
-          {
-            int moveAmount;
-            if (length == -1L || length-totalMoved > CHUNK_SIZE)
-              moveAmount = CHUNK_SIZE;
-            else
-              moveAmount = (int)(length-totalMoved);
-            if (moveAmount == 0)
-              break;
-            // Read binary data in 64K chunks
-            int readsize = is.read(buffer,0,moveAmount);
-            if (readsize == -1)
-              break;
-            outStream.write(buffer,0,readsize);
-            totalMoved += readsize;
-          }
-          // System.out.println(" Moved "+Long.toString(totalMoved));
+          chunkAmount = (int)(length-chunkTotal);
+          eofSeen = true;
         }
-        finally
+        if (chunkAmount == 0)
+          break;
+        int readsize = is.read(buffer,chunkTotal,chunkAmount);
+        if (readsize == -1)
         {
-          outStream.close();
+          eofSeen = true;
+          break;
         }
-
-        // Now, create the input stream.
-        // Save the file name
-        file = outfile;
-        this.length = file.length();
-
-      }
-      catch (Throwable e)
-      {
-        // Delete the temp file we created on any error condition
-        // outfile.delete();
-        ManifoldCF.deleteFile(outfile);
-        if (e instanceof Error)
-          throw (Error)e;
-        if (e instanceof RuntimeException)
-          throw (RuntimeException)e;
-        if (e instanceof Exception)
-          throw (Exception)e;
-        throw new Exception("Unexpected throwable: "+e.getMessage(),e);
+        chunkTotal += readsize;
       }
     }
     catch (InterruptedIOException e)
     {
       throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
     }
-    catch (Exception e)
+    catch (IOException e)
     {
-      throw new ManifoldCFException("Cannot write temporary file",e,ManifoldCFException.GENERAL_ERROR);
+      throw new ManifoldCFException("Cannot read byte stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
     }
 
+    if (eofSeen && chunkTotal < maxMemSize)
+    {
+      // In memory!!
+      file = null;
+      inMemoryBuffer = new byte[chunkTotal];
+      for (int i = 0; i < inMemoryBuffer.length; i++)
+      {
+        inMemoryBuffer[i] = buffer[i];
+      }
+      this.length = chunkTotal;
+    }
+    else
+    {
+      inMemoryBuffer = null;
+      try
+      {
+        // Create a temporary file to put the stuff in
+        File outfile = File.createTempFile("_MC_","");
+        try
+        {
+          // Register the file for autodeletion, using our infrastructure.
+          ManifoldCF.addFile(outfile);
+          // deleteOnExit() causes memory leakage!
+          // outfile.deleteOnExit();
+          FileOutputStream outStream = new FileOutputStream(outfile);
+          try
+          {
+            long totalMoved = 0;
+            
+            //  Transfer what we've already read.
+            outStream.write(buffer,0,chunkTotal);
+            totalMoved += chunkTotal;
+
+            while (true)
+            {
+              int moveAmount;
+              if (length == -1L || length-totalMoved > chunkSize)
+                moveAmount = chunkSize;
+              else
+                moveAmount = (int)(length-totalMoved);
+              if (moveAmount == 0)
+                break;
+              // Read binary data in 64K chunks
+              int readsize = is.read(buffer,0,moveAmount);
+              if (readsize == -1)
+                break;
+              outStream.write(buffer,0,readsize);
+              totalMoved += readsize;
+            }
+            // System.out.println(" Moved "+Long.toString(totalMoved));
+          }
+          finally
+          {
+            outStream.close();
+          }
+
+          // Now, create the input stream.
+          // Save the file name
+          file = outfile;
+          this.length = file.length();
+
+        }
+        catch (Throwable e)
+        {
+          // Delete the temp file we created on any error condition
+          // outfile.delete();
+          ManifoldCF.deleteFile(outfile);
+          if (e instanceof Error)
+            throw (Error)e;
+          if (e instanceof RuntimeException)
+            throw (RuntimeException)e;
+          if (e instanceof Exception)
+            throw (Exception)e;
+          throw new Exception("Unexpected throwable: "+e.getMessage(),e);
+        }
+      }
+      catch (InterruptedIOException e)
+      {
+        throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+      }
+      catch (Exception e)
+      {
+        throw new ManifoldCFException("Cannot write temporary file",e,ManifoldCFException.GENERAL_ERROR);
+      }
+    }
   }
 
   /** Construct from an existing temporary fle.
@@ -130,6 +203,7 @@ public class TempFileInput extends Binar
   public TempFileInput(File tempFile)
   {
     super();
+    inMemoryBuffer = null;
     file = tempFile;
     ManifoldCF.addFile(file);
     // deleteOnExit() causes memory leakage; better to leak files on hard shutdown than memory.
@@ -146,9 +220,11 @@ public class TempFileInput extends Binar
   {
     TempFileInput rval = new TempFileInput();
     rval.file = file;
+    rval.inMemoryBuffer = inMemoryBuffer;
     rval.stream = stream;
     rval.length = length;
     file = null;
+    inMemoryBuffer = null;
     stream = null;
     length = -1L;
     return rval;
@@ -168,21 +244,31 @@ public class TempFileInput extends Binar
   protected void openStream()
     throws ManifoldCFException
   {
-    try
+    if (file != null)
     {
-      // Open the file and create a stream.
-      stream = new FileInputStream(file);
+      try
+      {
+        // Open the file and create a stream.
+        stream = new FileInputStream(file);
+      }
+      catch (FileNotFoundException e)
+      {
+        throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      }
     }
-    catch (FileNotFoundException e)
+    else if (inMemoryBuffer != null)
     {
-      throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      stream = new ByteArrayInputStream(inMemoryBuffer);
     }
   }
 
   protected void calculateLength()
     throws ManifoldCFException
   {
-    this.length = file.length();
+    if (file != null)
+      this.length = file.length();
+    else if (inMemoryBuffer != null)
+      this.length = inMemoryBuffer.length;
   }
 
 }