You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ce...@apache.org on 2020/05/16 13:06:07 UTC

svn commit: r1877816 - in /poi/trunk: src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java test-data/poifs/64322.ole2

Author: centic
Date: Sat May 16 13:06:07 2020
New Revision: 1877816

URL: http://svn.apache.org/viewvc?rev=1877816&view=rev
Log:
Bug 64322: Optimize performance of reading ole2 files

Remember channel-size to no re-read it for every read-access,
but reset it if we extend the size of the file
profiling indicates Channel.size() sometimes has similar runtime
overhead as position() or read()!

Added:
    poi/trunk/test-data/poifs/64322.ole2
Modified:
    poi/trunk/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java
    poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java

Modified: poi/trunk/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java?rev=1877816&r1=1877815&r2=1877816&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/nio/FileBackedDataSource.java Sat May 16 13:06:07 2020
@@ -39,6 +39,8 @@ public class FileBackedDataSource extend
     private final static POILogger logger = POILogFactory.getLogger(FileBackedDataSource.class);
 
     private final FileChannel channel;
+    private Long channelSize;
+
     private final boolean writable;
     // remember file base, which needs to be closed too
     private RandomAccessFile srcFile;
@@ -97,8 +99,9 @@ public class FileBackedDataSource extend
             // remember this buffer for cleanup
             buffersToClean.put(dst,dst);
         } else {
-            // allocate the buffer on the heap if we cannot map the data in directly
             channel.position(position);
+
+            // allocate the buffer on the heap if we cannot map the data in directly
             dst = ByteBuffer.allocate(length);
 
             // Read the contents and check that we could read some data
@@ -118,6 +121,11 @@ public class FileBackedDataSource extend
     @Override
     public void write(ByteBuffer src, long position) throws IOException {
         channel.write(src, position);
+
+        // we have to re-read size if we write "after" the recorded one
+        if(channelSize != null && position >= channelSize) {
+            channelSize = null;
+        }
     }
 
     @Override
@@ -131,7 +139,13 @@ public class FileBackedDataSource extend
 
     @Override
     public long size() throws IOException {
-        return channel.size();
+        // this is called often and profiling showed that channel.size()
+        // was taking a large part of processing-time, so we only read it
+        // once
+        if(channelSize == null) {
+            channelSize = channel.size();
+        }
+        return channelSize;
     }
 
     public void releaseBuffer(ByteBuffer buffer) {

Modified: poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java?rev=1877816&r1=1877815&r2=1877816&view=diff
==============================================================================
--- poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java (original)
+++ poi/trunk/src/testcases/org/apache/poi/poifs/filesystem/TestPOIFSFileSystem.java Sat May 16 13:06:07 2020
@@ -24,11 +24,18 @@ import static org.junit.Assert.fail;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.util.HashMap;
 
 import org.apache.poi.POIDataSamples;
+import org.apache.poi.hpsf.NoPropertySetStreamException;
+import org.apache.poi.hpsf.Property;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.Section;
 import org.apache.poi.hssf.HSSFTestDataSamples;
 import org.apache.poi.poifs.common.POIFSBigBlockSize;
 import org.apache.poi.poifs.common.POIFSConstants;
@@ -177,7 +184,7 @@ public final class TestPOIFSFileSystem {
 			fail("File is corrupt and shouldn't have been opened");
 		}
 	}
-	
+
 	/**
 	 * Tests that we can write and read a file that contains XBATs
 	 *  as well as regular BATs.
@@ -191,19 +198,19 @@ public final class TestPOIFSFileSystem {
 	   fs.getRoot().createDocument(
 	         "BIG", new ByteArrayInputStream(hugeStream)
 	   );
-	   
+
 	   ByteArrayOutputStream baos = new ByteArrayOutputStream();
 	   fs.writeFilesystem(baos);
 	   byte[] fsData = baos.toByteArray();
-	   
-	   
+
+
 	   // Check the header was written properly
-	   InputStream inp = new ByteArrayInputStream(fsData); 
+	   InputStream inp = new ByteArrayInputStream(fsData);
 	   HeaderBlock header = new HeaderBlock(inp);
 	   assertEquals(109+21, header.getBATCount());
 	   assertEquals(1, header.getXBATCount());
-	   
-	   
+
+
 	   // We should have 21 BATs in the XBAT
 	   ByteBuffer xbatData = ByteBuffer.allocate(512);
 	   xbatData.put(fsData, (1+header.getXBATIndex())*512, 512);
@@ -216,19 +223,19 @@ public final class TestPOIFSFileSystem {
 	      assertEquals(POIFSConstants.UNUSED_BLOCK, xbat.getValueAt(i));
 	   }
 	   assertEquals(POIFSConstants.END_OF_CHAIN, xbat.getValueAt(127));
-	   
-	   
+
+
 	   // Now load it and check
 	   fs = new POIFSFileSystem(
 	         new ByteArrayInputStream(fsData)
 	   );
-	   
+
 	   DirectoryNode root = fs.getRoot();
 	   assertEquals(1, root.getEntryCount());
 	   DocumentNode big = (DocumentNode)root.getEntry("BIG");
 	   assertEquals(hugeStream.length, big.getSize());
 	}
-	
+
 	/**
 	 * Most OLE2 files use 512byte blocks. However, a small number
 	 *  use 4k blocks. Check that we can open these.
@@ -293,4 +300,66 @@ public final class TestPOIFSFileSystem {
 
 		assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("foobaa".getBytes(UTF_8)));
 	}
+
+	@Test
+	public void test64322() throws NoPropertySetStreamException, IOException {
+		try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.getFile("64322.ole2"))) {
+			int count = recurseDir(poiFS.getRoot());
+
+			assertEquals("Expecting a fixed number of entries being found in the test-document",
+					1285, count);
+		}
+	}
+
+	@Test
+	public void test64322a() throws NoPropertySetStreamException, IOException {
+		try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.openResourceAsStream("64322.ole2"))) {
+			int count = recurseDir(poiFS.getRoot());
+
+			assertEquals("Expecting a fixed number of entries being found in the test-document",
+					1285, count);
+		}
+	}
+
+	private static int recurseDir(DirectoryEntry dir) throws IOException, NoPropertySetStreamException {
+		int count = 0;
+		for (Entry entry : dir) {
+			count++;
+			if (entry instanceof DirectoryEntry) {
+				count += recurseDir((DirectoryEntry) entry);
+			}
+			if (entry instanceof DocumentEntry) {
+				DocumentEntry de = (DocumentEntry) entry;
+				HashMap<String, String> props = new HashMap<>();
+				try (DocumentInputStream dis = new DocumentInputStream(de)) {
+					props.put("name", de.getName());
+
+					if (PropertySet.isPropertySetStream(dis)) {
+						dis.mark(10000000);
+						PropertySet ps = null;
+						try {
+							ps = new PropertySet(dis);
+
+						} catch (UnsupportedEncodingException e) {
+							// ignore
+						}
+						if (ps != null) {
+							for (Section section : ps.getSections()) {
+								for (Property p : section.getProperties()) {
+									String prop = section.getDictionary() != null
+											? section.getDictionary().get(p.getID())
+											: String.valueOf(p.getID());
+									if (p.getValue() != null)
+										props.put("property_" + prop, p.getValue().toString());
+								}
+							}
+						}
+						dis.reset();
+					}
+				}
+				assertTrue(props.size() > 0);
+			}
+		}
+		return count;
+	}
 }

Added: poi/trunk/test-data/poifs/64322.ole2
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/poifs/64322.ole2?rev=1877816&view=auto
==============================================================================
Binary files poi/trunk/test-data/poifs/64322.ole2 (added) and poi/trunk/test-data/poifs/64322.ole2 Sat May 16 13:06:07 2020 differ



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org