You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ce...@apache.org on 2023/01/01 15:59:44 UTC

svn commit: r1906326 - in /poi/trunk/poi-scratchpad/src: main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java

Author: centic
Date: Sun Jan  1 15:59:44 2023
New Revision: 1906326

URL: http://svn.apache.org/viewvc?rev=1906326&view=rev
Log:
Prevent more cases of unbounded allocation

Test WordToTextConverter with all sample files

Modified:
    poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java
    poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java

Modified: poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java?rev=1906326&r1=1906325&r2=1906326&view=diff
==============================================================================
--- poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java (original)
+++ poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java Sun Jan  1 15:59:44 2023
@@ -20,56 +20,56 @@ import java.util.Arrays;
 
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.apache.poi.util.IOUtils;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.LittleEndianConsts;
 
 import static java.lang.Integer.toHexString;
 import static org.apache.logging.log4j.util.Unbox.box;
 
-public class NilPICFAndBinData
-{
-
+public class NilPICFAndBinData {
     private static final Logger LOGGER = LogManager.getLogger(NilPICFAndBinData.class);
 
+    // limit the default maximum length of the allocated fields
+    private static final int MAX_SIZE = 100_000;
+
     private byte[] _binData;
 
-    public NilPICFAndBinData( byte[] data, int offset )
-    {
+    public NilPICFAndBinData( byte[] data, int offset ) {
         fillFields( data, offset );
     }
 
-    public void fillFields( byte[] data, int offset )
-    {
+    public void fillFields( byte[] data, int offset ) {
         int lcb = LittleEndian.getInt( data, offset );
         int cbHeader = LittleEndian.getUShort( data, offset
                 + LittleEndianConsts.INT_SIZE );
 
-        if ( cbHeader != 0x44 )
-        {
+        if ( cbHeader != 0x44 ) {
             LOGGER.atWarn().log("NilPICFAndBinData at offset {} cbHeader 0x{} != 0x44", box(offset), toHexString(cbHeader));
         }
 
+        // make sure these do not cause OOM if passed as invalid or extremely large values
+        IOUtils.safelyAllocateCheck(lcb, MAX_SIZE);
+        IOUtils.safelyAllocateCheck(cbHeader, MAX_SIZE);
+
         // skip the 62 ignored bytes
         int binaryLength = lcb - cbHeader;
         this._binData = Arrays.copyOfRange(data, offset + cbHeader,
                 offset + cbHeader + binaryLength);
     }
 
-    public byte[] getBinData()
-    {
+    public byte[] getBinData() {
         return _binData;
     }
 
-    public byte[] serialize()
-    {
+    public byte[] serialize() {
         byte[] bs = new byte[_binData.length + 0x44];
         LittleEndian.putInt( bs, 0, _binData.length + 0x44 );
         System.arraycopy( _binData, 0, bs, 0x44, _binData.length );
         return bs;
     }
 
-    public int serialize( byte[] data, int offset )
-    {
+    public int serialize( byte[] data, int offset ) {
         LittleEndian.putInt( data, offset, _binData.length + 0x44 );
         System.arraycopy( _binData, 0, data, offset + 0x44, _binData.length );
         return 0x44 + _binData.length;

Modified: poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java?rev=1906326&r1=1906325&r2=1906326&view=diff
==============================================================================
--- poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java (original)
+++ poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java Sun Jan  1 15:59:44 2023
@@ -20,10 +20,29 @@ import static org.apache.poi.hwpf.HWPFTe
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FilenameFilter;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.stream.Stream;
+
+import org.apache.commons.io.filefilter.SuffixFileFilter;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.POIDataSamples;
 import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.util.RecordFormatException;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 public class TestWordToTextConverter {
+    private static final Logger LOG = LogManager.getLogger(WordToTextConverter.class);
 
     /**
      * [FAILING] Bug 47731 - Word Extractor considers text copied from some
@@ -60,4 +79,38 @@ public class TestWordToTextConverter {
             assertNotNull(WordToTextConverter.getText(doc));
         }
     }
+
+    @ParameterizedTest
+    @MethodSource("files")
+    void testAllFiles(File file) throws Exception {
+        LOG.info("Testing " + file);
+        try (FileInputStream stream = new FileInputStream(file)) {
+            InputStream is = FileMagic.prepareToCheckMagic(stream);
+            FileMagic fm = FileMagic.valueOf(is);
+
+            if (fm != FileMagic.OLE2) {
+                LOG.info("Skip non-doc file " + file);
+
+                return;
+            }
+
+            try (HWPFDocument doc = new HWPFDocument(is)) {
+                String foundText = WordToTextConverter.getText(doc);
+                assertNotNull(foundText);
+            } catch (OldWordFileFormatException | EncryptedDocumentException | RecordFormatException e) {
+                // ignored here
+            }
+        }
+    }
+
+    public static Stream<Arguments> files() {
+        String dataDirName = System.getProperty(POIDataSamples.TEST_PROPERTY,
+                new File("test-data").exists() ? "test-data" : "../test-data");
+
+        File[] documents = new File(dataDirName, "document").listFiles(
+                (FilenameFilter) new SuffixFileFilter(".doc"));
+        assertNotNull(documents);
+
+        return Arrays.stream(documents).map(Arguments::of);
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org