You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ce...@apache.org on 2023/01/01 15:59:44 UTC
svn commit: r1906326 - in /poi/trunk/poi-scratchpad/src: main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java
Author: centic
Date: Sun Jan 1 15:59:44 2023
New Revision: 1906326
URL: http://svn.apache.org/viewvc?rev=1906326&view=rev
Log:
Prevent more cases of unbounded allocation
Test WordToTextConverter with all sample files
Modified:
poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java
poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java
Modified: poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java?rev=1906326&r1=1906325&r2=1906326&view=diff
==============================================================================
--- poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java (original)
+++ poi/trunk/poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/NilPICFAndBinData.java Sun Jan 1 15:59:44 2023
@@ -20,56 +20,56 @@ import java.util.Arrays;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
import static java.lang.Integer.toHexString;
import static org.apache.logging.log4j.util.Unbox.box;
-public class NilPICFAndBinData
-{
-
+public class NilPICFAndBinData {
private static final Logger LOGGER = LogManager.getLogger(NilPICFAndBinData.class);
+ // limit the default maximum length of the allocated fields
+ private static final int MAX_SIZE = 100_000;
+
private byte[] _binData;
- public NilPICFAndBinData( byte[] data, int offset )
- {
+ public NilPICFAndBinData( byte[] data, int offset ) {
fillFields( data, offset );
}
- public void fillFields( byte[] data, int offset )
- {
+ public void fillFields( byte[] data, int offset ) {
int lcb = LittleEndian.getInt( data, offset );
int cbHeader = LittleEndian.getUShort( data, offset
+ LittleEndianConsts.INT_SIZE );
- if ( cbHeader != 0x44 )
- {
+ if ( cbHeader != 0x44 ) {
LOGGER.atWarn().log("NilPICFAndBinData at offset {} cbHeader 0x{} != 0x44", box(offset), toHexString(cbHeader));
}
+ // make sure these do not cause OOM if passed as invalid or extremely large values
+ IOUtils.safelyAllocateCheck(lcb, MAX_SIZE);
+ IOUtils.safelyAllocateCheck(cbHeader, MAX_SIZE);
+
// skip the 62 ignored bytes
int binaryLength = lcb - cbHeader;
this._binData = Arrays.copyOfRange(data, offset + cbHeader,
offset + cbHeader + binaryLength);
}
- public byte[] getBinData()
- {
+ public byte[] getBinData() {
return _binData;
}
- public byte[] serialize()
- {
+ public byte[] serialize() {
byte[] bs = new byte[_binData.length + 0x44];
LittleEndian.putInt( bs, 0, _binData.length + 0x44 );
System.arraycopy( _binData, 0, bs, 0x44, _binData.length );
return bs;
}
- public int serialize( byte[] data, int offset )
- {
+ public int serialize( byte[] data, int offset ) {
LittleEndian.putInt( data, offset, _binData.length + 0x44 );
System.arraycopy( _binData, 0, data, offset + 0x44, _binData.length );
return 0x44 + _binData.length;
Modified: poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java
URL: http://svn.apache.org/viewvc/poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java?rev=1906326&r1=1906325&r2=1906326&view=diff
==============================================================================
--- poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java (original)
+++ poi/trunk/poi-scratchpad/src/test/java/org/apache/poi/hwpf/converter/TestWordToTextConverter.java Sun Jan 1 15:59:44 2023
@@ -20,10 +20,29 @@ import static org.apache.poi.hwpf.HWPFTe
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FilenameFilter;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.stream.Stream;
+
+import org.apache.commons.io.filefilter.SuffixFileFilter;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.util.RecordFormatException;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
public class TestWordToTextConverter {
+ private static final Logger LOG = LogManager.getLogger(WordToTextConverter.class);
/**
* [FAILING] Bug 47731 - Word Extractor considers text copied from some
@@ -60,4 +79,38 @@ public class TestWordToTextConverter {
assertNotNull(WordToTextConverter.getText(doc));
}
}
+
+ @ParameterizedTest
+ @MethodSource("files")
+ void testAllFiles(File file) throws Exception {
+ LOG.info("Testing " + file);
+ try (FileInputStream stream = new FileInputStream(file)) {
+ InputStream is = FileMagic.prepareToCheckMagic(stream);
+ FileMagic fm = FileMagic.valueOf(is);
+
+ if (fm != FileMagic.OLE2) {
+ LOG.info("Skip non-doc file " + file);
+
+ return;
+ }
+
+ try (HWPFDocument doc = new HWPFDocument(is)) {
+ String foundText = WordToTextConverter.getText(doc);
+ assertNotNull(foundText);
+ } catch (OldWordFileFormatException | EncryptedDocumentException | RecordFormatException e) {
+ // ignored here
+ }
+ }
+ }
+
+ public static Stream<Arguments> files() {
+ String dataDirName = System.getProperty(POIDataSamples.TEST_PROPERTY,
+ new File("test-data").exists() ? "test-data" : "../test-data");
+
+ File[] documents = new File(dataDirName, "document").listFiles(
+ (FilenameFilter) new SuffixFileFilter(".doc"));
+ assertNotNull(documents);
+
+ return Arrays.stream(documents).map(Arguments::of);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org