You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2017/06/06 22:21:12 UTC

svn commit: r1797837 - in /poi: site/src/documentation/content/xdocs/ trunk/src/java/org/apache/poi/ trunk/src/java/org/apache/poi/hssf/record/crypto/ trunk/src/java/org/apache/poi/poifs/crypt/ trunk/src/java/org/apache/poi/poifs/crypt/binaryrc4/ trunk...

Author: kiwiwings
Date: Tue Jun  6 22:21:11 2017
New Revision: 1797837

URL: http://svn.apache.org/viewvc?rev=1797837&view=rev
Log:
#61162 - En-/decryption support for HWPF
Decryption is supported for Binary RC4 and CryptoAPI (XOR obfuscation is not yet supported)

Added:
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java   (with props)
    poi/trunk/test-data/document/password_password_cryptoapi.doc   (with props)
    poi/trunk/test-data/document/password_tika_binaryrc4.doc   (with props)
Modified:
    poi/site/src/documentation/content/xdocs/encryption.xml
    poi/site/src/documentation/content/xdocs/status.xml
    poi/trunk/src/java/org/apache/poi/POIDocument.java
    poi/trunk/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java
    poi/trunk/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java
    poi/trunk/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java

Modified: poi/site/src/documentation/content/xdocs/encryption.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/encryption.xml?rev=1797837&r1=1797836&r2=1797837&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/encryption.xml (original)
+++ poi/site/src/documentation/content/xdocs/encryption.xml Tue Jun  6 22:21:11 2017
@@ -84,7 +84,7 @@
             <td><link href="https://msdn.microsoft.com/en-us/library/dd909583(v=office.12).aspx">40-bit RC4 encryption</link></td>
             <td class="feature-yes">Yes (Writing since 3.16)</td>
             <td class="feature-na">N/A</td>
-            <td class="feature-no">No</td>
+            <td class="feature-partly">Read (since 3.17)</td>
             <td class="feature-na">N/A</td>
             <td class="feature-na">N/A</td>
             <td class="feature-na">N/A</td>
@@ -93,7 +93,7 @@
             <td><link href="https://msdn.microsoft.com/en-us/library/dd910113(v=office.12).aspx">Office Binary Document RC4 CryptoAPI Encryption</link></td>
             <td class="feature-yes">Yes (Since 3.16)</td>
             <td class="feature-yes">Yes</td>
-            <td class="feature-no">No</td>
+            <td class="feature-partly">Read (since 3.17)</td>
             <td class="feature-na">N/A</td>
             <td class="feature-na">N/A</td>
             <td class="feature-na">N/A</td>

Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1797837&r1=1797836&r2=1797837&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Tue Jun  6 22:21:11 2017
@@ -58,6 +58,7 @@
 
     <release version="3.17-beta1" date="2017-07-??">
       <actions>
+        <action dev="PD" type="add" fixes-bug="61162" module="HWPF">En-/decryption support for HWPF</action>
         <action dev="PD" type="fix" fixes-bug="60230" module="XSSF">Round trip workbook encryption and decryption</action>
         <action dev="PD" type="fix" fixes-bug="58325" module="XSSF">XSSFDrawing.getShapes() returns zero if sheet has more than one embedded OLE object</action>
         <action dev="PD" type="fix" fixes-bug="61119" module="SL Common">Fix preset shape rendering and shading</action>

Modified: poi/trunk/src/java/org/apache/poi/POIDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/POIDocument.java?rev=1797837&r1=1797836&r2=1797837&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/POIDocument.java (original)
+++ poi/trunk/src/java/org/apache/poi/POIDocument.java Tue Jun  6 22:21:11 2017
@@ -195,7 +195,7 @@ public abstract class POIDocument implem
         NPOIFSFileSystem encPoifs = null;
         String step = "getting";
         try {
-            if (encryptionInfo != null) {
+            if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) {
                 step = "getting encrypted";
                 String encryptedStream = null;
                 for (String s : encryptedStreamNames) {

Modified: poi/trunk/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java?rev=1797837&r1=1797836&r2=1797837&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/record/crypto/Biff8EncryptionKey.java Tue Jun  6 22:21:11 2017
@@ -32,7 +32,11 @@ public final class Biff8EncryptionKey {
 	 * @param password pass <code>null</code> to clear user password (and use default)
 	 */
 	public static void setCurrentUserPassword(String password) {
-		_userPasswordTLS.set(password);
+	    if (password == null) {
+	        _userPasswordTLS.remove();
+	    } else {
+	        _userPasswordTLS.set(password);
+	    }
 	}
 
 	/**

Modified: poi/trunk/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java?rev=1797837&r1=1797836&r2=1797837&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/crypt/EncryptionInfo.java Tue Jun  6 22:21:11 2017
@@ -122,8 +122,11 @@ public class EncryptionInfo implements C
         } else if (
                2 <= versionMajor && versionMajor <= 4
             && versionMinor == 2) {
-            encryptionMode = (preferredEncryptionMode == cryptoAPI) ? cryptoAPI : standard;
             encryptionFlags = dis.readInt();
+            encryptionMode = (
+                preferredEncryptionMode == cryptoAPI
+                || !flagAES.isSet(encryptionFlags))
+                ? cryptoAPI : standard;
         } else if (
                versionMajor == agile.versionMajor
             && versionMinor == agile.versionMinor){
@@ -268,6 +271,14 @@ public class EncryptionInfo implements C
         return encryptionMode;
     }
     
+    /**
+     * @return true, if the Document Summary / Summary property sets are encrypted and stored in the {@code EncryptedStream} stream,
+     * otherwise the summaries aren't encrypted and are located in their usual streams
+     */
+    public boolean isDocPropsEncrypted() {
+        return !flagDocProps.isSet(getEncryptionFlags());
+    }
+    
     @Override
     public EncryptionInfo clone() throws CloneNotSupportedException {
         EncryptionInfo other = (EncryptionInfo)super.clone();

Modified: poi/trunk/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java?rev=1797837&r1=1797836&r2=1797837&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java Tue Jun  6 22:21:11 2017
@@ -51,9 +51,9 @@ public class BinaryRC4Decryptor extends
             super(stream, size, chunkSize);
         }
 
-        public BinaryRC4CipherInputStream(InputStream stream)
+        public BinaryRC4CipherInputStream(InputStream stream, int size, int initialPos)
                 throws GeneralSecurityException {
-            super(stream, Integer.MAX_VALUE, chunkSize);
+            super(stream, size, chunkSize, initialPos);
         }    
     }
 
@@ -141,7 +141,7 @@ public class BinaryRC4Decryptor extends
     @Override
     public InputStream getDataStream(InputStream stream, int size, int initialPos)
             throws IOException, GeneralSecurityException {
-        return new BinaryRC4CipherInputStream(stream);
+        return new BinaryRC4CipherInputStream(stream, size, initialPos);
     }
     
 

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java?rev=1797837&r1=1797836&r2=1797837&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java Tue Jun  6 22:21:11 2017
@@ -18,6 +18,7 @@
 package org.apache.poi.hwpf;
 
 import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
@@ -25,9 +26,29 @@ import java.io.OutputStream;
 
 import org.apache.poi.hpsf.DocumentSummaryInformation;
 import org.apache.poi.hpsf.SummaryInformation;
-import org.apache.poi.hwpf.model.*;
+import org.apache.poi.hwpf.model.BookmarksTables;
+import org.apache.poi.hwpf.model.CHPBinTable;
+import org.apache.poi.hwpf.model.ComplexFileTable;
+import org.apache.poi.hwpf.model.DocumentProperties;
+import org.apache.poi.hwpf.model.EscherRecordHolder;
+import org.apache.poi.hwpf.model.FSPADocumentPart;
+import org.apache.poi.hwpf.model.FSPATable;
+import org.apache.poi.hwpf.model.FieldsTables;
+import org.apache.poi.hwpf.model.FontTable;
+import org.apache.poi.hwpf.model.ListTables;
+import org.apache.poi.hwpf.model.NoteType;
+import org.apache.poi.hwpf.model.NotesTables;
+import org.apache.poi.hwpf.model.PAPBinTable;
+import org.apache.poi.hwpf.model.PicturesTable;
+import org.apache.poi.hwpf.model.RevisionMarkAuthorTable;
+import org.apache.poi.hwpf.model.SavedByTable;
+import org.apache.poi.hwpf.model.SectionTable;
+import org.apache.poi.hwpf.model.SinglentonTextPiece;
+import org.apache.poi.hwpf.model.StyleSheet;
+import org.apache.poi.hwpf.model.SubdocumentType;
+import org.apache.poi.hwpf.model.TextPiece;
+import org.apache.poi.hwpf.model.TextPieceTable;
 import org.apache.poi.hwpf.model.io.HWPFFileSystem;
-import org.apache.poi.hwpf.model.io.HWPFOutputStream;
 import org.apache.poi.hwpf.usermodel.Bookmarks;
 import org.apache.poi.hwpf.usermodel.BookmarksImpl;
 import org.apache.poi.hwpf.usermodel.Field;
@@ -40,13 +61,12 @@ import org.apache.poi.hwpf.usermodel.Off
 import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl;
 import org.apache.poi.hwpf.usermodel.Range;
 import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
-import org.apache.poi.poifs.filesystem.DocumentEntry;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.EntryUtils;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.IOUtils;
 import org.apache.poi.util.Internal;
 
 /**
@@ -59,8 +79,6 @@ public final class HWPFDocument extends
     private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";
 
     private static final String STREAM_DATA = "Data";
-    private static final String STREAM_TABLE_0 = "0Table";
-    private static final String STREAM_TABLE_1 = "1Table";
 
     /** table stream buffer*/
     protected byte[] _tableStream;
@@ -178,11 +196,7 @@ public final class HWPFDocument extends
         }
 
         // use the fib to determine the name of the table stream.
-        String name = STREAM_TABLE_0;
-        if (_fib.getFibBase().isFWhichTblStm())
-        {
-            name = STREAM_TABLE_1;
-        }
+        String name = (_fib.getFibBase().isFWhichTblStm()) ? STREAM_TABLE_1 : STREAM_TABLE_0;
 
         // Grab the table stream.
         if (!directory.hasEntry(name)) {
@@ -190,25 +204,12 @@ public final class HWPFDocument extends
         }
 
         // read in the table stream.
-        InputStream is = directory.createDocumentInputStream(name);
-        _tableStream = IOUtils.toByteArray(is);
-        is.close();
+        _tableStream = getDocumentEntryBytes(name, _fib.getFibBase().getLKey(), Integer.MAX_VALUE);
 
         _fib.fillVariableFields(_mainStream, _tableStream);
 
         // read in the data stream.
-        InputStream dis = null;
-        try {
-            DocumentEntry dataProps = (DocumentEntry)directory.getEntry(STREAM_DATA);
-            dis = directory.createDocumentInputStream(STREAM_DATA);
-            _dataStream = IOUtils.toByteArray(dis, dataProps.getSize());
-        } catch(IOException e) {
-            _dataStream = new byte[0];
-        } finally {
-            if (dis != null) {
-                dis.close();
-            }
-        }
+        _dataStream = directory.hasEntry(STREAM_DATA) ? getDocumentEntryBytes(STREAM_DATA, 0, Integer.MAX_VALUE) : new byte[0];
 
         // Get the cp of the start of text in the main stream
         // The latest spec doc says this is always zero!
@@ -233,8 +234,7 @@ public final class HWPFDocument extends
          */
         boolean preserveBinTables = false;
         try {
-            preserveBinTables = Boolean.parseBoolean( System
-                    .getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
+            preserveBinTables = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
         } catch ( Exception exc ) {
             // ignore;
         }
@@ -250,8 +250,7 @@ public final class HWPFDocument extends
          */
         boolean preserveTextTable = false;
         try {
-            preserveTextTable = Boolean.parseBoolean( System
-                    .getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
+            preserveTextTable = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
         } catch ( Exception exc ) {
             // ignore;
         }
@@ -612,8 +611,8 @@ public final class HWPFDocument extends
     private void write(NPOIFSFileSystem pfs, boolean copyOtherEntries) throws IOException {
         // initialize our streams for writing.
         HWPFFileSystem docSys = new HWPFFileSystem();
-        HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
-        HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
+        ByteArrayOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
+        ByteArrayOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
         //HWPFOutputStream dataStream = docSys.getStream("Data");
         int tableOffset = 0;
 
@@ -630,13 +629,13 @@ public final class HWPFDocument extends
         // it after we write everything else.
         byte[] placeHolder = new byte[fibSize];
         wordDocumentStream.write(placeHolder);
-        int mainOffset = wordDocumentStream.getOffset();
+        int mainOffset = wordDocumentStream.size();
 
         // write out the StyleSheet.
         _fib.setFcStshf(tableOffset);
         _ss.writeTo(tableStream);
-        _fib.setLcbStshf(tableStream.getOffset() - tableOffset);
-        tableOffset = tableStream.getOffset();
+        _fib.setLcbStshf(tableStream.size() - tableOffset);
+        tableOffset = tableStream.size();
 
         // get fcMin and fcMac because we will be writing the actual text with the
         // complex table.
@@ -654,9 +653,9 @@ public final class HWPFDocument extends
         // write out the Complex table, includes text.
         _fib.setFcClx(tableOffset);
         _cft.writeTo(wordDocumentStream, tableStream);
-        _fib.setLcbClx(tableStream.getOffset() - tableOffset);
-        tableOffset = tableStream.getOffset();
-        int fcMac = wordDocumentStream.getOffset();
+        _fib.setLcbClx(tableStream.size() - tableOffset);
+        tableOffset = tableStream.size();
+        int fcMac = wordDocumentStream.size();
 
         /*
          * dop (document properties record) Written immediately after the end of
@@ -670,8 +669,8 @@ public final class HWPFDocument extends
         // write out the DocumentProperties.
         _fib.setFcDop(tableOffset);
         _dop.writeTo(tableStream);
-        _fib.setLcbDop(tableStream.getOffset() - tableOffset);
-        tableOffset = tableStream.getOffset();
+        _fib.setLcbDop(tableStream.size() - tableOffset);
+        tableOffset = tableStream.size();
 
         /*
          * plcfBkmkf (table recording beginning CPs of bookmarks) Written
@@ -683,7 +682,7 @@ public final class HWPFDocument extends
         if ( _bookmarksTables != null )
         {
             _bookmarksTables.writePlcfBkmkf( _fib, tableStream );
-            tableOffset = tableStream.getOffset();
+            tableOffset = tableStream.size();
         }
 
         /*
@@ -696,7 +695,7 @@ public final class HWPFDocument extends
         if ( _bookmarksTables != null )
         {
             _bookmarksTables.writePlcfBkmkl( _fib, tableStream );
-            tableOffset = tableStream.getOffset();
+            tableOffset = tableStream.size();
         }
 
         /*
@@ -710,8 +709,8 @@ public final class HWPFDocument extends
         // write out the CHPBinTable.
         _fib.setFcPlcfbteChpx(tableOffset);
         _cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
-        _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
-        tableOffset = tableStream.getOffset();
+        _fib.setLcbPlcfbteChpx(tableStream.size() - tableOffset);
+        tableOffset = tableStream.size();
 
         /*
          * plcfbtePapx (bin table for PAP FKPs) Written immediately after the
@@ -724,8 +723,8 @@ public final class HWPFDocument extends
         // write out the PAPBinTable.
         _fib.setFcPlcfbtePapx(tableOffset);
         _pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
-        _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
-        tableOffset = tableStream.getOffset();
+        _fib.setLcbPlcfbtePapx(tableStream.size() - tableOffset);
+        tableOffset = tableStream.size();
 
         /*
          * plcfendRef (endnote reference position table) Written immediately
@@ -739,7 +738,7 @@ public final class HWPFDocument extends
          */
         _endnotesTables.writeRef( _fib, tableStream );
         _endnotesTables.writeTxt( _fib, tableStream );
-        tableOffset = tableStream.getOffset();
+        tableOffset = tableStream.size();
 
         /*
          * plcffld*** (table of field positions and statuses for annotation
@@ -753,7 +752,7 @@ public final class HWPFDocument extends
         if ( _fieldsTables != null )
         {
             _fieldsTables.write( _fib, tableStream );
-            tableOffset = tableStream.getOffset();
+            tableOffset = tableStream.size();
         }
 
         /*
@@ -768,7 +767,7 @@ public final class HWPFDocument extends
          */
         _footnotesTables.writeRef( _fib, tableStream );
         _footnotesTables.writeTxt( _fib, tableStream );
-        tableOffset = tableStream.getOffset();
+        tableOffset = tableStream.size();
 
         /*
          * plcfsed (section table) Written immediately after the previously
@@ -781,8 +780,8 @@ public final class HWPFDocument extends
         // write out the SectionTable.
         _fib.setFcPlcfsed(tableOffset);
         _st.writeTo(wordDocumentStream, tableStream);
-        _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
-        tableOffset = tableStream.getOffset();
+        _fib.setLcbPlcfsed(tableStream.size() - tableOffset);
+        tableOffset = tableStream.size();
 
         // write out the list tables
         if ( _lt != null )
@@ -800,7 +799,7 @@ public final class HWPFDocument extends
              * Specification; Page 25 of 210
              */
             _lt.writeListDataTo( _fib, tableStream );
-            tableOffset = tableStream.getOffset();
+            tableOffset = tableStream.size();
 
             /*
              * plflfo (more list formats) Written immediately after the end of
@@ -814,7 +813,7 @@ public final class HWPFDocument extends
              * Specification; Page 26 of 210
              */
             _lt.writeListOverridesTo( _fib, tableStream );
-            tableOffset = tableStream.getOffset();
+            tableOffset = tableStream.size();
         }
 
         /*
@@ -827,7 +826,7 @@ public final class HWPFDocument extends
         if ( _bookmarksTables != null )
         {
             _bookmarksTables.writeSttbfBkmk( _fib, tableStream );
-            tableOffset = tableStream.getOffset();
+            tableOffset = tableStream.size();
         }
 
         /*
@@ -843,9 +842,9 @@ public final class HWPFDocument extends
         {
             _fib.setFcSttbSavedBy(tableOffset);
             _sbt.writeTo(tableStream);
-            _fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);
+            _fib.setLcbSttbSavedBy(tableStream.size() - tableOffset);
 
-            tableOffset = tableStream.getOffset();
+            tableOffset = tableStream.size();
         }
 
         // write out the revision mark authors table.
@@ -853,21 +852,21 @@ public final class HWPFDocument extends
         {
             _fib.setFcSttbfRMark(tableOffset);
             _rmat.writeTo(tableStream);
-            _fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);
+            _fib.setLcbSttbfRMark(tableStream.size() - tableOffset);
 
-            tableOffset = tableStream.getOffset();
+            tableOffset = tableStream.size();
         }
 
         // write out the FontTable.
         _fib.setFcSttbfffn(tableOffset);
         _ft.writeTo(tableStream);
-        _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
-        tableOffset = tableStream.getOffset();
+        _fib.setLcbSttbfffn(tableStream.size() - tableOffset);
+        tableOffset = tableStream.size();
 
         // set some variables in the FileInformationBlock.
         _fib.getFibBase().setFcMin(fcMin);
         _fib.getFibBase().setFcMac(fcMac);
-        _fib.setCbMac(wordDocumentStream.getOffset());
+        _fib.setCbMac(wordDocumentStream.size());
 
         // make sure that the table, doc and data streams use big blocks.
         byte[] mainBuf = wordDocumentStream.toByteArray();

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java?rev=1797837&r1=1797836&r2=1797837&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocumentCore.java Tue Jun  6 22:21:11 2017
@@ -17,13 +17,19 @@
 
 package org.apache.poi.hwpf;
 
+import java.io.ByteArrayOutputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PushbackInputStream;
+import java.security.GeneralSecurityException;
 
+import org.apache.poi.EncryptedDocumentException;
 import org.apache.poi.POIDocument;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
 import org.apache.poi.hwpf.model.CHPBinTable;
+import org.apache.poi.hwpf.model.FibBase;
 import org.apache.poi.hwpf.model.FileInformationBlock;
 import org.apache.poi.hwpf.model.FontTable;
 import org.apache.poi.hwpf.model.ListTables;
@@ -34,145 +40,242 @@ import org.apache.poi.hwpf.model.TextPie
 import org.apache.poi.hwpf.usermodel.ObjectPoolImpl;
 import org.apache.poi.hwpf.usermodel.ObjectsPool;
 import org.apache.poi.hwpf.usermodel.Range;
+import org.apache.poi.poifs.crypt.ChunkedCipherInputStream;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.poifs.crypt.EncryptionMode;
 import org.apache.poi.poifs.filesystem.DirectoryEntry;
 import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.DocumentEntry;
 import org.apache.poi.poifs.filesystem.DocumentInputStream;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.BoundedInputStream;
 import org.apache.poi.util.IOUtils;
 import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndianByteArrayInputStream;
 
 
 /**
  * This class holds much of the core of a Word document, but
  *  without some of the table structure information.
  * You generally want to work with one of
- *  {@link HWPFDocument} or {@link HWPFOldDocument} 
+ *  {@link HWPFDocument} or {@link HWPFOldDocument}
  */
-public abstract class HWPFDocumentCore extends POIDocument
-{
+public abstract class HWPFDocumentCore extends POIDocument {
     protected static final String STREAM_OBJECT_POOL = "ObjectPool";
     protected static final String STREAM_WORD_DOCUMENT = "WordDocument";
+    protected static final String STREAM_TABLE_0 = "0Table";
+    protected static final String STREAM_TABLE_1 = "1Table";
 
-  /** Holds OLE2 objects */
-  protected ObjectPoolImpl _objectPool;
+    private static final int FIB_BASE_LEN = 68;
 
-  /** The FIB */
-  protected FileInformationBlock _fib;
+    /** Holds OLE2 objects */
+    protected ObjectPoolImpl _objectPool;
 
-  /** Holds styles for this document.*/
-  protected StyleSheet _ss;
+    /** The FIB */
+    protected FileInformationBlock _fib;
 
-  /** Contains formatting properties for text*/
-  protected CHPBinTable _cbt;
-
-  /** Contains formatting properties for paragraphs*/
-  protected PAPBinTable _pbt;
-
-  /** Contains formatting properties for sections.*/
-  protected SectionTable _st;
-
-  /** Holds fonts for this document.*/
-  protected FontTable _ft;
-
-  /** Hold list tables */
-  protected ListTables _lt;
-
-  /** main document stream buffer*/
-  protected byte[] _mainStream;
-
-  protected HWPFDocumentCore()
-  {
-     super((DirectoryNode)null);
-  }
-
-  /**
-   * Takes an InputStream, verifies that it's not RTF or PDF, builds a
-   *  POIFSFileSystem from it, and returns that.
-   */
-  public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
-	// Open a PushbackInputStream, so we can peek at the first few bytes
-	PushbackInputStream pis = new PushbackInputStream(istream,6);
-	byte[] first6 = IOUtils.toByteArray(pis, 6);
-
-	// Does it start with {\rtf ? If so, it's really RTF
-	if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
-		&& first6[3] == 't' && first6[4] == 'f') {
-		throw new IllegalArgumentException("The document is really a RTF file");
-	} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
-		throw new IllegalArgumentException("The document is really a PDF file");
-	}
-
-	// OK, so it's neither RTF nor PDF
-	// Open a POIFSFileSystem on the (pushed back) stream
-	pis.unread(first6);
-	return new POIFSFileSystem(pis);
-  }
-
-  /**
-   * This constructor loads a Word document from an InputStream.
-   *
-   * @param istream The InputStream that contains the Word document.
-   * @throws IOException If there is an unexpected IOException from the passed
-   *         in InputStream.
-   */
-  public HWPFDocumentCore(InputStream istream) throws IOException
-  {
-    //do Ole stuff
-    this( verifyAndBuildPOIFS(istream) );
-  }
-
-  /**
-   * This constructor loads a Word document from a POIFSFileSystem
-   *
-   * @param pfilesystem The POIFSFileSystem that contains the Word document.
-   * @throws IOException If there is an unexpected IOException from the passed
-   *         in POIFSFileSystem.
-   */
-  public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException
-  {
-	this(pfilesystem.getRoot());
-  }
-
-  /**
-   * This constructor loads a Word document from a specific point
-   *  in a POIFSFileSystem, probably not the default.
-   * Used typically to open embeded documents.
-   *
-   * @param directory The DirectoryNode that contains the Word document.
-   * @throws IOException If there is an unexpected IOException from the passed
-   *         in POIFSFileSystem.
-   */
-  public HWPFDocumentCore(DirectoryNode directory) throws IOException {
-    // Sort out the hpsf properties
-    super(directory);
-
-    // read in the main stream.
-    DocumentEntry documentProps = (DocumentEntry)directory.getEntry("WordDocument");
-    DocumentInputStream dis = null;
-    try {
-        dis = directory.createDocumentInputStream(STREAM_WORD_DOCUMENT);
-        _mainStream = IOUtils.toByteArray(dis, documentProps.getSize());
-    } finally {
-        if (dis != null) {
-            dis.close();
-        }
-    }
-
-    // Create our FIB, and check for the doc being encrypted
-    _fib = new FileInformationBlock(_mainStream);
-
-    DirectoryEntry objectPoolEntry;
-    try {
-      objectPoolEntry = (DirectoryEntry) directory
-              .getEntry(STREAM_OBJECT_POOL);
-    } catch (FileNotFoundException exc) {
-      objectPoolEntry = null;
+    /** Holds styles for this document.*/
+    protected StyleSheet _ss;
+
+    /** Contains formatting properties for text*/
+    protected CHPBinTable _cbt;
+
+    /** Contains formatting properties for paragraphs*/
+    protected PAPBinTable _pbt;
+
+    /** Contains formatting properties for sections.*/
+    protected SectionTable _st;
+
+    /** Holds fonts for this document.*/
+    protected FontTable _ft;
+
+    /** Hold list tables */
+    protected ListTables _lt;
+
+    /** main document stream buffer*/
+    protected byte[] _mainStream;
+
+    private EncryptionInfo _encryptionInfo;
+
+    protected HWPFDocumentCore() {
+        super((DirectoryNode)null);
     }
-    _objectPool = new ObjectPoolImpl(objectPoolEntry);
-  }
 
-  /**
+    /**
+     * Takes an InputStream, verifies that it's not RTF or PDF, builds a
+     *  POIFSFileSystem from it, and returns that.
+     */
+    public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
+    	// Open a PushbackInputStream, so we can peek at the first few bytes
+    	PushbackInputStream pis = new PushbackInputStream(istream,6);
+    	byte[] first6 = IOUtils.toByteArray(pis, 6);
+
+    	// Does it start with {\rtf ? If so, it's really RTF
+    	if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
+    		&& first6[3] == 't' && first6[4] == 'f') {
+    		throw new IllegalArgumentException("The document is really a RTF file");
+    	} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
+    		throw new IllegalArgumentException("The document is really a PDF file");
+    	}
+
+    	// OK, so it's neither RTF nor PDF
+    	// Open a POIFSFileSystem on the (pushed back) stream
+    	pis.unread(first6);
+    	return new POIFSFileSystem(pis);
+    }
+
+    /**
+     * This constructor loads a Word document from an InputStream.
+     *
+     * @param istream The InputStream that contains the Word document.
+     * @throws IOException If there is an unexpected IOException from the passed
+     *         in InputStream.
+     */
+    public HWPFDocumentCore(InputStream istream) throws IOException {
+        //do Ole stuff
+        this( verifyAndBuildPOIFS(istream) );
+    }
+
+    /**
+     * This constructor loads a Word document from a POIFSFileSystem.
+     * It delegates to {@link #HWPFDocumentCore(DirectoryNode)} with the root
+     * directory of the given file system.
+     *
+     * @param pfilesystem The POIFSFileSystem that contains the Word document.
+     * @throws IOException If there is an unexpected IOException from the passed
+     *         in POIFSFileSystem.
+     */
+    public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException {
+        this(pfilesystem.getRoot());
+    }
+
+    /**
+     * Loads a Word document that lives at a specific point in a
+     *  POIFSFileSystem, which need not be the root directory.
+     * Typically used to open embedded documents.
+     *
+     * @param directory The DirectoryNode that contains the Word document.
+     * @throws IOException If there is an unexpected IOException from the passed
+     *         in POIFSFileSystem.
+     */
+    public HWPFDocumentCore(DirectoryNode directory) throws IOException {
+        // The superclass constructor sorts out the hpsf properties
+        super(directory);
+
+        // Load the complete main stream; the first FIB_BASE_LEN bytes are taken
+        // as they are, the remainder is decrypted if the document is encrypted
+        _mainStream = getDocumentEntryBytes(STREAM_WORD_DOCUMENT, FIB_BASE_LEN, Integer.MAX_VALUE);
+        _fib = new FileInformationBlock(_mainStream);
+
+        // The object pool directory is optional - pass null if it is missing
+        final DirectoryEntry poolEntry = directory.hasEntry(STREAM_OBJECT_POOL)
+            ? (DirectoryEntry) directory.getEntry(STREAM_OBJECT_POOL)
+            : null;
+        _objectPool = new ObjectPoolImpl(poolEntry);
+    }
+
+    /**
+     * Looks up a named property set. If the document is encrypted, the
+     * lookup is done with the document's {@link EncryptionInfo}.
+     *
+     * @param setName The property to read
+     * @return The value of the given property or null if it wasn't found.
+     */
+    @Override
+    protected PropertySet getPropertySet(String setName) {
+        final EncryptionInfo encInfo;
+        try {
+            encInfo = getEncryptionInfo();
+        } catch (IOException e) {
+            // the overridden method can't throw a checked exception
+            throw new RuntimeException(e);
+        }
+
+        // documents without encryption take the plain lookup
+        if (encInfo == null) {
+            return super.getPropertySet(setName);
+        }
+        return super.getPropertySet(setName, encInfo);
+    }
+
+    /**
+     * Determines whether the document is encrypted and, if so, loads its
+     * encryption header and verifies the password. A successfully verified
+     * result is cached and returned by subsequent calls.
+     * <p>
+     * The password is taken from {@link Biff8EncryptionKey#getCurrentUserPassword()};
+     * if none is set, {@link Decryptor#DEFAULT_PASSWORD} is tried.
+     *
+     * @return the verified encryption info, or {@code null} if the FIB base
+     *         doesn't flag the document as encrypted
+     * @throws IOException if a stream can't be read or the password check
+     *         fails with a {@link GeneralSecurityException}
+     * @throws EncryptedDocumentException if the password doesn't match
+     */
+    protected EncryptionInfo getEncryptionInfo() throws IOException {
+        if (_encryptionInfo != null) {
+            return _encryptionInfo;
+        }
+
+        // Parse the FIB base and check for the doc being encrypted - as long as
+        // the main stream hasn't been loaded, only the FIB base is read (undecrypted)
+        byte[] fibBaseBytes = (_mainStream != null) ? _mainStream : getDocumentEntryBytes(STREAM_WORD_DOCUMENT, -1, FIB_BASE_LEN);
+        FibBase fibBase = new FibBase( fibBaseBytes, 0 );
+        if (!fibBase.isFEncrypted()) {
+            return null;
+        }
+
+        // The encryption header is read undecrypted from the start of the table
+        // stream which is selected by the FIB base; its length is taken from lKey
+        String tableStrmName = fibBase.isFWhichTblStm() ? STREAM_TABLE_1 : STREAM_TABLE_0;
+        byte[] tableStream = getDocumentEntryBytes(tableStrmName, -1, fibBase.getLKey());
+        LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(tableStream);
+        // Only XOR obfuscation is flagged in the FIB base, otherwise no mode is preset
+        EncryptionMode em = fibBase.isFObfuscated() ? EncryptionMode.xor : null;
+        EncryptionInfo ei = new EncryptionInfo(leis, em);
+        Decryptor dec = ei.getDecryptor();
+        dec.setChunkSize(512);
+        try {
+            String pass = Biff8EncryptionKey.getCurrentUserPassword();
+            if (pass == null) {
+                pass = Decryptor.DEFAULT_PASSWORD;
+            }
+            if (!dec.verifyPassword(pass)) {
+                throw new EncryptedDocumentException("document is encrypted, password is invalid - use Biff8EncryptionKey.setCurrentUserPassword() to set password before opening");
+            }
+        } catch (GeneralSecurityException e) {
+            throw new IOException(e.getMessage(), e);
+        }
+
+        // Cache only after the password has been verified successfully
+        _encryptionInfo = ei;
+        return ei;
+    }
+
+    /**
+     * Reads OLE Stream into byte array - if an {@link EncryptionInfo} is available,
+     * decrypt the bytes starting at encryptionOffset. If encryptionOffset = -1, then do not try
+     * to decrypt the bytes
+     * <p>
+     * If the document is encrypted and encryptionOffset is greater than 0, the first
+     * encryptionOffset bytes are copied as they are and up to len further bytes are appended.
+     *
+     * @param name the name of the stream
+     * @param encryptionOffset the offset from which to start decrypting, use {@code -1} for no decryption
+     * @param len length of the bytes to be read, use {@link Integer#MAX_VALUE} for all bytes
+     * @return the read bytes
+     * @throws IOException if the stream can't be found or read, or if the decryption can't be set up
+     */
+    protected byte[] getDocumentEntryBytes(String name, int encryptionOffset, int len) throws IOException {
+        DirectoryNode dir = getDirectory();
+        DocumentEntry documentProps = (DocumentEntry)dir.getEntry(name);
+        int streamSize = documentProps.getSize();
+        DocumentInputStream dis = dir.createDocumentInputStream(documentProps);
+
+        InputStream is = dis;
+        try {
+            // Everything that can fail runs inside the try block, so the opened stream
+            // is closed even if resolving the encryption info throws, e.g. because
+            // of a wrong password
+            EncryptionInfo ei = (encryptionOffset > -1) ? getEncryptionInfo() : null;
+            ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.min(streamSize,len));
+
+            if (ei != null) {
+                try {
+                    Decryptor dec = ei.getDecryptor();
+                    is = dec.getDataStream(dis, streamSize, 0);
+                    if (encryptionOffset > 0) {
+                        // the leading bytes aren't encrypted, take them over as they are
+                        ChunkedCipherInputStream cis = (ChunkedCipherInputStream)is;
+                        byte plain[] = new byte[encryptionOffset];
+                        cis.readPlain(plain, 0, encryptionOffset);
+                        bos.write(plain);
+                    }
+                } catch (GeneralSecurityException e) {
+                    throw new IOException(e.getMessage(), e);
+                }
+            }
+            // This simplifies a few combinations, so we actually always try to copy len bytes
+            // regardless if encryptionOffset is greater than 0
+            if (len < Integer.MAX_VALUE) {
+                is = new BoundedInputStream(is, len);
+            }
+            IOUtils.copy(is, bos);
+            return bos.toByteArray();
+        } finally {
+            IOUtils.closeQuietly(is);
+            IOUtils.closeQuietly(dis);
+        }
+    }
+
+
+    /**
      * Returns the range which covers the whole of the document, but excludes
      * any headers and footers.
      */
@@ -198,43 +301,35 @@ public abstract class HWPFDocumentCore e
     @Internal
     public abstract StringBuilder getText();
 
-  public CHPBinTable getCharacterTable()
-  {
-    return _cbt;
-  }
-
-  public PAPBinTable getParagraphTable()
-  {
-    return _pbt;
-  }
-
-  public SectionTable getSectionTable()
-  {
-    return _st;
-  }
-
-  public StyleSheet getStyleSheet()
-  {
-    return _ss;
-  }
-
-  public ListTables getListTables()
-  {
-    return _lt;
-  }
-
-  public FontTable getFontTable()
-  {
-    return _ft;
-  }
-
-  public FileInformationBlock getFileInformationBlock()
-  {
-    return _fib;
-  }
+    /**
+     * @return the {@link CHPBinTable} held in the {@code _cbt} field
+     */
+    public CHPBinTable getCharacterTable() {
+        return _cbt;
+    }
+
+    /**
+     * @return the {@link PAPBinTable} held in the {@code _pbt} field
+     */
+    public PAPBinTable getParagraphTable() {
+        return _pbt;
+    }
+
+    /**
+     * @return the {@link SectionTable} held in the {@code _st} field
+     */
+    public SectionTable getSectionTable() {
+        return _st;
+    }
+
+    /**
+     * @return the {@link StyleSheet} held in the {@code _ss} field
+     */
+    public StyleSheet getStyleSheet() {
+        return _ss;
+    }
+
+    /**
+     * @return the list tables held in the {@code _lt} field
+     */
+    public ListTables getListTables() {
+        return _lt;
+    }
+
+    /**
+     * @return the table holding the fonts for this document ({@code _ft})
+     */
+    public FontTable getFontTable() {
+        return _ft;
+    }
+
+    /**
+     * @return the {@link FileInformationBlock} held in the {@code _fib} field;
+     *         the {@link DirectoryNode} constructor creates it from the main stream
+     */
+    public FileInformationBlock getFileInformationBlock() {
+        return _fib;
+    }
 
-    public ObjectsPool getObjectsPool()
-    {
+    /**
+     * @return the object pool held in the {@code _objectPool} field; the
+     *         {@link DirectoryNode} constructor sets it up
+     */
+    public ObjectsPool getObjectsPool() {
         return _objectPool;
     }
 
@@ -244,4 +339,4 @@ public abstract class HWPFDocumentCore e
     public byte[] getMainStream() {
         return _mainStream;
     }
-}
+}
\ No newline at end of file

Added: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java?rev=1797837&view=auto
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java (added)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java Tue Jun  6 22:21:11 2017
@@ -0,0 +1,69 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.hwpf;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.junit.AfterClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameter;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Opens password protected sample documents (one per parameter set) and
+ * checks that the expected text can be extracted from them.
+ */
+@RunWith(Parameterized.class)
+public class HWPFTestEncryption {
+    /** Resets the password which the tests set via {@link Biff8EncryptionKey}. */
+    @AfterClass
+    public static void clearPass() {
+        Biff8EncryptionKey.setCurrentUserPassword(null);
+    }
+
+    /** name of the sample file to open */
+    @Parameter(value = 0)
+    public String file;
+
+    /** password to set before the sample file is opened */
+    @Parameter(value = 1)
+    public String password;
+
+    /** trimmed text which is expected to be extracted from the sample file */
+    @Parameter(value = 2)
+    public String expected;
+
+    /**
+     * @return the parameter sets, each consisting of file name, password and expected text
+     */
+    @Parameters(name="{0}")
+    public static Collection<String[]> data() {
+        return Arrays.asList(
+            new String[]{ "password_tika_binaryrc4.doc", "tika", "This is an encrypted Word 2007 File." },
+            new String[]{ "password_password_cryptoapi.doc", "password", "This is a test" }
+        );
+    }
+
+    /**
+     * Opens the sample file with the password set and compares the extracted text.
+     * The extractor and the document are closed in finally blocks, so they are
+     * released even if the assertion fails.
+     */
+    @Test
+    public void extract() throws IOException {
+        Biff8EncryptionKey.setCurrentUserPassword(password);
+        HWPFDocument docD = HWPFTestDataSamples.openSampleFile(file);
+        try {
+            WordExtractor we = new WordExtractor(docD);
+            try {
+                String actual = we.getText().trim();
+                assertEquals(expected, actual);
+            } finally {
+                we.close();
+            }
+        } finally {
+            docD.close();
+        }
+    }
+}

Propchange: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestEncryption.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: poi/trunk/test-data/document/password_password_cryptoapi.doc
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/password_password_cryptoapi.doc?rev=1797837&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/document/password_password_cryptoapi.doc
------------------------------------------------------------------------------
    svn:mime-type = application/msword

Added: poi/trunk/test-data/document/password_tika_binaryrc4.doc
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/password_tika_binaryrc4.doc?rev=1797837&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/document/password_tika_binaryrc4.doc
------------------------------------------------------------------------------
    svn:mime-type = application/msword



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org