You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ni...@apache.org on 2016/04/10 13:16:49 UTC
svn commit: r1738418 - in /poi/trunk/src/java/org/apache/poi: poifs/macros/
poifs/macros/VBAMacroExtractor.java util/RLEDecompressingInputStream.java
Author: nick
Date: Sun Apr 10 11:16:49 2016
New Revision: 1738418
URL: http://svn.apache.org/viewvc?rev=1738418&view=rev
Log:
VBA extraction support from bug #52949 from Barry Lagerweij
Added:
poi/trunk/src/java/org/apache/poi/poifs/macros/
poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroExtractor.java
poi/trunk/src/java/org/apache/poi/util/RLEDecompressingInputStream.java
Added: poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroExtractor.java?rev=1738418&view=auto
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroExtractor.java (added)
+++ poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroExtractor.java Sun Apr 10 11:16:49 2016
@@ -0,0 +1,188 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.poifs.macros;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.apache.poi.poifs.eventfilesystem.POIFSReader;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
+import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.RLEDecompressingInputStream;
+
+/**
+ * Extracts the source code of all VBA modules from an OLE2 (POIFS) document
+ * such as an XLS file, or from the <code>vbaProject.bin</code> part of an
+ * OOXML (ZIP based) document such as XLSM.
+ */
+public class VBAMacroExtractor {
+
+    /** File name of the OOXML package part that holds the VBA project. */
+    private static final String VBA_PROJECT_ENTRY = "vbaProject.bin";
+
+    /** Name of the storage whose child streams are the dir stream and the modules. */
+    private static final String VBA_STORAGE = "VBA";
+
+    // Record identifiers of the (decompressed) dir stream that are handled here.
+    private static final int RECORD_PROJECTCODEPAGE = 0x0003;
+    private static final int RECORD_PROJECTVERSION = 0x0009;
+    private static final int RECORD_TERMINATOR = 0x0010;
+    private static final int RECORD_STREAMNAME = 0x001A;
+    private static final int RECORD_MODULEOFFSET = 0x0031;
+
+    /** State of a single module while the file system is being read. */
+    private static class Module {
+
+        /** Offset of the compressed source inside the module stream; null until the dir stream was seen. */
+        Integer offset;
+
+        /** Raw stream bytes while the offset is unknown, decompressed source bytes afterwards. */
+        byte[] buf;
+    }
+
+    /** All modules found so far, keyed by stream name, plus the charset of the project. */
+    private static class VbaProject {
+
+        final Map<String, Module> modules = new HashMap<String, Module>();
+
+        Charset charset = Charset.forName("Cp1252"); // default charset
+    }
+
+    /**
+     * Extracts the macros from an OLE2 or OOXML document. The first two bytes
+     * of the stream decide the format: <code>PK</code> is treated as a ZIP
+     * package, anything else is handed to
+     * {@link #extractMacrosFromPOIFSInputStream(InputStream)}.
+     *
+     * @param in the document; it is not closed by this method
+     * @return module name to module source, or <code>null</code> if the input
+     *         is a ZIP package without a <code>vbaProject.bin</code> part
+     * @throws IOException if reading the stream fails
+     * @throws IllegalArgumentException if the stream holds fewer than 2 bytes
+     */
+    public Map<String, String> extractMacros(InputStream in) throws IOException {
+        PushbackInputStream bpin = new PushbackInputStream(in, 2);
+        byte[] header = new byte[2];
+        // a single read() may legally deliver only one byte, so loop until both are there
+        int headerLen = 0;
+        while (headerLen < header.length) {
+            int count = bpin.read(header, headerLen, header.length - headerLen);
+            if (count == -1) {
+                throw new IllegalArgumentException("Invalid InputStream: cannot read 2 bytes");
+            }
+            headerLen += count;
+        }
+        bpin.unread(header);
+        if (header[0] != 'P' || header[1] != 'K') {
+            return extractMacrosFromPOIFSInputStream(bpin);
+        }
+        ZipInputStream zis = new ZipInputStream(bpin);
+        ZipEntry zipEntry;
+        while ((zipEntry = zis.getNextEntry()) != null) {
+            if (isVbaProjectEntry(zipEntry.getName())) {
+                try {
+                    return extractMacrosFromPOIFSInputStream(zis);
+                } finally {
+                    zis.closeEntry();
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Extracts all macros from all modules of the provided input stream. The
+     * stream is assumed to be in POIFS format (i.e. XLS file itself or
+     * vbaProject.bin from OOXML files).
+     *
+     * @param in the POIFS data
+     * @return module name to module source; modules without source are left out
+     * @throws IOException if the POIFS reader fails to read the stream
+     * @throws RuntimeException wrapping an IOException raised while a single
+     *         stream is being processed
+     */
+    public Map<String, String> extractMacrosFromPOIFSInputStream(InputStream in) throws IOException {
+        final VbaProject project = new VbaProject();
+        POIFSReader dirReader = new POIFSReader();
+        dirReader.registerListener(new POIFSReaderListener() {
+
+            public void processPOIFSReaderEvent(POIFSReaderEvent event) {
+                if (!isInVbaStorage(event)) {
+                    return;
+                }
+                try {
+                    String name = event.getName();
+                    if ("dir".equals(name)) {
+                        readDirStream(event.getStream(), project);
+                    } else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {
+                        // skip __SRP and _VBA_PROJECT since these do not contain macros
+                        readModuleStream(name, event.getStream(), project);
+                    }
+                } catch (IOException e) {
+                    // the listener interface does not allow checked exceptions
+                    throw new RuntimeException(e);
+                }
+            }
+        });
+        dirReader.read(in);
+        Map<String, String> moduleSources = new HashMap<String, String>();
+        for (Map.Entry<String, Module> entry : project.modules.entrySet()) {
+            Module module = entry.getValue();
+            if (module.buf != null && module.buf.length > 0) { // Skip empty modules
+                moduleSources.put(entry.getKey(), new String(module.buf, project.charset));
+            }
+        }
+        return moduleSources;
+    }
+
+    /** Tells whether a ZIP entry is the VBA project part, whatever folder (xl/, word/, ...) it lives in. */
+    private static boolean isVbaProjectEntry(String entryName) {
+        return VBA_PROJECT_ENTRY.equals(entryName) || entryName.endsWith("/" + VBA_PROJECT_ENTRY);
+    }
+
+    /**
+     * Tells whether the stream of the event is a direct child of a storage
+     * named VBA. The path components are compared instead of the string form
+     * of the path, so the result does not depend on which separator character
+     * the string form uses.
+     */
+    private static boolean isInVbaStorage(POIFSReaderEvent event) {
+        int depth = event.getPath().length();
+        return depth > 0 && VBA_STORAGE.equals(event.getPath().getComponent(depth - 1));
+    }
+
+    /**
+     * Walks the records of the compressed dir stream, picking up the project
+     * code page and the source offset of every module.
+     */
+    private static void readDirStream(InputStream dirStream, VbaProject project) throws IOException {
+        RLEDecompressingInputStream in = new RLEDecompressingInputStream(dirStream);
+        try {
+            String streamName = null;
+            while (true) {
+                int id = in.readShort();
+                if (id == -1 || id == RECORD_TERMINATOR) {
+                    break; // EOF or TERMINATOR
+                }
+                int len = in.readInt();
+                switch (id) {
+                case RECORD_PROJECTVERSION:
+                    // the int just read is not a payload length for this record;
+                    // six more bytes follow it
+                    in.skip(6);
+                    break;
+                case RECORD_PROJECTCODEPAGE: {
+                    int codepage = in.readShort();
+                    project.charset = Charset.forName("Cp" + codepage);
+                    break;
+                }
+                case RECORD_STREAMNAME: {
+                    byte[] streamNameBuf = new byte[len];
+                    // read() yields -1 when the stream ends right here
+                    int count = Math.max(in.read(streamNameBuf), 0);
+                    streamName = new String(streamNameBuf, 0, count, project.charset);
+                    break;
+                }
+                case RECORD_MODULEOFFSET: {
+                    int moduleOffset = in.readInt();
+                    Module module = project.modules.get(streamName);
+                    if (module != null) {
+                        // the module stream came first and was stored raw: decompress it now
+                        module.buf = decompress(new ByteArrayInputStream(
+                                module.buf, moduleOffset, module.buf.length - moduleOffset));
+                    } else {
+                        // remember the offset for when the module stream shows up
+                        module = new Module();
+                        module.offset = moduleOffset;
+                        project.modules.put(streamName, module);
+                    }
+                    break;
+                }
+                default:
+                    in.skip(len);
+                    break;
+                }
+            }
+        } finally {
+            in.close();
+        }
+    }
+
+    /**
+     * Stores the content of a module stream: decompressed when the offset is
+     * already known from the dir stream, otherwise raw for later decompression.
+     */
+    private static void readModuleStream(String name, DocumentInputStream stream, VbaProject project)
+            throws IOException {
+        Module module = project.modules.get(name);
+        if (module == null) {
+            // no DIR stream with offsets yet, so store the compressed bytes for later
+            module = new Module();
+            project.modules.put(name, module);
+            ByteArrayOutputStream out = new ByteArrayOutputStream();
+            try {
+                IOUtils.copy(stream, out);
+            } finally {
+                stream.close();
+            }
+            module.buf = out.toByteArray();
+        } else {
+            // we know the offset already, so decompress immediately on-the-fly
+            stream.skip(module.offset);
+            module.buf = decompress(stream);
+        }
+    }
+
+    /** Decompresses everything that is left in the given stream and closes it. */
+    private static byte[] decompress(InputStream compressed) throws IOException {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        RLEDecompressingInputStream stream = new RLEDecompressingInputStream(compressed);
+        try {
+            IOUtils.copy(stream, out);
+        } finally {
+            stream.close();
+        }
+        return out.toByteArray();
+    }
+}
Added: poi/trunk/src/java/org/apache/poi/util/RLEDecompressingInputStream.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/util/RLEDecompressingInputStream.java?rev=1738418&view=auto
==============================================================================
--- poi/trunk/src/java/org/apache/poi/util/RLEDecompressingInputStream.java (added)
+++ poi/trunk/src/java/org/apache/poi/util/RLEDecompressingInputStream.java Sun Apr 10 11:16:49 2016
@@ -0,0 +1,273 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Wrapper of InputStream which provides Run Length Encoding (RLE)
+ * decompression on the fly. Uses MS-OVBA decompression algorithm. See
+ * http://download.microsoft.com/download/2/4/8/24862317-78F0-4C4B-B355-C7B2C1D997DB/[MS-OVBA].pdf
+ * <p>
+ * The compressed data is a signature byte 0x01 followed by chunks. Every
+ * chunk starts with a 2-byte little endian header and is decompressed as a
+ * whole into an internal buffer, from which the read methods are served.
+ */
+public class RLEDecompressingInputStream extends InputStream {
+
+    /** Largest number of bytes a single chunk can decompress to. */
+    private static final int MAX_CHUNK_SIZE = 4096;
+
+    /** the wrapped inputstream */
+    private final InputStream in;
+
+    /** a byte buffer with size 4096 for storing a single chunk */
+    private final byte[] buf;
+
+    /** the current position in the byte buffer for reading */
+    private int pos;
+
+    /** the number of bytes in the byte buffer, or -1 once the end of the data was reached */
+    private int len;
+
+    /**
+     * Creates a new wrapper RLE Decompression InputStream. The signature byte
+     * and the first chunk are read from the wrapped stream right away.
+     *
+     * @param in the stream delivering the compressed data
+     * @throws IOException if reading from the wrapped stream fails
+     * @throws IllegalArgumentException if the data does not start with 0x01
+     */
+    public RLEDecompressingInputStream(InputStream in) throws IOException {
+        this.in = in;
+        buf = new byte[MAX_CHUNK_SIZE];
+        pos = 0;
+        int header = in.read();
+        if (header != 0x01) {
+            throw new IllegalArgumentException(String.format("Header byte 0x01 expected, received 0x%02X", header & 0xFF));
+        }
+        len = readChunk();
+    }
+
+    /**
+     * Reads the next decompressed byte.
+     *
+     * @return the byte as a value from 0 to 255, or -1 at the end of the data
+     * @throws IOException if reading from the wrapped stream fails
+     */
+    @Override
+    public int read() throws IOException {
+        if (!hasData()) {
+            return -1;
+        }
+        // mask the sign away: otherwise bytes >= 0x80 come out negative and
+        // 0xFF cannot be told apart from the end-of-stream marker
+        return buf[pos++] & 0xFF;
+    }
+
+    @Override
+    public int read(byte[] b) throws IOException {
+        return read(b, 0, b.length);
+    }
+
+    /**
+     * Reads up to <code>l</code> decompressed bytes, crossing chunk
+     * boundaries as needed.
+     *
+     * @param b the target array
+     * @param off the index in <code>b</code> of the first byte to write
+     * @param l the number of bytes wanted
+     * @return the number of bytes stored; less than <code>l</code> only when
+     *         the data ended, and -1 when no byte was left at all
+     * @throws IOException if reading from the wrapped stream fails
+     * @throws IndexOutOfBoundsException if <code>off</code> and <code>l</code>
+     *         do not describe a range inside <code>b</code>
+     */
+    @Override
+    public int read(byte[] b, int off, int l) throws IOException {
+        if (off < 0 || l < 0 || l > b.length - off) {
+            throw new IndexOutOfBoundsException();
+        }
+        if (len == -1) {
+            return -1;
+        }
+        int offset = off;
+        int remaining = l;
+        while (remaining > 0) {
+            if (!hasData()) {
+                return offset > off ? offset - off : -1;
+            }
+            int c = Math.min(remaining, len - pos);
+            System.arraycopy(buf, pos, b, offset, c);
+            pos += c;
+            remaining -= c;
+            offset += c;
+        }
+        return l;
+    }
+
+    /**
+     * Skips decompressed bytes, crossing chunk boundaries as needed.
+     *
+     * @param n the number of bytes to skip
+     * @return the number of bytes actually skipped: less than <code>n</code>
+     *         when the data ended first, 0 when <code>n</code> is not positive
+     * @throws IOException if reading from the wrapped stream fails
+     */
+    @Override
+    public long skip(long n) throws IOException {
+        long remaining = n;
+        while (remaining > 0 && hasData()) {
+            // bounded by what is still to be skipped, not by the original request
+            int c = (int) Math.min(remaining, len - pos);
+            pos += c;
+            remaining -= c;
+        }
+        return n > 0 ? n - remaining : 0;
+    }
+
+    /**
+     * @return the number of bytes left in the chunk that is currently buffered
+     */
+    @Override
+    public int available() {
+        return (len > 0 ? len - pos : 0);
+    }
+
+    /** Closes the wrapped stream. */
+    @Override
+    public void close() throws IOException {
+        in.close();
+    }
+
+    /**
+     * Makes sure the buffer holds at least one unread byte, loading further
+     * chunks for as long as they turn out to be empty.
+     *
+     * @return false once the end of the data was reached
+     */
+    private boolean hasData() throws IOException {
+        if (len == -1) {
+            return false;
+        }
+        while (pos >= len) {
+            len = readChunk();
+            if (len == -1) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Reads a single chunk from the underlying inputstream and decompresses
+     * it into the buffer.
+     *
+     * @return the number of decompressed bytes now in the buffer, or -1 if
+     *         the wrapped stream ended
+     * @throws IOException if reading from the wrapped stream fails
+     * @throws IllegalArgumentException if the chunk header lacks its signature bits
+     * @throws IllegalStateException if the chunk is truncated or does not fit the buffer
+     */
+    private int readChunk() throws IOException {
+        pos = 0;
+        int w = readShort(in);
+        if (w == -1) {
+            return -1;
+        }
+        int chunkSize = (w & 0x0FFF) + 1; // plus 3 bytes minus 2 for the length
+        if ((w & 0x7000) != 0x3000) {
+            // report exactly the bits that were tested
+            throw new IllegalArgumentException(String.format("Chunksize header A should be 0x3000, received 0x%04X", w & 0x7000));
+        }
+        boolean rawChunk = (w & 0x8000) == 0;
+        if (rawChunk) {
+            // one read() call may deliver less than asked for, so keep reading
+            int filled = 0;
+            while (filled < chunkSize) {
+                int count = in.read(buf, filled, chunkSize - filled);
+                if (count == -1) {
+                    throw new IllegalStateException(String.format("Not enough bytes read, expected %d", chunkSize));
+                }
+                filled += count;
+            }
+            return chunkSize;
+        }
+        int inOffset = 0;
+        int outOffset = 0;
+        while (inOffset < chunkSize) {
+            // one flag byte announces up to 8 tokens: bit clear = literal, bit set = copy token
+            int tokenFlags = in.read();
+            inOffset++;
+            if (tokenFlags == -1) {
+                break;
+            }
+            for (int n = 0; n < 8; n++) {
+                if (inOffset >= chunkSize) {
+                    break;
+                }
+                if ((tokenFlags & (1 << n)) == 0) {
+                    // literal
+                    final int b = in.read();
+                    if (b == -1) {
+                        return -1;
+                    }
+                    if (outOffset >= buf.length) {
+                        throw new IllegalStateException("Chunk decompresses to more than " + buf.length + " bytes");
+                    }
+                    buf[outOffset++] = (byte) b;
+                    inOffset++;
+                } else {
+                    // compressed token
+                    int token = readShort(in);
+                    if (token == -1) {
+                        return -1;
+                    }
+                    inOffset += 2;
+                    // the split of the 16 bits into offset and length depends on
+                    // how much was decompressed so far
+                    int copyLenBits = getCopyLenBits(outOffset - 1);
+                    int copyOffset = (token >> (copyLenBits)) + 1;
+                    int copyLen = (token & ((1 << copyLenBits) - 1)) + 3;
+                    int startPos = outOffset - copyOffset;
+                    int endPos = startPos + copyLen;
+                    if (startPos < 0 || outOffset + copyLen > buf.length) {
+                        throw new IllegalStateException(String.format(
+                                "Invalid copy token 0x%04X at decompressed offset %d", token, outOffset));
+                    }
+                    // byte by byte on purpose: source and target may overlap
+                    for (int i = startPos; i < endPos; i++) {
+                        buf[outOffset++] = buf[i];
+                    }
+                }
+            }
+        }
+        return outOffset;
+    }
+
+    /**
+     * Helper method to determine how many bits in the CopyToken are used for the CopyLength.
+     *
+     * @param offset the index of the last byte decompressed so far in the current chunk
+     * @return the number of length bits: 15 minus the position of the highest
+     *         set bit among bits 4 to 11, or 12 if none of them is set
+     */
+    static int getCopyLenBits(int offset) {
+        for (int n = 11; n >= 4; n--) {
+            if ((offset & (1 << n)) != 0) {
+                return 15 - n;
+            }
+        }
+        return 12;
+    }
+
+    /**
+     * Convenience method for read a 2-bytes short in little endian encoding.
+     *
+     * @return the value as 0 to 65535, or -1 if the data ended before both bytes were read
+     * @throws IOException if reading from the wrapped stream fails
+     */
+    public int readShort() throws IOException {
+        return readShort(this);
+    }
+
+    /**
+     * Convenience method for read a 4-bytes int in little endian encoding.
+     *
+     * @return the value, or -1 if the data ended before all four bytes were
+     *         read; note that a stored 0xFFFFFFFF yields -1 as well
+     * @throws IOException if reading from the wrapped stream fails
+     */
+    public int readInt() throws IOException {
+        return readInt(this);
+    }
+
+    /** Reads a little endian 2-byte value from the given stream, -1 on a premature end. */
+    private int readShort(InputStream stream) throws IOException {
+        int b0, b1;
+        if ((b0 = stream.read()) == -1) {
+            return -1;
+        }
+        if ((b1 = stream.read()) == -1) {
+            return -1;
+        }
+        return (b0 & 0xFF) | ((b1 & 0xFF) << 8);
+    }
+
+    /** Reads a little endian 4-byte value from the given stream, -1 on a premature end. */
+    private int readInt(InputStream stream) throws IOException {
+        int b0, b1, b2, b3;
+        if ((b0 = stream.read()) == -1) {
+            return -1;
+        }
+        if ((b1 = stream.read()) == -1) {
+            return -1;
+        }
+        if ((b2 = stream.read()) == -1) {
+            return -1;
+        }
+        if ((b3 = stream.read()) == -1) {
+            return -1;
+        }
+        return (b0 & 0xFF) | ((b1 & 0xFF) << 8) | ((b2 & 0xFF) << 16) | ((b3 & 0xFF) << 24);
+    }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org