You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ta...@apache.org on 2018/10/29 15:48:44 UTC
svn commit: r1845138 - in /poi:
site/src/documentation/content/xdocs/changes.xml
trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
trunk/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java
trunk/test-data/spreadsheet/62624.bin
Author: tallison
Date: Mon Oct 29 15:48:44 2018
New Revision: 1845138
URL: http://svn.apache.org/viewvc?rev=1845138&view=rev
Log:
bug 62624 -- fix npe by adding processing of module name mapping in VBAMacroReader
Added:
poi/trunk/test-data/spreadsheet/62624.bin (with props)
Modified:
poi/site/src/documentation/content/xdocs/changes.xml
poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
poi/trunk/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java
Modified: poi/site/src/documentation/content/xdocs/changes.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/changes.xml?rev=1845138&r1=1845137&r2=1845138&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/changes.xml (original)
+++ poi/site/src/documentation/content/xdocs/changes.xml Mon Oct 29 15:48:44 2018
@@ -95,6 +95,7 @@
<summary-item>Upgrade to XMLBeans 3.0.2</summary-item>
</summary>
<actions>
+ <action dev="PD" type="fix" fixes-bug="62624" context="POI_Overall">Handle module name mapping in VBAMacroReader</action>
<action dev="PD" type="fix" fixes-bug="62859" context="XWPF">Rare NPE while creating XWPFSDTContent</action>
<action dev="PD" type="add" fixes-bug="62373" context="SS_Common">Support for FREQUENCY function</action>
<action dev="PD" type="fix" fixes-bug="62831" context="POI_Overall">WorkbookFactory.create support for subclass of File, eg from JFileChooser</action>
Modified: poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java?rev=1845138&r1=1845137&r2=1845138&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java (original)
+++ poi/trunk/src/java/org/apache/poi/poifs/macros/VBAMacroReader.java Mon Oct 29 15:48:44 2018
@@ -20,9 +20,19 @@ package org.apache.poi.poifs.macros;
import static org.apache.poi.util.StringUtil.endsWithIgnoreCase;
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
-import java.io.*;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.Closeable;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@@ -39,6 +49,8 @@ import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
import org.apache.poi.util.RLEDecompressingInputStream;
import org.apache.poi.util.StringUtil;
@@ -56,6 +68,8 @@ import org.apache.poi.util.StringUtil;
* @since 3.15-beta2
*/
public class VBAMacroReader implements Closeable {
+ private static final POILogger LOGGER = POILogFactory.getLogger(VBAMacroReader.class);
+
protected static final String VBA_PROJECT_OOXML = "vbaProject.bin";
protected static final String VBA_PROJECT_POIFS = "VBA";
@@ -111,8 +125,13 @@ public class VBAMacroReader implements C
public Map<String, Module> readMacroModules() throws IOException {
final ModuleMap modules = new ModuleMap();
+ //ascii -> unicode mapping for module names
+ //preserve insertion order
+ final Map<String, String> moduleNameMap = new LinkedHashMap<>();
+
findMacros(fs.getRoot(), modules);
- findProjectProperties(fs.getRoot(), modules);
+ findModuleNameMap(fs.getRoot(), moduleNameMap, modules);
+ findProjectProperties(fs.getRoot(), moduleNameMap, modules);
Map<String, Module> moduleSources = new HashMap<>();
for (Map.Entry<String, ModuleImpl> entry : modules.entrySet()) {
@@ -327,16 +346,33 @@ public class VBAMacroReader implements C
}
}
- protected void findProjectProperties(DirectoryNode node, ModuleMap modules) throws IOException {
+ protected void findProjectProperties(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
for (Entry entry : node) {
if ("project".equalsIgnoreCase(entry.getName())) {
DocumentNode document = (DocumentNode)entry;
DocumentInputStream dis = new DocumentInputStream(document);
- readProjectProperties(dis, modules);
+ readProjectProperties(dis, moduleNameMap, modules);
+ } else {
+ for (Entry child : node) {
+ if (child instanceof DirectoryNode) {
+ findProjectProperties((DirectoryNode)child, moduleNameMap, modules);
+ }
+ }
+
+ }
+ }
+ }
+
+ protected void findModuleNameMap(DirectoryNode node, Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
+ for (Entry entry : node) {
+ if ("projectwm".equalsIgnoreCase(entry.getName())) {
+ DocumentNode document = (DocumentNode)entry;
+ DocumentInputStream dis = new DocumentInputStream(document);
+ readNameMapRecords(dis, moduleNameMap, modules.charset);
} else {
for (Entry child : node) {
if (child instanceof DirectoryNode) {
- findProjectProperties((DirectoryNode)child, modules);
+ findModuleNameMap((DirectoryNode)child, moduleNameMap, modules);
}
}
@@ -559,6 +595,75 @@ public class VBAMacroReader implements C
return new ASCIIUnicodeStringPair(ascii, unicode);
}
+ private static void readNameMapRecords(InputStream is, Map<String, String> moduleNames, Charset charset) throws IOException {
+ //Reads (MBCS name, unicode name) pairs from the stream into moduleNames until the end marker or EOF; see 2.3.3 PROJECTwm Stream: Module Name Information
+ //mbcs = module name as a multi-byte character string decoded with charset; unicode = the UTF-16LE name read right after it
+ String mbcs = null;
+ String unicode = null;
+ do {
+ try {
+ mbcs = readMBCS(is, charset);
+ } catch (EOFException e) { //stream ended inside/before an MBCS name: keep the pairs read so far and stop quietly
+ return;
+ }
+ if (mbcs == null) { //readMBCS returns null when the name starts with 0x00, i.e. no further names follow
+ return;
+ }
+ try {
+ unicode = readUnicode(is);
+ } catch (EOFException e) { //MBCS name without a complete unicode partner: the dangling name is not stored
+ return;
+ }
+ if (mbcs != null && unicode != null) { //both are non-null here: mbcs was checked above and readUnicode either returns a String or throws
+ moduleNames.put(mbcs, unicode);
+ }
+ } while (mbcs != null && unicode != null); //always true at this point; the loop is left through the returns above (or a propagated IOException)
+ }
+
+ private static String readUnicode(InputStream is) throws IOException {
+ //Reads two bytes at a time until a 0x00 0x00 pair, then decodes the bytes collected before that pair as UTF-16LE; throws EOFException if the stream ends first
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ int b0 = is.read();
+ int b1 = is.read();
+
+ while ((b0 + b1) != 0) { //read() yields 0..255 or -1, so for two real bytes the sum is 0 only for the 0x00 0x00 terminator
+ if (b0 == -1 || b1 == -1) { //NOTE(review): b0 == 0x01 followed by EOF (b1 == -1) also sums to 0, so that truncated input leaves the loop without reaching this check
+ throw new EOFException();
+ }
+
+ bos.write(b0);
+ bos.write(b1);
+ b0 = is.read();
+ b1 = is.read();
+ }
+ return new String (bos.toByteArray(), StandardCharsets.UTF_16LE);
+ }
+
+ //Reads bytes up to (not including) the next 0x00 and decodes them with charset. Returns null when the very first byte is 0x00; in that case one more byte is consumed and a warning is logged if it is not 0x00. Throws EOFException if the stream ends before a terminator.
+ private static String readMBCS(InputStream is, Charset charset) throws IOException {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ int len = 0; //number of bytes seen before the terminating 0x00
+ int b = is.read();
+ while (b != 0) {
+ ++len;
+ if (b == -1) { //end of stream before a 0x00 terminator
+ throw new EOFException();
+ }
+ bos.write(b);
+ b = is.read();
+ }
+ if (len == 0) { //first byte was 0x00: treated as the end of the module name map
+ b = is.read(); //consume the byte that is expected to be the second 0x00
+ if (b == -1) {
+ throw new EOFException();
+ }
+ if (b != 0) { //unexpected byte after the first 0x00: only logged, null is still returned
+ LOGGER.log(POILogger.WARN, "expected two 0x00 at end of module name map");
+ }
+ return null;
+ }
+ return new String(bos.toByteArray(), charset);
+ }
/**
* Read <tt>length</tt> bytes of MBCS (multi-byte character set) characters from the stream
@@ -579,7 +684,8 @@ public class VBAMacroReader implements C
return new String(buffer, 0, length, charset);
}
- protected void readProjectProperties(DocumentInputStream dis, ModuleMap modules) throws IOException {
+ protected void readProjectProperties(DocumentInputStream dis,
+ Map<String, String> moduleNameMap, ModuleMap modules) throws IOException {
InputStreamReader reader = new InputStreamReader(dis, modules.charset);
StringBuilder builder = new StringBuilder();
char[] buffer = new char[512];
@@ -588,6 +694,9 @@ public class VBAMacroReader implements C
builder.append(buffer, 0, read);
}
String properties = builder.toString();
+ //the module name map names should be in exactly the same order
+ //as the module names here. See 2.3.3 PROJECTwm Stream.
+ //At some point, we might want to enforce that.
for (String line : properties.split("\r\n|\n\r")) {
if (!line.startsWith("[")) {
String[] tokens = line.split("=");
@@ -595,22 +704,40 @@ public class VBAMacroReader implements C
&& tokens[1].startsWith("\"") && tokens[1].endsWith("\"")) {
// Remove any double quotes
tokens[1] = tokens[1].substring(1, tokens[1].length() - 1);
-
}
- if ("Document".equals(tokens[0])) {
+ if ("Document".equals(tokens[0]) && tokens.length > 1) {
String mn = tokens[1].substring(0, tokens[1].indexOf("/&H"));
- ModuleImpl module = modules.get(mn);
- module.moduleType = ModuleType.Document;
- } else if ("Module".equals(tokens[0])) {
- ModuleImpl module = modules.get(tokens[1]);
- module.moduleType = ModuleType.Module;
- } else if ("Class".equals(tokens[0])) {
- ModuleImpl module = modules.get(tokens[1]);
- module.moduleType = ModuleType.Class;
+ ModuleImpl module = getModule(mn, moduleNameMap, modules);
+ if (module != null) {
+ module.moduleType = ModuleType.Document;
+ } else {
+ LOGGER.log(POILogger.WARN, "couldn't find module with name: "+mn);
+ }
+ } else if ("Module".equals(tokens[0]) && tokens.length > 1) {
+ ModuleImpl module = getModule(tokens[1], moduleNameMap, modules);
+ if (module != null) {
+ module.moduleType = ModuleType.Module;
+ } else {
+ LOGGER.log(POILogger.WARN, "couldn't find module with name: "+tokens[1]);
+ }
+ } else if ("Class".equals(tokens[0]) && tokens.length > 1) {
+ ModuleImpl module = getModule(tokens[1], moduleNameMap, modules);
+ if (module != null) {
+ module.moduleType = ModuleType.Class;
+ } else {
+ LOGGER.log(POILogger.WARN, "couldn't find module with name: "+tokens[1]);
+ }
}
}
}
}
+ //Looks up a module in moduleMap: if moduleNameMap has an entry for moduleName, the mapped value is used as the key, otherwise moduleName itself. Can return null!
+ private ModuleImpl getModule(String moduleName, Map<String, String> moduleNameMap, ModuleMap moduleMap) {
+ if (moduleNameMap.containsKey(moduleName)) {
+ return moduleMap.get(moduleNameMap.get(moduleName)); //NOTE(review): no fallback to the raw moduleName if the mapped name is not in moduleMap
+ }
+ return moduleMap.get(moduleName);
+ }
private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
byte[] buffer = IOUtils.safelyAllocate(unicodeNameRecordLength, 20000);
Modified: poi/trunk/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java?rev=1845138&r1=1845137&r2=1845138&view=diff
==============================================================================
--- poi/trunk/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java (original)
+++ poi/trunk/src/testcases/org/apache/poi/poifs/macros/TestVBAMacroReader.java Mon Oct 29 15:48:44 2018
@@ -286,4 +286,18 @@ public class TestVBAMacroReader {
assertContains(content, "Attribute VB_Customizable = True");
r.close();
}
+
+ @Test
+ public void bug62624() throws IOException {
+ //Regression test for bug 62624: reads the macros of 62624.bin and checks a module whose name contains a non-ASCII character. The macro comes from Common Crawl: HRLOXHGMGLFIJQQU27RIWXOARRHAAAAS
+ File f = POIDataSamples.getSpreadSheetInstance().getFile("62624.bin");
+ VBAMacroReader r = new VBAMacroReader(f);
+
+ Map<String, Module> macros = r.readMacroModules();
+ assertEquals(13, macros.size());
+ assertNotNull(macros.get("M\u00F3dulo1")); //the module must be retrievable under its unicode name
+ assertContains(macros.get("M\u00F3dulo1").getContent(), "Calcula_tributos");
+ assertEquals(Module.ModuleType.Module, macros.get("M\u00F3dulo1").geModuleType()); //NOTE(review): "geModuleType" looks like a typo for getModuleType - confirm against the Module interface before renaming
+ r.close(); //NOTE(review): not reached if an assertion above fails
+ }
}
Added: poi/trunk/test-data/spreadsheet/62624.bin
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/62624.bin?rev=1845138&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/62624.bin
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org