You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ni...@apache.org on 2010/05/05 19:49:59 UTC

svn commit: r941399 - in /poi/trunk: src/documentation/content/xdocs/ src/ooxml/java/org/apache/poi/xssf/usermodel/ src/ooxml/java/org/apache/poi/xssf/util/ src/ooxml/testcases/org/apache/poi/xssf/usermodel/ src/ooxml/testcases/org/apache/poi/xssf/util...

Author: nick
Date: Wed May  5 17:49:59 2010
New Revision: 941399

URL: http://svn.apache.org/viewvc?rev=941399&view=rev
Log:
Fix bug #49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags

Added:
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
    poi/trunk/test-data/spreadsheet/BrNotClosed.xlsx   (with props)
Modified:
    poi/trunk/src/documentation/content/xdocs/status.xml
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java

Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=941399&r1=941398&r2=941399&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Wed May  5 17:49:59 2010
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.7-SNAPSHOT" date="2010-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags</action>
            <action dev="POI-DEVELOPERS" type="fix">49050 - Improve performance of AbstractEscherHolderRecord when there are lots of Continue Records</action>
            <action dev="POI-DEVELOPERS" type="fix">49194 - Correct text size limit for OOXML .xlsx files</action>
            <action dev="POI-DEVELOPERS" type="fix">49254 - Fix CellUtils.setFont to use the correct type internally</action>

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java?rev=941399&r1=941398&r2=941399&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFVMLDrawing.java Wed May  5 17:49:59 2010
@@ -20,6 +20,7 @@ package org.apache.poi.xssf.usermodel;
 import org.apache.poi.POIXMLDocumentPart;
 import org.apache.poi.openxml4j.opc.PackagePart;
 import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream;
 import org.apache.xmlbeans.XmlException;
 import org.apache.xmlbeans.XmlOptions;
 import org.apache.xmlbeans.XmlObject;
@@ -53,6 +54,11 @@ import schemasMicrosoftComOfficeExcel.ST
  * considered a deprecated format included in Office Open XML for legacy reasons only and new applications that
  * need a file format for drawings are strongly encouraged to use preferentially DrawingML
  * </p>
+ * 
+ * <p>
+ * Warning - Excel is known to put invalid XML into these files!
+ *  For example, &lt;br&gt; without being closed or escaped crops up.
+ * </p>
  *
  * See 6.4 VML - SpreadsheetML Drawing in Office Open XML Part 4 - Markup Language Reference.pdf
  *
@@ -98,7 +104,9 @@ public final class XSSFVMLDrawing extend
 
 
     protected void read(InputStream is) throws IOException, XmlException {
-        XmlObject root = XmlObject.Factory.parse(is);
+        XmlObject root = XmlObject.Factory.parse(
+              new EvilUnclosedBRFixingInputStream(is)
+        );
 
         _qnames = new ArrayList<QName>();
         _items = new ArrayList<XmlObject>();

Added: poi/trunk/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java?rev=941399&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/util/EvilUnclosedBRFixingInputStream.java Wed May  5 17:49:59 2010
@@ -0,0 +1,116 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+
+/**
+ * This is a seriously sick fix for the fact that some .xlsx
+ *  files contain raw bits of HTML, without being escaped
+ *  or properly turned into XML.
+ * The result is that they contain things like &lt;br&gt;,
+ *  which breaks the XML parsing.
+ * This very sick InputStream wrapper spots every one of
+ *  these going past, and turns it into &lt;br/&gt;.
+ * Only works for UTF-8 and US-ASCII based streams!
+ * It should only be used where experience shows the problem
+ *  can occur...
+ */
+public class EvilUnclosedBRFixingInputStream extends InputStream {
+   private static final byte[] DETECT = { '<', 'b', 'r', '>' };
+   private final InputStream source;
+   private final byte[] raw = new byte[4096];
+   /** The fixed bytes. 4 raw ones turn into 5 at the very most */
+   private final byte[] out = new byte[2 * raw.length];
+   /** What of out has yet to be handed over is pos to limit */
+   private int pos, limit;
+   /** Leading bytes of DETECT seen, but held back for now */
+   private int matched;
+   private boolean eof;
+
+   public EvilUnclosedBRFixingInputStream(InputStream source) {
+      this.source = source;
+   }
+
+   /** Reads a single byte, fixed just like the bulk reads are */
+   @Override
+   public int read() throws IOException {
+      return fill() ? (out[pos++] & 0xFF) : -1;
+   }
+
+   /**
+    * Hands over up to len fixed bytes, writing only within off
+    *  to off+len of b. Returns -1 at the end of the stream.
+    */
+   @Override
+   public int read(byte[] b, int off, int len) throws IOException {
+      if(off < 0 || len < 0 || len > b.length - off) {
+         throw new IndexOutOfBoundsException();
+      }
+      if(len == 0) {
+         return 0;
+      }
+      if(!fill()) {
+         return -1;
+      }
+      int count = Math.min(len, limit - pos);
+      System.arraycopy(out, pos, b, off, count);
+      pos += count;
+      return count;
+   }
+
+   @Override
+   public int read(byte[] b) throws IOException {
+      return this.read(b, 0, b.length);
+   }
+
+   /** Closes the wrapped stream too, rather than leaking it */
+   @Override
+   public void close() throws IOException {
+      source.close();
+   }
+
+   /**
+    * Refills out once it is used up, holding back a possible start
+    *  of the tag until later bytes decide (even from a later read).
+    * @return false once everything has been handed over
+    */
+   private boolean fill() throws IOException {
+      while(pos == limit && !eof) {
+         System.arraycopy(DETECT, 0, out, 0, matched); // held back so far
+         pos = 0;
+         int end = matched;
+         int read = source.read(raw, 0, raw.length);
+         eof = (read < 0);
+         for(int i=0; i<read; i++) {
+            if(raw[i] != DETECT[matched]) {
+               matched = 0;
+            }
+            if(raw[i] == DETECT[matched] && ++matched == DETECT.length) {
+               // The tag is complete, so close it just before its >
+               out[end++] = (byte)'/';
+               matched = 0;
+            }
+            out[end++] = raw[i];
+         }
+         limit = eof ? end : end - matched;
+      }
+      return pos < limit;
+   }
+}

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java?rev=941399&r1=941398&r2=941399&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java Wed May  5 17:49:59 2010
@@ -138,4 +138,14 @@ public final class TestXSSFBugs extends 
         assertEquals(1, rels.size());
         assertEquals("Sheet1!A1", rels.get(0).getPackageRelationship().getTargetURI().getFragment());
     }
+    
+    /**
+     * Excel will sometimes write a button with a textbox
+     *  containing &lt;br&gt; (not closed!).
+     * Clearly Excel shouldn't do this, but test that we can
+     *  read the file despite the naughtiness
+     */
+    public void test49020() throws Exception {
+       XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("BrNotClosed.xlsx");
+    }
 }

Added: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java?rev=941399&view=auto
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java (added)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/util/TestEvilUnclosedBRFixingInputStream.java Wed May  5 17:49:59 2010
@@ -0,0 +1,79 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+
+import junit.framework.TestCase;
+
+public final class TestEvilUnclosedBRFixingInputStream extends TestCase {
+   /**
+    * Markup without any unclosed br tags in it has
+    *  to come through completely untouched
+    */
+   public void testOK() throws Exception {
+      byte[] ok = "<p><div>Hello There!</div> <div>Tags!</div></p>".getBytes("UTF-8");
+
+      assertEquals(ok, readThroughFixer(ok));
+   }
+
+   /**
+    * An unclosed br tag has to come out as a closed one,
+    *  with everything around it left alone
+    */
+   public void testProblem() throws Exception {
+      byte[] orig = "<p><div>Hello<br>There!</div> <div>Tags!</div></p>".getBytes("UTF-8");
+      byte[] fixed = "<p><div>Hello<br/>There!</div> <div>Tags!</div></p>".getBytes("UTF-8");
+
+      assertEquals(fixed, readThroughFixer(orig));
+   }
+
+   /**
+    * Wraps the data in the fixing stream, and collects
+    *  everything that can be read back from it.
+    * Asks for up to 1024 bytes at a time, stopping as
+    *  soon as a read gives nothing back.
+    */
+   private static byte[] readThroughFixer(byte[] data) throws Exception {
+      ByteArrayOutputStream collected = new ByteArrayOutputStream();
+      EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream(
+            new ByteArrayInputStream(data)
+      );
+
+      byte[] chunk = new byte[1024];
+      for(int r = inp.read(chunk); r > 0; r = inp.read(chunk)) {
+         collected.write(chunk, 0, r);
+      }
+      return collected.toByteArray();
+   }
+
+   /**
+    * Checks that two byte arrays are the same length,
+    *  and then that they hold the same bytes
+    */
+   protected void assertEquals(byte[] a, byte[] b) {
+      assertEquals(a.length, b.length);
+
+      int i = 0;
+      while(i < a.length) {
+         assertEquals("Wrong byte at index " + i, a[i], b[i]);
+         i++;
+      }
+   }
+}

Added: poi/trunk/test-data/spreadsheet/BrNotClosed.xlsx
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/BrNotClosed.xlsx?rev=941399&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/spreadsheet/BrNotClosed.xlsx
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org