You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ta...@apache.org on 2017/01/19 20:19:26 UTC

svn commit: r1779519 - in /poi/trunk/src/scratchpad: src/org/apache/poi/hwmf/draw/HwmfGraphics.java src/org/apache/poi/hwmf/record/HwmfFont.java src/org/apache/poi/hwmf/record/HwmfText.java testcases/org/apache/poi/hwmf/TestHwmfParsing.java

Author: tallison
Date: Thu Jan 19 20:19:26 2017
New Revision: 1779519

URL: http://svn.apache.org/viewvc?rev=1779519&view=rev
Log:
Bug 60608 -- improve charset handling in Hwmf

Modified:
    poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFont.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java?rev=1779519&r1=1779518&r2=1779519&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/draw/HwmfGraphics.java Thu Jan 19 20:19:26 2017
@@ -29,6 +29,7 @@ import java.awt.font.TextAttribute;
 import java.awt.geom.AffineTransform;
 import java.awt.geom.Rectangle2D;
 import java.awt.image.BufferedImage;
+import java.nio.charset.Charset;
 import java.text.AttributedString;
 import java.util.ArrayList;
 import java.util.LinkedList;
@@ -48,8 +49,11 @@ import org.apache.poi.hwmf.record.HwmfPe
 import org.apache.poi.sl.draw.DrawFactory;
 import org.apache.poi.sl.draw.DrawFontManager;
 import org.apache.poi.sl.draw.Drawable;
+import org.apache.poi.util.LocaleUtil;
 
 public class HwmfGraphics {
+
+    private static final Charset DEFAULT_CHARSET = LocaleUtil.CHARSET_1252;
     private final Graphics2D graphicsCtx;
     private final List<HwmfDrawProperties> propStack = new LinkedList<HwmfDrawProperties>();
     private HwmfDrawProperties prop = new HwmfDrawProperties();
@@ -311,14 +315,34 @@ public class HwmfGraphics {
             break;
         }
     }
-    
+
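+    /**
+     * Draws the given text; same as the three-argument overload called with a null dx array.
+     * @param text the text to draw
+     * @param bounds forwarded unchanged to the three-argument overload
+     * @deprecated use {@link #drawString(byte[], Rectangle2D)}
+     */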
     public void drawString(String text, Rectangle2D bounds) {
         drawString(text, bounds, null);
     }
-    
+
+    public void drawString(byte[] text, Rectangle2D bounds) {
+        drawString(text, bounds, null);
+    }
+
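+    /**
+     * Encodes text with DEFAULT_CHARSET and forwards the bytes to the byte[] overload.
+     * @param text the text to draw; must not be null, since getBytes is called on it
+     * @param bounds forwarded unchanged, as is the dx array
+     * @deprecated use {@link #drawString(byte[], Rectangle2D, int[])}
+     */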
     public void drawString(String text, Rectangle2D bounds, int dx[]) {
+        drawString(text.getBytes(DEFAULT_CHARSET), bounds, dx);
+    }
+
+    public void drawString(byte[] text, Rectangle2D bounds, int dx[]) {
         HwmfFont font = prop.getFont();
-        if (font == null || text == null || text.isEmpty()) {
+        if (font == null || text == null || text.length == 0) {
             return;
         }
         
@@ -326,8 +350,11 @@ public class HwmfGraphics {
         // TODO: another approx. ...
         double fontW = fontH/1.8;
         
-        int len = text.length();
-        AttributedString as = new AttributedString(text);
+        int len = text.length;
+        Charset charset = (font.getCharSet().getCharset() == null)?
+                DEFAULT_CHARSET : font.getCharSet().getCharset();
+        String textString = new String(text, charset);
+        AttributedString as = new AttributedString(textString);
         if (dx == null || dx.length == 0) {
             addAttributes(as, font);
         } else {

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFont.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFont.java?rev=1779519&r1=1779518&r2=1779519&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFont.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfFont.java Thu Jan 19 20:19:26 2017
@@ -19,67 +19,93 @@ package org.apache.poi.hwmf.record;
 
 import java.io.IOException;
 import java.nio.charset.Charset;
+import java.nio.charset.UnsupportedCharsetException;
 
 import org.apache.poi.util.LittleEndianConsts;
 import org.apache.poi.util.LittleEndianInputStream;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
 
 /**
  * The Font object specifies the attributes of a logical font
  */
 public class HwmfFont {
+
+    private static final POILogger logger = POILogFactory.getLogger(HwmfFont.class);
+
     public enum WmfCharset {
         /** Specifies the English character set. */
-        ANSI_CHARSET(0x00000000),
+        ANSI_CHARSET(0x00000000, "Cp1252"),
         /**
          * Specifies a character set based on the current system locale;
          * for example, when the system locale is United States English,
          * the default character set is ANSI_CHARSET.
          */
-        DEFAULT_CHARSET(0x00000001),
+        DEFAULT_CHARSET(0x00000001, "Cp1252"),
         /** Specifies a character set of symbols. */
-        SYMBOL_CHARSET(0x00000002),
+        SYMBOL_CHARSET(0x00000002, ""),
         /** Specifies the Apple Macintosh character set. */
-        MAC_CHARSET(0x0000004D),
+        MAC_CHARSET(0x0000004D, "MacRoman"),
         /** Specifies the Japanese character set. */
-        SHIFTJIS_CHARSET(0x00000080),
+        SHIFTJIS_CHARSET(0x00000080, "Shift_JIS"),
         /** Also spelled "Hangeul". Specifies the Hangul Korean character set. */
-        HANGUL_CHARSET(0x00000081),
+        HANGUL_CHARSET(0x00000081, "cp949"),
         /** Also spelled "Johap". Specifies the Johab Korean character set. */
-        JOHAB_CHARSET(0x00000082),
+        JOHAB_CHARSET(0x00000082, "x-Johab"),
         /** Specifies the "simplified" Chinese character set for People's Republic of China. */
-        GB2312_CHARSET(0x00000086),
+        GB2312_CHARSET(0x00000086, "GB2312"),
         /**
          * Specifies the "traditional" Chinese character set, used mostly in
          * Taiwan and in the Hong Kong and Macao Special Administrative Regions.
          */
-        CHINESEBIG5_CHARSET(0x00000088),
+        CHINESEBIG5_CHARSET(0x00000088, "Big5"),
         /** Specifies the Greek character set. */
-        GREEK_CHARSET(0x000000A1),
+        GREEK_CHARSET(0x000000A1, "Cp1253"),
         /** Specifies the Turkish character set. */
-        TURKISH_CHARSET(0x000000A2),
+        TURKISH_CHARSET(0x000000A2, "Cp1254"),
         /** Specifies the Vietnamese character set. */
-        VIETNAMESE_CHARSET(0x000000A3),
+        VIETNAMESE_CHARSET(0x000000A3, "Cp1258"),
         /** Specifies the Hebrew character set. */
-        HEBREW_CHARSET(0x000000B1),
+        HEBREW_CHARSET(0x000000B1, "Cp1255"),
         /** Specifies the Arabic character set. */
-        ARABIC_CHARSET(0x000000B2),
+        ARABIC_CHARSET(0x000000B2, "Cp1256"),
         /** Specifies the Baltic (Northeastern European) character set. */
-        BALTIC_CHARSET(0x000000BA),
+        BALTIC_CHARSET(0x000000BA, "Cp1257"),
         /** Specifies the Russian Cyrillic character set. */
-        RUSSIAN_CHARSET(0x000000CC),
+        RUSSIAN_CHARSET(0x000000CC, "Cp1251"),
         /** Specifies the Thai character set. */
-        THAI_CHARSET(0x000000DE),
+        THAI_CHARSET(0x000000DE, "x-windows-874"),
         /** Specifies a Eastern European character set. */
-        EASTEUROPE_CHARSET(0x000000EE),
+        EASTEUROPE_CHARSET(0x000000EE, "Cp1250"),
         /**
          * Specifies a mapping to one of the OEM code pages,
          * according to the current system locale setting.
          */
-        OEM_CHARSET(0x000000FF);
+        OEM_CHARSET(0x000000FF, "Cp1252");
 
         int flag;
-        WmfCharset(int flag) {
+        Charset charset;
+
+        WmfCharset(int flag, String javaCharsetName) {
             this.flag = flag;
+            if (javaCharsetName.length() > 0) {
+                try {
+                    charset = Charset.forName(javaCharsetName);
+                    return;
+                } catch (UnsupportedCharsetException e) {
+                    logger.log(POILogger.WARN, "Unsupported charset: "+javaCharsetName);
+                }
+            }
+            charset = null;
+        }
+
+        /**
+         * Returns the Java charset mapped to this WMF charset.
+         * @return the charset, or <code>null</code> if no Java charset name is mapped (as for
+         *         SYMBOL_CHARSET) or the mapped name is not supported by the running JVM
+         */
+        public Charset getCharset() {
+            return charset;
         }
 
         static WmfCharset valueOf(int flag) {

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java?rev=1779519&r1=1779518&r2=1779519&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hwmf/record/HwmfText.java Thu Jan 19 20:19:26 2017
@@ -19,6 +19,7 @@ package org.apache.poi.hwmf.record;
 
 import java.awt.geom.Rectangle2D;
 import java.io.IOException;
+import java.nio.charset.Charset;
 
 import org.apache.poi.hwmf.draw.HwmfDrawProperties;
 import org.apache.poi.hwmf.draw.HwmfGraphics;
@@ -27,7 +28,6 @@ import org.apache.poi.util.BitField;
 import org.apache.poi.util.BitFieldFactory;
 import org.apache.poi.util.LittleEndianConsts;
 import org.apache.poi.util.LittleEndianInputStream;
-import org.apache.poi.util.LocaleUtil;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
 
@@ -144,7 +144,7 @@ public class HwmfText {
          * length of the string.
          * The string is written at the location specified by the XStart and YStart fields.
          */
-        private String text;
+        private byte[] rawTextBytes;
         /**
          * A 16-bit signed integer that defines the vertical (y-axis) coordinate, in logical
          * units, of the point where drawing is to start.
@@ -164,18 +164,33 @@ public class HwmfText {
         @Override
         public int init(LittleEndianInputStream leis, long recordSize, int recordFunction) throws IOException {
             stringLength = leis.readShort();
-            byte buf[] = new byte[stringLength+(stringLength&1)];
-            leis.readFully(buf);
-            text = new String(buf, 0, stringLength, LocaleUtil.CHARSET_1252).trim();
+            rawTextBytes = new byte[stringLength+(stringLength&1)];
+            leis.readFully(rawTextBytes);
             yStart = leis.readShort();
             xStart = leis.readShort();
-            return 3*LittleEndianConsts.SHORT_SIZE+buf.length;
+            return 3*LittleEndianConsts.SHORT_SIZE+rawTextBytes.length;
         }
 
         @Override
         public void draw(HwmfGraphics ctx) {
             Rectangle2D bounds = new Rectangle2D.Double(xStart, yStart, 0, 0);
-            ctx.drawString(text, bounds);
+            ctx.drawString(getTextBytes(), bounds);
+        }
+
+        public String getText(Charset charset) {
+            return new String(getTextBytes(), charset);
+        }
+
+        /**
+         * Copies the text bytes out of rawTextBytes, leaving off the padding byte.
+         * @return a new array holding the first stringLength bytes of rawTextBytes
+         * (indices 0 to stringLength-1); the optional extra byte that pads odd-length
+         * strings to an even size is not included.
+         */
+        private byte[] getTextBytes() {
+            byte[] ret = new byte[stringLength];
+            System.arraycopy(rawTextBytes, 0, ret, 0, stringLength);
+            return ret;
         }
     }
     
@@ -264,7 +279,7 @@ public class HwmfText {
          * the length is odd, an extra byte is placed after it so that the following member (optional Dx) is 
          * aligned on a 16-bit boundary.
          */
-        private String text;
+        private byte[] rawTextBytes;
         /**
          * An optional array of 16-bit signed integers that indicate the distance between 
          * origins of adjacent character cells. For example, Dx[i] logical units separate the origins of 
@@ -300,10 +315,9 @@ public class HwmfText {
                 size += 4*LittleEndianConsts.SHORT_SIZE;
             }
             
-            byte buf[] = new byte[stringLength+(stringLength&1)];
-            leis.readFully(buf);
-            text = new String(buf, 0, stringLength, LocaleUtil.CHARSET_1252);
-            size += buf.length;
+            rawTextBytes = new byte[stringLength+(stringLength&1)];
+            leis.readFully(rawTextBytes);
+            size += rawTextBytes.length;
             
             if (size >= remainingRecordSize) {
                 logger.log(POILogger.INFO, "META_EXTTEXTOUT doesn't contain character tracking info");
@@ -327,7 +341,23 @@ public class HwmfText {
         @Override
         public void draw(HwmfGraphics ctx) {
             Rectangle2D bounds = new Rectangle2D.Double(x, y, 0, 0);
-            ctx.drawString(text, bounds, dx);
+            ctx.drawString(getTextBytes(), bounds, dx);
+        }
+
+        public String getText(Charset charset) {
+            return new String(getTextBytes(), charset);
+        }
+
+        /**
+         * Copies the text bytes out of rawTextBytes, leaving off the padding byte.
+         * @return a new array holding the first stringLength bytes of rawTextBytes
+         * (indices 0 to stringLength-1); the optional extra byte that pads odd-length
+         * strings to an even size is not included.
+         */
+        private byte[] getTextBytes() {
+            byte[] ret = new byte[stringLength];
+            System.arraycopy(rawTextBytes, 0, ret, 0, stringLength);
+            return ret;
         }
     }
     
@@ -523,5 +553,9 @@ public class HwmfText {
         public void applyObject(HwmfGraphics ctx) {
             ctx.getProperties().setFont(font);
         }
+
+        public HwmfFont getFont() {
+            return font;
+        }
     }
 }

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java?rev=1779519&r1=1779518&r2=1779519&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hwmf/TestHwmfParsing.java Thu Jan 19 20:19:26 2017
@@ -18,7 +18,9 @@
 package org.apache.poi.hwmf;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
+import javax.imageio.ImageIO;
 import java.awt.Dimension;
 import java.awt.Graphics2D;
 import java.awt.RenderingHints;
@@ -31,21 +33,24 @@ import java.io.FileOutputStream;
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.net.URL;
+import java.nio.charset.Charset;
 import java.util.List;
 import java.util.Locale;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
 
-import javax.imageio.ImageIO;
-
 import org.apache.poi.POIDataSamples;
 import org.apache.poi.hwmf.record.HwmfFill.HwmfImageRecord;
+import org.apache.poi.hwmf.record.HwmfFont;
 import org.apache.poi.hwmf.record.HwmfRecord;
+import org.apache.poi.hwmf.record.HwmfRecordType;
+import org.apache.poi.hwmf.record.HwmfText;
 import org.apache.poi.hwmf.usermodel.HwmfPicture;
 import org.apache.poi.sl.usermodel.PictureData;
 import org.apache.poi.sl.usermodel.PictureData.PictureType;
 import org.apache.poi.sl.usermodel.SlideShow;
 import org.apache.poi.sl.usermodel.SlideShowFactory;
+import org.apache.poi.util.LocaleUtil;
 import org.apache.poi.util.Units;
 import org.junit.Ignore;
 import org.junit.Test;
@@ -188,4 +193,33 @@ public class TestHwmfParsing {
             }
         }
     }
+
+    @Test
+    @Ignore("If we decide we can use common crawl file specified, we can turn this back on")
+    public void testCyrillic() throws Exception {
+        //TODO: move test file to framework and fix this
+        File dir = new File("C:/somethingOrOther");
+        File f = new File(dir, "ZMLH54SPLI76NQ7XMKVB7SMUJA2HTXTS-2.wmf");
+        HwmfPicture wmf = new HwmfPicture(new FileInputStream(f));
+
+        Charset charset = LocaleUtil.CHARSET_1252;
+        StringBuilder sb = new StringBuilder();
+        //Hack: take the charset from the most recent createFontIndirect record.
+        //That happens to work for this test file; a correct implementation must
+        //track the selected font the way HwmfGraphics does (property stack, etc.).
+        for (HwmfRecord r : wmf.getRecords()) {
+            if (r.getRecordType().equals(HwmfRecordType.createFontIndirect)) {
+                HwmfFont font = ((HwmfText.WmfCreateFontIndirect)r).getFont();
+                charset = (font.getCharSet().getCharset() == null) ? LocaleUtil.CHARSET_1252 : font.getCharSet().getCharset();
+            }
+            if (r.getRecordType().equals(HwmfRecordType.extTextOut)) {
+                HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut)r;
+                sb.append(textOut.getText(charset)).append("\n");
+            }
+        }
+        String txt = sb.toString();
+        assertTrue(txt.contains("\u041E\u0431\u0449\u043E"));
+        assertTrue(txt.contains("\u0411\u0430\u043B\u0430\u043D\u0441"));
+    }
+
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org