You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/09/06 20:45:09 UTC

svn commit: r1622903 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/encoding/ main/java/org/apache/pdfbox/pdmodel/font/ main/resources/org/apache/pdfbox/resources/ test/java/org/apache/pdfbox/pdmodel/font/

Author: jahewson
Date: Sat Sep  6 18:45:09 2014
New Revision: 1622903

URL: http://svn.apache.org/r1622903
Log:
PDFBOX-2317: ZapfDingbats uses its own glyph list

Added:
    pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java?rev=1622903&r1=1622902&r2=1622903&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/GlyphList.java Sat Sep  6 18:45:09 2014
@@ -36,17 +36,18 @@ import java.util.StringTokenizer;
 public class GlyphList
 {
     private static final Log LOG = LogFactory.getLog(GlyphList.class);
-
-    private static final Map<String, String> NAME_TO_UNICODE = new HashMap<String, String>();
-    private static final Map<String, String> UNICODE_TO_NAME = new HashMap<String, String>();
+    public static final GlyphList DEFAULT;
+    public static final GlyphList ZAPF_DINGBATS;
 
     static
     {
+        DEFAULT = new GlyphList();
+
         // Loads the official glyph List based on adobes glyph list
-        loadGlyphs("org/apache/pdfbox/resources/glyphlist.properties");
+        DEFAULT.loadGlyphs("org/apache/pdfbox/resources/glyphlist.properties");
 
         // Loads some additional glyph mappings
-        loadGlyphs("org/apache/pdfbox/resources/additional_glyphlist.properties");
+        DEFAULT.loadGlyphs("org/apache/pdfbox/resources/additional_glyphlist.properties");
 
         // Load an external glyph list file that user can give as JVM property
         try
@@ -57,7 +58,7 @@ public class GlyphList
                 File external = new File(location);
                 if (external.exists())
                 {
-                    loadGlyphs(location);
+                    DEFAULT.loadGlyphs(location);
                 }
             }
         }
@@ -67,19 +68,25 @@ public class GlyphList
         }
 
         // todo: this is not desirable in many cases, should be done much later, e.g. TextStripper
-        NAME_TO_UNICODE.put("fi", "fi");
-        NAME_TO_UNICODE.put("fl", "fl");
-        NAME_TO_UNICODE.put("ffi", "ffi");
-        NAME_TO_UNICODE.put("ff", "ff");
-        NAME_TO_UNICODE.put("pi", "pi");
+        DEFAULT.nameToUnicode.put("fi", "fi");
+        DEFAULT.nameToUnicode.put("fl", "fl");
+        DEFAULT.nameToUnicode.put("ffi", "ffi");
+        DEFAULT.nameToUnicode.put("ff", "ff");
+        DEFAULT.nameToUnicode.put("pi", "pi");
+
+        // Zapf Dingbats has its own glyph list
+        ZAPF_DINGBATS = new GlyphList();
+        ZAPF_DINGBATS.loadGlyphs("org/apache/pdfbox/resources/zapf_dingbats.properties");
+    }
 
-        for (Map.Entry<String, String> entry : NAME_TO_UNICODE.entrySet())
-        {
-            UNICODE_TO_NAME.put(entry.getValue(), entry.getKey());
-        }
+    private final Map<String, String> nameToUnicode = new HashMap<String, String>();
+    private final Map<String, String> unicodeToName = new HashMap<String, String>();
+
+    private GlyphList()
+    {
     }
 
-    private static void loadGlyphs(String path)
+    private void loadGlyphs(String path)
     {
         try
         {
@@ -101,14 +108,17 @@ public class GlyphList
                     int characterCode = Integer.parseInt(tokenizer.nextToken(), 16);
                     value.append((char) characterCode);
                 }
-                if (NAME_TO_UNICODE.containsKey(glyphName))
+                if (nameToUnicode.containsKey(glyphName))
                 {
-                    LOG.warn("duplicate value for " + glyphName + " -> " + value);
+                    LOG.warn("duplicate value for " + glyphName + " -> " + value + " " +
+                             nameToUnicode.get(glyphName));
                 }
                 else
                 {
-                    NAME_TO_UNICODE.put(glyphName, value.toString());
+                    nameToUnicode.put(glyphName, value.toString());
                 }
+                // reverse mapping
+                unicodeToName.put(value.toString(), glyphName);
             }
         }
         catch (IOException io)
@@ -123,9 +133,9 @@ public class GlyphList
      * @param c Unicode character
      * @return PostScript glyph name, or ".notdef"
      */
-    public static String unicodeToName(char c)
+    public String unicodeToName(char c)
     {
-        String name = UNICODE_TO_NAME.get(Character.toString(c));
+        String name = unicodeToName.get(Character.toString(c));
         if (name == null)
         {
             return ".notdef";
@@ -139,14 +149,14 @@ public class GlyphList
      * @param name PostScript glyph name
      * @return Unicode character(s), or null.
      */
-    public static String toUnicode(String name)
+    public String toUnicode(String name)
     {
         if (name == null)
         {
             return null;
         }
 
-        String unicode = NAME_TO_UNICODE.get(name);
+        String unicode = nameToUnicode.get(name);
         if (unicode == null)
         {
             // test if we have a suffix and if so remove it
@@ -200,7 +210,7 @@ public class GlyphList
                     LOG.warn("Not a number in Unicode character name: " + name);
                 }
             }
-            NAME_TO_UNICODE.put(name, unicode);
+            nameToUnicode.put(name, unicode);
         }
         return unicode;
     }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java?rev=1622903&r1=1622902&r2=1622903&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2.java Sat Sep  6 18:45:09 2014
@@ -261,7 +261,7 @@ public class PDCIDFontType2 extends PDCI
                 }
 
                 // map to a Unicode value using the Adobe Glyph List
-                unicode = GlyphList.toUnicode(name);
+                unicode = GlyphList.DEFAULT.toUnicode(name);
             }
             else
             {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1622903&r1=1622902&r2=1622903&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Sat Sep  6 18:45:09 2014
@@ -60,7 +60,8 @@ public abstract class PDSimpleFont exten
     }
 
     protected Encoding encoding;
-    private final Set<Integer> noUnicode = new HashSet<Integer>();
+    protected GlyphList glyphList;
+    private final Set<Integer> noUnicode = new HashSet<Integer>(); // for logging
 
     /**
      * Constructor
@@ -130,6 +131,16 @@ public abstract class PDSimpleFont exten
         {
             this.encoding = readEncodingFromFont();
         }
+
+        // assign the glyph list based on the font
+        if (getBaseFont().equals("ZapfDingbats"))
+        {
+            glyphList = GlyphList.ZAPF_DINGBATS;
+        }
+        else
+        {
+            glyphList = GlyphList.DEFAULT;
+        }
     }
 
     /**
@@ -147,6 +158,14 @@ public abstract class PDSimpleFont exten
         return encoding;
     }
 
+    /**
+     * Returns the GlyphList assigned to this font.
+     */
+    public GlyphList getGlyphList()
+    {
+        return glyphList;
+    }
+
     @Override
     protected Boolean isFontSymbolic()
     {
@@ -215,10 +234,10 @@ public abstract class PDSimpleFont exten
         //    b) Look up the name in the Adobe Glyph List to obtain the Unicode value
 
         String name = null;
-        if (getEncoding() != null)
+        if (encoding != null)
         {
             name = encoding.getName(code);
-            unicode = GlyphList.toUnicode(name);
+            unicode = glyphList.toUnicode(name); // todo: tie a final GlyphList instance to each PDFont in the constructor.
             if (unicode != null)
             {
                 return unicode;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1622903&r1=1622902&r2=1622903&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Sat Sep  6 18:45:09 2014
@@ -228,7 +228,7 @@ public class PDTrueTypeFont extends PDSi
                 // (3, 1) - (Windows, Unicode)
                 if (cmapWinUnicode != null)
                 {
-                    String unicode = GlyphList.toUnicode(name);
+                    String unicode = GlyphList.DEFAULT.toUnicode(name);
                     if (unicode != null)
                     {
                         gid = unicode.codePointAt(0);

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java?rev=1622903&r1=1622902&r2=1622903&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFontEmbedder.java Sat Sep  6 18:45:09 2014
@@ -287,7 +287,7 @@ class PDTrueTypeFontEmbedder
             // pdf code to unicode by glyph list.
             if (!name.equals(".notdef"))
             {
-                String c = GlyphList.toUnicode(name);
+                String c = GlyphList.DEFAULT.toUnicode(name);
                 int charCode = c.codePointAt(0);
                 int gid = uniMap.getGlyphId(charCode);
                 if (gid != 0)

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1622903&r1=1622902&r2=1622903&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Sat Sep  6 18:45:09 2014
@@ -251,7 +251,7 @@ public class PDType0Font extends PDFont
             // this nonsymbolic behaviour isn't well documented, test with PDFBOX-1422,
             // also see PDCIDFontType2#cidToGID()
             String name = StandardEncoding.INSTANCE.getName(code);
-            return GlyphList.toUnicode(name);
+            return GlyphList.DEFAULT.toUnicode(name);
         }
         else if (isCMapPredefined && cMapUCS2 != null)
         {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1622903&r1=1622902&r2=1622903&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Sat Sep  6 18:45:09 2014
@@ -213,7 +213,7 @@ public class PDType1CFont extends PDSimp
         for (int i = 0; i < string.length(); i++)
         {
             String character = string.substring(i, i + 1);
-            String name = GlyphList.unicodeToName(character.charAt(0));
+            String name = getGlyphList().unicodeToName(character.charAt(0));
             width += cffFont.getType1CharString(name).getWidth();
         }
         return width;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1622903&r1=1622902&r2=1622903&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Sat Sep  6 18:45:09 2014
@@ -423,7 +423,7 @@ public class PDType1Font extends PDSimpl
             else
             {
                 // try unicode name
-                String unicodes = GlyphList.toUnicode(name);
+                String unicodes = getGlyphList().toUnicode(name);
                 if (unicodes != null)
                 {
                     if (unicodes.length() == 1)

Added: pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties?rev=1622903&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties (added)
+++ pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/zapf_dingbats.properties Sat Sep  6 18:45:09 2014
@@ -0,0 +1,219 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+# This list represents the mapping from glyph names to unicode values.
+#
+a100=275E
+a101=2761
+a102=2762
+a103=2763
+a104=2764
+a105=2710
+a106=2765
+a107=2766
+a108=2767
+a109=2660
+a10=2721
+a110=2665
+a111=2666
+a112=2663
+a117=2709
+a118=2708
+a119=2707
+a11=261B
+a120=2460
+a121=2461
+a122=2462
+a123=2463
+a124=2464
+a125=2465
+a126=2466
+a127=2467
+a128=2468
+a129=2469
+a12=261E
+a130=2776
+a131=2777
+a132=2778
+a133=2779
+a134=277A
+a135=277B
+a136=277C
+a137=277D
+a138=277E
+a139=277F
+a13=270C
+a140=2780
+a141=2781
+a142=2782
+a143=2783
+a144=2784
+a145=2785
+a146=2786
+a147=2787
+a148=2788
+a149=2789
+a14=270D
+a150=278A
+a151=278B
+a152=278C
+a153=278D
+a154=278E
+a155=278F
+a156=2790
+a157=2791
+a158=2792
+a159=2793
+a15=270E
+a160=2794
+a161=2192
+a162=27A3
+a163=2194
+a164=2195
+a165=2799
+a166=279B
+a167=279C
+a168=279D
+a169=279E
+a16=270F
+a170=279F
+a171=27A0
+a172=27A1
+a173=27A2
+a174=27A4
+a175=27A5
+a176=27A6
+a177=27A7
+a178=27A8
+a179=27A9
+a17=2711
+a180=27AB
+a181=27AD
+a182=27AF
+a183=27B2
+a184=27B3
+a185=27B5
+a186=27B8
+a187=27BA
+a188=27BB
+a189=27BC
+a18=2712
+a190=27BD
+a191=27BE
+a192=279A
+a193=27AA
+a194=27B6
+a195=27B9
+a196=2798
+a197=27B4
+a198=27B7
+a199=27AC
+a19=2713
+a1=2701
+a200=27AE
+a201=27B1
+a202=2703
+a203=2750
+a204=2752
+a205=276E
+a206=2770
+a20=2714
+a21=2715
+a22=2716
+a23=2717
+a24=2718
+a25=2719
+a26=271A
+a27=271B
+a28=271C
+a29=2722
+a2=2702
+a30=2723
+a31=2724
+a32=2725
+a33=2726
+a34=2727
+a35=2605
+a36=2729
+a37=272A
+a38=272B
+a39=272C
+a3=2704
+a40=272D
+a41=272E
+a42=272F
+a43=2730
+a44=2731
+a45=2732
+a46=2733
+a47=2734
+a48=2735
+a49=2736
+a4=260E
+a50=2737
+a51=2738
+a52=2739
+a53=273A
+a54=273B
+a55=273C
+a56=273D
+a57=273E
+a58=273F
+a59=2740
+a5=2706
+a60=2741
+a61=2742
+a62=2743
+a63=2744
+a64=2745
+a65=2746
+a66=2747
+a67=2748
+a68=2749
+a69=274A
+a6=271D
+a70=274B
+a71=25CF
+a72=274D
+a73=25A0
+a74=274F
+a75=2751
+a76=25B2
+a77=25BC
+a78=25C6
+a79=2756
+a7=271E
+a81=25D7
+a82=2758
+a83=2759
+a84=275A
+a85=276F
+a86=2771
+a87=2772
+a88=2773
+a89=2768
+a8=271F
+a90=2769
+a91=276C
+a92=276D
+a93=276A
+a94=276B
+a95=2774
+a96=2775
+a97=275B
+a98=275C
+a99=275D
+a9=2720

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java?rev=1622903&r1=1622902&r2=1622903&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java Sat Sep  6 18:45:09 2014
@@ -94,27 +94,27 @@ public class TestTTFParser
                         || "product".equals(name) || "integral".equals(name) || "Omega".equals(name)
                         || "radical".equals(name) || "tilde".equals(name))
                 {
-                    Assert.assertTrue(GlyphList.unicodeToName((char) charCode).startsWith(name));
+                    Assert.assertTrue(GlyphList.DEFAULT.unicodeToName((char) charCode).startsWith(name));
                 }
                 else if ("bar".equals(name))
                 {
-                    Assert.assertTrue(GlyphList.unicodeToName((char) charCode).endsWith(name));
+                    Assert.assertTrue(GlyphList.DEFAULT.unicodeToName((char) charCode).endsWith(name));
                 }
                 else if ("sfthyphen".equals(name))
                 {
-                    Assert.assertEquals("softhyphen", GlyphList.unicodeToName((char) charCode));
+                    Assert.assertEquals("softhyphen", GlyphList.DEFAULT.unicodeToName((char) charCode));
                 }
-                else if ("periodcentered".equals(name) && !GlyphList.unicodeToName((char) charCode).equals(name))
+                else if ("periodcentered".equals(name) && !GlyphList.DEFAULT.unicodeToName((char) charCode).equals(name))
                 {
-                    Assert.assertEquals("bulletoperator", GlyphList.unicodeToName((char) charCode));
+                    Assert.assertEquals("bulletoperator", GlyphList.DEFAULT.unicodeToName((char) charCode));
                 }
                 else if ("fraction".equals(name))
                 {
-                    Assert.assertEquals("divisionslash", GlyphList.unicodeToName((char) charCode));
+                    Assert.assertEquals("divisionslash", GlyphList.DEFAULT.unicodeToName((char) charCode));
                 }
                 else if ("mu".equals(name))
                 {
-                    Assert.assertEquals("mu1", GlyphList.unicodeToName((char) charCode));
+                    Assert.assertEquals("mu1", GlyphList.DEFAULT.unicodeToName((char) charCode));
                 }
                 else if ("pi".equals(name))
                 {
@@ -122,7 +122,7 @@ public class TestTTFParser
                 }
                 else
                 {
-                    Assert.assertEquals(GlyphList.unicodeToName((char) charCode), name);
+                    Assert.assertEquals(GlyphList.DEFAULT.unicodeToName((char) charCode), name);
                 }
             }
         }