You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2011/01/16 17:04:53 UTC

svn commit: r1059595 [1/5] - in /pdfbox/trunk/pdfbox/src/main: java/org/apache/pdfbox/pdmodel/font/ resources/org/apache/pdfbox/resources/cmap/

Author: lehmi
Date: Sun Jan 16 16:04:52 2011
New Revision: 1059595

URL: http://svn.apache.org/viewvc?rev=1059595&view=rev
Log:
PDFBOX-941: added the missing UCS2 mapping files, improved the encoding of Type0 fonts

Added:
    pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/cmap/Adobe-CNS1-UCS2
    pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/cmap/Adobe-GB1-UCS2
    pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/cmap/Adobe-Japan1-UCS2
    pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/cmap/Adobe-Korea1-UCS2
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java Sun Jan 16 16:04:52 2011
@@ -21,12 +21,16 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.encoding.conversion.CMapSubstitution;
 import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.util.ResourceLoader;
 
 /**
  * This is implementation for the CIDFontType0/CIDFontType2 Fonts.
@@ -36,6 +40,11 @@ import org.apache.pdfbox.pdmodel.common.
  */
 public abstract class PDCIDFont extends PDSimpleFont
 {
+    /**
+     * Log instance.
+     */
+    private static final Log log = LogFactory.getLog(PDCIDFont.class);
+
     private Map<Integer,Float> widthCache = null;
     private long defaultWidth = 0;
     
@@ -272,4 +281,77 @@ public abstract class PDCIDFont extends 
         return width;
     }
 
+    /**
+     * Extract the CIDSystemInfo.
+     * @return the CIDSystemInfo as "Registry-Ordering-Supplement" String, or null if the font has no CIDSystemInfo entry
+     */
+    private String getCIDSystemInfo()
+    {
+        String cidSystemInfo = null; 
+        COSDictionary cidsysteminfo = (COSDictionary)font.getDictionaryObject(COSName.CIDSYSTEMINFO);
+        if (cidsysteminfo != null) 
+        {
+            String ordering = cidsysteminfo.getString(COSName.ORDERING);
+            String registry = cidsysteminfo.getString(COSName.REGISTRY);
+            int supplement = cidsysteminfo.getInt(COSName.SUPPLEMENT);
+            cidSystemInfo = registry + "-" + ordering+ "-" + supplement;
+        }
+        return cidSystemInfo;
+    }
+    
+    @Override
+    protected void determineEncoding()
+    {
+        String cidSystemInfo = getCIDSystemInfo();
+        if (cidSystemInfo != null) 
+        {
+            cidSystemInfo = CMapSubstitution.substituteCMap( cidSystemInfo );
+            cmap = cmapObjects.get( cidSystemInfo );
+            if (cmap == null)
+            {
+                String resourceName = resourceRootCMAP + cidSystemInfo;
+                try {
+                    parseCmap( resourceRootCMAP, ResourceLoader.loadResource( resourceName ), null );
+                    if( cmap == null)
+                    {
+                        log.error("Error: Could not parse predefined CMAP file for '" + cidSystemInfo + "'" );
+                    }
+                }
+                catch(IOException exception) 
+                {
+                    log.error("Error: Could not find predefined CMAP file for '" + cidSystemInfo + "'" );
+                }
+            }
+        }
+        else
+        {
+            super.determineEncoding();
+        }
+    }
+    
+    @Override
+    public String encode(byte[] c, int offset, int length) throws IOException
+    {
+        String result = null;
+        if (cmap != null)
+        {
+            if (length == 1 && cmap.hasOneByteMappings()) 
+            {
+                result = cmap.lookup(c, offset, length);
+            }
+            else if (length == 2 && cmap.hasTwoByteMappings())
+            {
+                result = cmap.lookup(c, offset, length);
+            }
+            if (result == null && cmap.hasCIDMappings())
+            {
+                result = cmap.lookupCID(getCodeFromArray(c, offset, length));
+            }
+        }
+        else
+        {
+            result = super.encode(c, offset, length);
+        }
+        return result;
+    }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sun Jan 16 16:04:52 2011
@@ -84,6 +84,8 @@ public abstract class PDFont implements 
      * a type0 font with a cmap.
      */
     protected CMap cmap = null;
+    
+    private boolean hasToUnicode = false;
 
     protected static Map<String, CMap> cmapObjects =
         Collections.synchronizedMap( new HashMap<String, CMap>() );
@@ -342,7 +344,7 @@ public abstract class PDFont implements 
         FontMetric metric = getAFM();
         if( metric != null )
         {
-            Encoding encoding = getEncoding();
+            Encoding encoding = getFontEncoding();
             String characterName = encoding.getName( code );
             retval = metric.getCharacterWidth( characterName );
         }
@@ -402,7 +404,7 @@ public abstract class PDFont implements 
 
     private FontMetric afm = null;
     
-    private COSBase encodingObject = null;
+    private COSBase encoding = null;
     /**
      * cache the {@link COSName#ENCODING} object from
      * the font's dictionary since it is called so often.
@@ -413,11 +415,21 @@ public abstract class PDFont implements 
      * </pre>
      * @return
      */
-    protected COSBase getEncodingObject(){
-    	if(encodingObject==null){
-    		encodingObject = font.getDictionaryObject( COSName.ENCODING );
+    protected COSBase getEncoding(){
+    	if(encoding==null)
+    	{
+    		encoding = font.getDictionaryObject( COSName.ENCODING );
     	}
-    	return encodingObject;
+    	return encoding;
+    }
+
+    /**
+     * Set the encoding object in the font's dictionary and update the cached value.
+     * @param encoding the given encoding.
+     */
+    protected void setEncoding(COSBase encoding){
+        font.setItem( COSName.ENCODING, encoding );
+        this.encoding = encoding;
     }
     
     /**
@@ -449,7 +461,7 @@ public abstract class PDFont implements 
         // there is no cmap but probably an encoding with a suitable mapping
         if( retval == null )
         {
-            Encoding encoding = getEncoding();
+            Encoding encoding = getFontEncoding();
             if( encoding != null )
             {
                 retval = encoding.getCharacter( getCodeFromArray( c, offset, length ) );
@@ -524,20 +536,17 @@ public abstract class PDFont implements 
      *
      * @param enc The font encoding.
      */
-    public void setEncoding( Encoding enc )
+    public void setFontEncoding( Encoding enc )
     {
-        font.setItem( COSName.ENCODING, enc );
         fontEncoding = enc;
     }
 
     /**
      * This will get or create the encoder.
      *
-     * modified by Christophe Huault : DGBS Strasbourg huault@free.fr october 2004
-     *
      * @return The encoding to use.
      */
-    public Encoding getEncoding()
+    public Encoding getFontEncoding()
     {
         return fontEncoding;
     }
@@ -763,4 +772,21 @@ public abstract class PDFont implements 
         return width;
     }
 
+    /**
+     * Determines if a font has a ToUnicode entry.
+     * @return true if the font has a ToUnicode entry
+     */
+    protected boolean hasToUnicode() 
+    {
+        return hasToUnicode;
+    }
+    
+    /**
+     * Sets hasToUnicode to the given value.
+     * @param hasToUnicode the given value for hasToUnicode
+     */
+    protected void setHasToUnicode(boolean hasToUnicode)
+    {
+        this.hasToUnicode = hasToUnicode;
+    }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Sun Jan 16 16:04:52 2011
@@ -132,7 +132,7 @@ public abstract class PDSimpleFont exten
         FontMetric metric = getAFM();
         if( metric != null )
         {
-            Encoding encoding = getEncoding();
+            Encoding encoding = getFontEncoding();
             String characterName = encoding.getName( code );
             retval = metric.getCharacterHeight( characterName );
         }
@@ -331,7 +331,7 @@ public abstract class PDSimpleFont exten
     {
         String cmapName = null;
         COSName encodingName = null;
-        COSBase encoding = getEncodingObject(); 
+        COSBase encoding = getEncoding(); 
         Encoding fontEncoding = null;
         if (encoding != null) 
         {
@@ -386,19 +386,9 @@ public abstract class PDSimpleFont exten
                 }
             }
         }
-        setEncoding(fontEncoding);
+        setFontEncoding(fontEncoding);
         extractToUnicodeEncoding();
 
-        COSDictionary cidsysteminfo = (COSDictionary)font.getDictionaryObject(COSName.CIDSYSTEMINFO);
-        if (cidsysteminfo != null) 
-        {
-            String ordering = cidsysteminfo.getString(COSName.ORDERING);
-            String registry = cidsysteminfo.getString(COSName.REGISTRY);
-            int supplement = cidsysteminfo.getInt(COSName.SUPPLEMENT);
-            cmapName = registry + "-" + ordering+ "-" + supplement;
-            cmapName = CMapSubstitution.substituteCMap( cmapName );
-            cmap = cmapObjects.get( cmapName );
-        }
         if (cmap == null && cmapName != null) 
         {
             String resourceName = resourceRootCMAP + cmapName;
@@ -423,6 +413,7 @@ public abstract class PDSimpleFont exten
         COSBase toUnicode = getToUnicode();
         if( toUnicode != null )
         {
+            setHasToUnicode(true);
             if ( toUnicode instanceof COSStream )
             {
                 try {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Sun Jan 16 16:04:52 2011
@@ -169,7 +169,8 @@ public class PDTrueTypeFont extends PDSi
         }
         //only support winansi encoding right now, should really
         //just use Identity-H with unicode mapping
-        retval.setEncoding( new WinAnsiEncoding() );
+        retval.setFontEncoding( new WinAnsiEncoding() );
+        retval.setEncoding(COSName.WIN_ANSI_ENCODING);
         return retval;
     }
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Sun Jan 16 16:04:52 2011
@@ -170,4 +170,17 @@ public class PDType0Font extends PDSimpl
         return descendentFont.getFontWidth(charCode);
     }
 
+    @Override
+    public String encode(byte[] c, int offset, int length) throws IOException
+    {
+        if (hasToUnicode())
+        {
+            return super.encode(c, offset, length);
+        }
+        else
+        {
+            // TODO additional mapping if not Identity
+            return descendentFont.encode(c, offset, length);
+        }
+    }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java Sun Jan 16 16:04:52 2011
@@ -125,7 +125,7 @@ public class PDType1AfmPfbFont extends P
         AFMParser parser = new AFMParser(afm);
         parser.parse();
         metric = parser.getResult();
-        setEncoding(afmToDictionary(new AFMEncoding(metric)));
+        setFontEncoding(afmToDictionary(new AFMEncoding(metric)));
 
         // set the values
         setBaseFont(metric.getFontName());
@@ -147,7 +147,7 @@ public class PDType1AfmPfbFont extends P
 
         // widths
         List<CharMetric> listmetric = metric.getCharMetrics();
-        Encoding encoding = getEncoding();
+        Encoding encoding = getFontEncoding();
         int maxWidths = 256;
         List<Number> widths = new ArrayList(maxWidths);
         Integer zero = new Integer(250);

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Sun Jan 16 16:04:52 2011
@@ -176,7 +176,8 @@ public class PDType1Font extends PDSimpl
     {
         this();
         setBaseFont( baseFont );
-        setEncoding(new WinAnsiEncoding());
+        setFontEncoding(new WinAnsiEncoding());
+        setEncoding(COSName.WIN_ANSI_ENCODING);
     }
 
     /**
@@ -263,7 +264,7 @@ public class PDType1Font extends PDSimpl
     protected void determineEncoding()
     {
         super.determineEncoding();
-        Encoding fontEncoding = getEncoding();
+        Encoding fontEncoding = getFontEncoding();
         if(fontEncoding == null)
         {
             FontMetric metric = getAFM();
@@ -272,8 +273,8 @@ public class PDType1Font extends PDSimpl
                 fontEncoding = new AFMEncoding( metric );
             }
         }
-        getEncodingFromFont(getEncoding() == null);
-        setEncoding(fontEncoding);
+        getEncodingFromFont(getFontEncoding() == null);
+        setFontEncoding(fontEncoding);
     }
     
     /**
@@ -306,7 +307,7 @@ public class PDType1Font extends PDSimpl
                         {
                             if (line.startsWith("currentdict end")) {
                                 if (encoding != null)
-                                    setEncoding(encoding);
+                                    setFontEncoding(encoding);
                                 break;
                             }
                             if (line.startsWith("/Encoding")) 
@@ -321,13 +322,13 @@ public class PDType1Font extends PDSimpl
                                 }
                                 // if there is already an encoding, we don't need to
                                 // assign another one
-                                else if (getEncoding() == null)
+                                else if (getFontEncoding() == null)
                                 {
                                     StringTokenizer st = new StringTokenizer(line);
                                     // ignore the first token
                                     st.nextElement();
                                     String type1Encoding = st.nextToken();
-                                    setEncoding(
+                                    setFontEncoding(
                                         EncodingManager.INSTANCE.getEncoding(
                                                 COSName.getPDFName(type1Encoding)));
                                     break;
@@ -387,7 +388,7 @@ public class PDType1Font extends PDSimpl
     @Override
     public String encode(byte[] c, int offset, int length) throws IOException
     {
-        if (type1CFont != null && getEncoding() == null)
+        if (type1CFont != null && getFontEncoding() == null)
         {
             return type1CFont.encode(c, offset, length);
         }