You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2011/01/16 17:04:53 UTC
svn commit: r1059595 [1/5] - in /pdfbox/trunk/pdfbox/src/main:
java/org/apache/pdfbox/pdmodel/font/
resources/org/apache/pdfbox/resources/cmap/
Author: lehmi
Date: Sun Jan 16 16:04:52 2011
New Revision: 1059595
URL: http://svn.apache.org/viewvc?rev=1059595&view=rev
Log:
PDFBOX-941: added the missing UCS2 mapping files, improved the encoding of Type0 fonts
Added:
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/cmap/Adobe-CNS1-UCS2
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/cmap/Adobe-GB1-UCS2
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/cmap/Adobe-Japan1-UCS2
pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/cmap/Adobe-Korea1-UCS2
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFont.java Sun Jan 16 16:04:52 2011
@@ -21,12 +21,16 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.encoding.conversion.CMapSubstitution;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.util.ResourceLoader;
/**
* This is implementation for the CIDFontType0/CIDFontType2 Fonts.
@@ -36,6 +40,11 @@ import org.apache.pdfbox.pdmodel.common.
*/
public abstract class PDCIDFont extends PDSimpleFont
{
+ /**
+ * Log instance.
+ */
+ private static final Log log = LogFactory.getLog(PDCIDFont.class);
+
private Map<Integer,Float> widthCache = null;
private long defaultWidth = 0;
@@ -272,4 +281,77 @@ public abstract class PDCIDFont extends
return width;
}
+ /**
+ * Extract the CIDSystemInfo.
+ * @return the CIDSystemInfo as String
+ */
+ private String getCIDSystemInfo()
+ {
+ String cidSystemInfo = null;
+ COSDictionary cidsysteminfo = (COSDictionary)font.getDictionaryObject(COSName.CIDSYSTEMINFO);
+ if (cidsysteminfo != null)
+ {
+ String ordering = cidsysteminfo.getString(COSName.ORDERING);
+ String registry = cidsysteminfo.getString(COSName.REGISTRY);
+ int supplement = cidsysteminfo.getInt(COSName.SUPPLEMENT);
+ cidSystemInfo = registry + "-" + ordering+ "-" + supplement;
+ }
+ return cidSystemInfo;
+ }
+
+ @Override
+ protected void determineEncoding()
+ {
+ String cidSystemInfo = getCIDSystemInfo();
+ if (cidSystemInfo != null)
+ {
+ cidSystemInfo = CMapSubstitution.substituteCMap( cidSystemInfo );
+ cmap = cmapObjects.get( cidSystemInfo );
+ if (cmap == null)
+ {
+ String resourceName = resourceRootCMAP + cidSystemInfo;
+ try {
+ parseCmap( resourceRootCMAP, ResourceLoader.loadResource( resourceName ), null );
+ if( cmap == null)
+ {
+ log.error("Error: Could not parse predefined CMAP file for '" + cidSystemInfo + "'" );
+ }
+ }
+ catch(IOException exception)
+ {
+ log.error("Error: Could not find predefined CMAP file for '" + cidSystemInfo + "'" );
+ }
+ }
+ }
+ else
+ {
+ super.determineEncoding();
+ }
+ }
+
+ @Override
+ public String encode(byte[] c, int offset, int length) throws IOException
+ {
+ String result = null;
+ if (cmap != null)
+ {
+ if (length == 1 && cmap.hasOneByteMappings())
+ {
+ result = cmap.lookup(c, offset, length);
+ }
+ else if (length == 2 && cmap.hasTwoByteMappings())
+ {
+ result = cmap.lookup(c, offset, length);
+ }
+ if (result == null && cmap.hasCIDMappings())
+ {
+ result = cmap.lookupCID(getCodeFromArray(c, offset, length));
+ }
+ }
+ else
+ {
+ result = super.encode(c, offset, length);
+ }
+ return result;
+ }
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sun Jan 16 16:04:52 2011
@@ -84,6 +84,8 @@ public abstract class PDFont implements
* a type0 font with a cmap.
*/
protected CMap cmap = null;
+
+ private boolean hasToUnicode = false;
protected static Map<String, CMap> cmapObjects =
Collections.synchronizedMap( new HashMap<String, CMap>() );
@@ -342,7 +344,7 @@ public abstract class PDFont implements
FontMetric metric = getAFM();
if( metric != null )
{
- Encoding encoding = getEncoding();
+ Encoding encoding = getFontEncoding();
String characterName = encoding.getName( code );
retval = metric.getCharacterWidth( characterName );
}
@@ -402,7 +404,7 @@ public abstract class PDFont implements
private FontMetric afm = null;
- private COSBase encodingObject = null;
+ private COSBase encoding = null;
/**
* cache the {@link COSName#ENCODING} object from
* the font's dictionary since it is called so often.
@@ -413,11 +415,21 @@ public abstract class PDFont implements
* </pre>
* @return
*/
- protected COSBase getEncodingObject(){
- if(encodingObject==null){
- encodingObject = font.getDictionaryObject( COSName.ENCODING );
+ protected COSBase getEncoding(){
+ if(encoding==null)
+ {
+ encoding = font.getDictionaryObject( COSName.ENCODING );
}
- return encodingObject;
+ return encoding;
+ }
+
+ /**
+ * Set the encoding object in the font's dictionary.
+ * @param encoding the given encoding.
+ */
+ protected void setEncoding(COSBase encoding){
+ font.setItem( COSName.ENCODING, encoding );
+ this.encoding = encoding;
}
/**
@@ -449,7 +461,7 @@ public abstract class PDFont implements
// there is no cmap but probably an encoding with a suitable mapping
if( retval == null )
{
- Encoding encoding = getEncoding();
+ Encoding encoding = getFontEncoding();
if( encoding != null )
{
retval = encoding.getCharacter( getCodeFromArray( c, offset, length ) );
@@ -524,20 +536,17 @@ public abstract class PDFont implements
*
* @param enc The font encoding.
*/
- public void setEncoding( Encoding enc )
+ public void setFontEncoding( Encoding enc )
{
- font.setItem( COSName.ENCODING, enc );
fontEncoding = enc;
}
/**
* This will get or create the encoder.
*
- * modified by Christophe Huault : DGBS Strasbourg huault@free.fr october 2004
- *
* @return The encoding to use.
*/
- public Encoding getEncoding()
+ public Encoding getFontEncoding()
{
return fontEncoding;
}
@@ -763,4 +772,21 @@ public abstract class PDFont implements
return width;
}
+ /**
+ * Determines if a font has a ToUnicode entry.
+ * @return true if the font has a ToUnicode entry
+ */
+ protected boolean hasToUnicode()
+ {
+ return hasToUnicode;
+ }
+
+ /**
+ * Sets hasToUnicode to the given value.
+ * @param hasToUnicode the given value for hasToUnicode
+ */
+ protected void setHasToUnicode(boolean hasToUnicode)
+ {
+ this.hasToUnicode = hasToUnicode;
+ }
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDSimpleFont.java Sun Jan 16 16:04:52 2011
@@ -132,7 +132,7 @@ public abstract class PDSimpleFont exten
FontMetric metric = getAFM();
if( metric != null )
{
- Encoding encoding = getEncoding();
+ Encoding encoding = getFontEncoding();
String characterName = encoding.getName( code );
retval = metric.getCharacterHeight( characterName );
}
@@ -331,7 +331,7 @@ public abstract class PDSimpleFont exten
{
String cmapName = null;
COSName encodingName = null;
- COSBase encoding = getEncodingObject();
+ COSBase encoding = getEncoding();
Encoding fontEncoding = null;
if (encoding != null)
{
@@ -386,19 +386,9 @@ public abstract class PDSimpleFont exten
}
}
}
- setEncoding(fontEncoding);
+ setFontEncoding(fontEncoding);
extractToUnicodeEncoding();
- COSDictionary cidsysteminfo = (COSDictionary)font.getDictionaryObject(COSName.CIDSYSTEMINFO);
- if (cidsysteminfo != null)
- {
- String ordering = cidsysteminfo.getString(COSName.ORDERING);
- String registry = cidsysteminfo.getString(COSName.REGISTRY);
- int supplement = cidsysteminfo.getInt(COSName.SUPPLEMENT);
- cmapName = registry + "-" + ordering+ "-" + supplement;
- cmapName = CMapSubstitution.substituteCMap( cmapName );
- cmap = cmapObjects.get( cmapName );
- }
if (cmap == null && cmapName != null)
{
String resourceName = resourceRootCMAP + cmapName;
@@ -423,6 +413,7 @@ public abstract class PDSimpleFont exten
COSBase toUnicode = getToUnicode();
if( toUnicode != null )
{
+ setHasToUnicode(true);
if ( toUnicode instanceof COSStream )
{
try {
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDTrueTypeFont.java Sun Jan 16 16:04:52 2011
@@ -169,7 +169,8 @@ public class PDTrueTypeFont extends PDSi
}
//only support winansi encoding right now, should really
//just use Identity-H with unicode mapping
- retval.setEncoding( new WinAnsiEncoding() );
+ retval.setFontEncoding( new WinAnsiEncoding() );
+ retval.setEncoding(COSName.WIN_ANSI_ENCODING);
return retval;
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType0Font.java Sun Jan 16 16:04:52 2011
@@ -170,4 +170,17 @@ public class PDType0Font extends PDSimpl
return descendentFont.getFontWidth(charCode);
}
+ @Override
+ public String encode(byte[] c, int offset, int length) throws IOException
+ {
+ if (hasToUnicode())
+ {
+ return super.encode(c, offset, length);
+ }
+ else
+ {
+ // TODO additional mapping if not Identity
+ return descendentFont.encode(c, offset, length);
+ }
+ }
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1AfmPfbFont.java Sun Jan 16 16:04:52 2011
@@ -125,7 +125,7 @@ public class PDType1AfmPfbFont extends P
AFMParser parser = new AFMParser(afm);
parser.parse();
metric = parser.getResult();
- setEncoding(afmToDictionary(new AFMEncoding(metric)));
+ setFontEncoding(afmToDictionary(new AFMEncoding(metric)));
// set the values
setBaseFont(metric.getFontName());
@@ -147,7 +147,7 @@ public class PDType1AfmPfbFont extends P
// widths
List<CharMetric> listmetric = metric.getCharMetrics();
- Encoding encoding = getEncoding();
+ Encoding encoding = getFontEncoding();
int maxWidths = 256;
List<Number> widths = new ArrayList(maxWidths);
Integer zero = new Integer(250);
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1059595&r1=1059594&r2=1059595&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Sun Jan 16 16:04:52 2011
@@ -176,7 +176,8 @@ public class PDType1Font extends PDSimpl
{
this();
setBaseFont( baseFont );
- setEncoding(new WinAnsiEncoding());
+ setFontEncoding(new WinAnsiEncoding());
+ setEncoding(COSName.WIN_ANSI_ENCODING);
}
/**
@@ -263,7 +264,7 @@ public class PDType1Font extends PDSimpl
protected void determineEncoding()
{
super.determineEncoding();
- Encoding fontEncoding = getEncoding();
+ Encoding fontEncoding = getFontEncoding();
if(fontEncoding == null)
{
FontMetric metric = getAFM();
@@ -272,8 +273,8 @@ public class PDType1Font extends PDSimpl
fontEncoding = new AFMEncoding( metric );
}
}
- getEncodingFromFont(getEncoding() == null);
- setEncoding(fontEncoding);
+ getEncodingFromFont(getFontEncoding() == null);
+ setFontEncoding(fontEncoding);
}
/**
@@ -306,7 +307,7 @@ public class PDType1Font extends PDSimpl
{
if (line.startsWith("currentdict end")) {
if (encoding != null)
- setEncoding(encoding);
+ setFontEncoding(encoding);
break;
}
if (line.startsWith("/Encoding"))
@@ -321,13 +322,13 @@ public class PDType1Font extends PDSimpl
}
// if there is already an encoding, we don't need to
// assign another one
- else if (getEncoding() == null)
+ else if (getFontEncoding() == null)
{
StringTokenizer st = new StringTokenizer(line);
// ignore the first token
st.nextElement();
String type1Encoding = st.nextToken();
- setEncoding(
+ setFontEncoding(
EncodingManager.INSTANCE.getEncoding(
COSName.getPDFName(type1Encoding)));
break;
@@ -387,7 +388,7 @@ public class PDType1Font extends PDSimpl
@Override
public String encode(byte[] c, int offset, int length) throws IOException
{
- if (type1CFont != null && getEncoding() == null)
+ if (type1CFont != null && getFontEncoding() == null)
{
return type1CFont.encode(c, offset, length);
}