You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/11/29 19:23:52 UTC
svn commit: r1642460 - in
/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font:
PDFont.java PDType1CFont.java
Author: lehmi
Date: Sat Nov 29 18:23:52 2014
New Revision: 1642460
URL: http://svn.apache.org/r1642460
Log:
PDFBOX-2377: use the internal mapping of a type1C font only if a charset is present within the font descriptor
Modified:
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1642460&r1=1642459&r2=1642460&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Sat Nov 29 18:23:52 2014
@@ -593,8 +593,15 @@ public abstract class PDFont implements
}
}
}
-
- private static String getStringFromArray( byte[] c, int offset, int length ) throws IOException
+ /**
+ * Map the given byte(s) to a string.
+ *
+ * @param c the byte array
+ * @param offset the offset of the byte(s)
+ * @param length the number of bytes, usually 1 or 2
+ * @return the mapped string
+ */
+ protected static String getStringFromArray( byte[] c, int offset, int length )
{
String retval = null;
if( length == 1 )
@@ -605,10 +612,6 @@ public abstract class PDFont implements
{
retval = DOUBLE_CHAR_STRING[(c[offset]+256)%256][(c[offset+1]+256)%256];
}
- else
- {
- throw new IOException( "Error:Unknown character length:" + length );
- }
return retval;
}
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1642460&r1=1642459&r2=1642460&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Sat Nov 29 18:23:52 2014
@@ -92,8 +92,6 @@ public class PDType1CFont extends PDSimp
private static final byte[] SPACE_BYTES = {(byte)32};
- private final int charOffset;
-
/**
* Constructor.
* @param fontDictionary the corresponding dictionary
@@ -101,7 +99,6 @@ public class PDType1CFont extends PDSimp
public PDType1CFont( COSDictionary fontDictionary ) throws IOException
{
super( fontDictionary );
- charOffset = getFirstChar() > -1 ? getFirstChar() - 1 : 0;
load();
}
@@ -122,19 +119,26 @@ public class PDType1CFont extends PDSimp
private String getCharacter(byte[] bytes, int offset, int length)
{
- int code = getCodeFromArray(bytes, offset, length);
String character = null;
- if (charOffset > 0)
- {
- code -= charOffset;
- }
- else if (codeToSID.containsKey(code))
+ // the pdf doesn't provide any encoding or toUnicode mapping
+ // we can use the font internal mapping, if a charset is defined
+ // otherwise the internal mapping may produce trash only
+ if (getFontDescriptor().getCharSet() != null)
{
- code = codeToSID.get(code);
+ int code = getCodeFromArray(bytes, offset, length);
+ if (codeToSID.containsKey(code))
+ {
+ code = codeToSID.get(code);
+ }
+ if (sidToCharacter.containsKey(code))
+ {
+ character = sidToCharacter.get(code);
+ }
}
- if (sidToCharacter.containsKey(code))
+ else
{
- character = sidToCharacter.get(code);
+ // map the byte code to a character
+ character = getStringFromArray(bytes, offset, length);
}
return character;
}