You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/09/26 17:34:41 UTC
svn commit: r1627810 - in /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font: PDFont.java PDType1CFont.java PDType1Font.java
Author: lehmi
Date: Fri Sep 26 15:34:41 2014
New Revision: 1627810
URL: http://svn.apache.org/r1627810
Log:
PDFBOX-2377: overhaul the text extraction for Type1C fonts
Modified:
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java?rev=1627810&r1=1627809&r2=1627810&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDFont.java Fri Sep 26 15:34:41 2014
@@ -570,14 +570,18 @@ public abstract class PDFont implements
{
for( int i=0; i<256; i++ )
{
- try
- {
- SINGLE_CHAR_STRING[i] = new String( new byte[] {(byte)i}, "ISO-8859-1" );
- }
- catch (UnsupportedEncodingException e)
+ // ISO-8859-1 doesn't support the whole range
+ if (i >= 32 && (i < 127 || i > 159))
{
- // Nothing should happen here
- LOG.error(e,e);
+ try
+ {
+ SINGLE_CHAR_STRING[i] = new String( new byte[] {(byte)i}, "ISO-8859-1" );
+ }
+ catch (UnsupportedEncodingException e)
+ {
+ // Nothing should happen here
+ LOG.error(e,e);
+ }
}
for( int j=0; j<256; j++ )
{
@@ -594,7 +598,7 @@ public abstract class PDFont implements
}
}
- private static String getStringFromArray( byte[] c, int offset, int length ) throws IOException
+ protected String getStringFromArray( byte[] c, int offset, int length ) throws IOException
{
String retval = null;
if( length == 1 )
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java?rev=1627810&r1=1627809&r2=1627810&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1CFont.java Fri Sep 26 15:34:41 2014
@@ -92,8 +92,6 @@ public class PDType1CFont extends PDSimp
private static final byte[] SPACE_BYTES = {(byte)32};
- private final int charOffset;
-
/**
* Constructor.
* @param fontDictionary the corresponding dictionary
@@ -101,7 +99,6 @@ public class PDType1CFont extends PDSimp
public PDType1CFont( COSDictionary fontDictionary ) throws IOException
{
super( fontDictionary );
- charOffset = getFirstChar() > -1 ? getFirstChar() - 1 : 0;
load();
}
@@ -124,11 +121,7 @@ public class PDType1CFont extends PDSimp
{
int code = getCodeFromArray(bytes, offset, length);
String character = null;
- if (charOffset > 0)
- {
- code -= charOffset;
- }
- else if (codeToSID.containsKey(code))
+ if (codeToSID.containsKey(code))
{
code = codeToSID.get(code);
}
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java?rev=1627810&r1=1627809&r2=1627810&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType1Font.java Fri Sep 26 15:34:41 2014
@@ -463,11 +463,13 @@ public class PDType1Font extends PDSimpl
{
if (type1CFont != null && getFontEncoding() == null)
{
- String character = type1CFont.encode(c, offset, length);
- if (character != null)
+ // check for ASCII values >= 32
+ if (length == 1 && c[offset] >= 32)
{
- return character;
+ return getStringFromArray( c, offset, length );
}
+ // handle values < 32 and negative byte values (int > 127)
+ return type1CFont.encode(c, offset, length);
}
return super.encode(c, offset, length);
}