You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2012/07/10 21:06:08 UTC
svn commit: r1359845 - in
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding:
MacRomanEncoding.java WinAnsiEncoding.java
Author: leleueri
Date: Tue Jul 10 19:06:08 2012
New Revision: 1359845
URL: http://svn.apache.org/viewvc?rev=1359845&view=rev
Log:
PDFBOX-1357 - Missing character encoding in MacRoman & WinAnsi
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java?rev=1359845&r1=1359844&r2=1359845&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java Tue Jul 10 19:06:08 2012
@@ -16,6 +16,8 @@
*/
package org.apache.pdfbox.encoding;
+import java.io.IOException;
+
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
@@ -248,6 +250,19 @@ public class MacRomanEncoding extends En
addCharacterEncoding( 0172, "z" );
addCharacterEncoding( 060, "zero" );
}
+
+ public String getName( int code ) throws IOException
+ {
+ if (!codeToName.containsKey(code) && code == 0312) {
+ /*
+ * The space character is also encoded as 0312 in MacRoman and 0240 in WinAnsi.
+ * The meaning of this duplicate code is "nonbreaking space" but it is
+ * typographically the same as space.
+ */
+ return "space";
+ }
+ return codeToName.get( code );
+ }
/**
* Convert this standard java object to a COS object.
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java?rev=1359845&r1=1359844&r2=1359845&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java Tue Jul 10 19:06:08 2012
@@ -263,13 +263,30 @@ public class WinAnsiEncoding extends Enc
public String getName( int code ) throws IOException
{
if (!codeToName.containsKey(code) && code > 040) {
- /*
- * According to the PDFReference Appendix D :
- * In WinAnsiEncoding, all unused codes greater than 40 map to the bullet character.
- * However, only code 0225 is specifically assigned to the bullet character;
- * other codes are subject to future reassignment
- */
- return "bullet";
+ switch (code) {
+ case 0240:
+ /*
+ * The space character is also encoded as 0312 in MacRoman and 0240 in WinAnsi.
+ * The meaning of this duplicate code is "nonbreaking space" but it is
+ * typographically the same as space.
+ */
+ return "space";
+ case 0255:
+ /*
+ * The hyphen character is also encoded as 0255 in WinAnsi.
+ * The meaning of this duplicate code is "soft hyphen" but it is
+ * typographically the same as hyphen.
+ */
+ return "hyphen";
+ default:
+ /*
+ * According to the PDFReference Appendix D :
+ * In WinAnsiEncoding, all unused codes greater than 40 (octal, i.e. 040) map to the bullet character.
+ * However, only code 0225 is specifically assigned to the bullet character;
+ * other codes are subject to future reassignment
+ */
+ return "bullet";
+ }
}
return codeToName.get( code );
}