You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2012/07/10 21:06:08 UTC

svn commit: r1359845 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding: MacRomanEncoding.java WinAnsiEncoding.java

Author: leleueri
Date: Tue Jul 10 19:06:08 2012
New Revision: 1359845

URL: http://svn.apache.org/viewvc?rev=1359845&view=rev
Log:
PDFBOX-1357 - Missing character encoding in MacRoman & WinAnsi 

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java?rev=1359845&r1=1359844&r2=1359845&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java Tue Jul 10 19:06:08 2012
@@ -16,6 +16,8 @@
  */
 package org.apache.pdfbox.encoding;
 
+import java.io.IOException;
+
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSName;
 
@@ -248,6 +250,19 @@ public class MacRomanEncoding extends En
         addCharacterEncoding( 0172, "z" );
         addCharacterEncoding( 060, "zero" );
     }
+    
+    public String getName( int code ) throws IOException
+  	{
+  		if (!codeToName.containsKey(code) && code == 0312) {
+  				/*
+  				 * The space character is also encoded as 0312 in MacRoman and 0240 in WinAnsi. 
+  				 * The meaning of this duplicate code is "nonbreaking space" but it is 
+  				 * typographically the same as space. 
+  				 */
+  				return "space";
+  		}
+  		return codeToName.get( code );
+  	}
 
     /**
      * Convert this standard java object to a COS object.

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java?rev=1359845&r1=1359844&r2=1359845&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java Tue Jul 10 19:06:08 2012
@@ -263,13 +263,30 @@ public class WinAnsiEncoding extends Enc
 	public String getName( int code ) throws IOException
 	{
 		if (!codeToName.containsKey(code) && code > 040) {
-			/*
-			 * According to the PDFReference Appendix D :
-			 * In WinAnsiEncoding, all unused codes greater than 40 map to the bullet character. 
-			 * However, only code 0225 is specifically assigned to the bullet character;
-			 * other codes are subject to future reassignment
-			 */
-			return "bullet";
+			switch (code) {
+			case 0240:
+				/*
+				 * The space character is also encoded as 0312 in MacRoman and 0240 in WinAnsi. 
+				 * The meaning of this duplicate code is "nonbreaking space" but it is 
+				 * typographically the same as space. 
+				 */
+				return "space";
+			case 0255:
+				/*
+				 * The hyphen character is also encoded as 0255 in WinAnsi. 
+				 * The meaning of this duplicate code is "soft hyphen" but it is 
+				 * typographically the same as hyphen. 
+				 */
+				return "hyphen";
+			default:
+				/*
+				 * According to the PDFReference Appendix D :
+				 * In WinAnsiEncoding, all unused codes greater than 040 (octal) map to the bullet character. 
+				 * However, only code 0225 is specifically assigned to the bullet character;
+				 * other codes are subject to future reassignment
+				 */
+				return "bullet";
+			}
 		}
 		return codeToName.get( code );
 	}