You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2021/03/06 11:00:28 UTC

svn commit: r1887253 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java

Author: tilman
Date: Sat Mar  6 11:00:28 2021
New Revision: 1887253

URL: http://svn.apache.org/viewvc?rev=1887253&view=rev
Log:
PDFBOX-5115: support soft hyphen

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java?rev=1887253&r1=1887252&r2=1887253&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java Sat Mar  6 11:00:28 2021
@@ -252,7 +252,7 @@ public class WinAnsiEncoding extends Enc
             {060, "zero"},
             // adding some additional mappings as defined in Appendix D of the pdf spec
             {0240, "nbspace"},
-            {0255, "hyphen"}
+            {0255, "sfthyphen"}
     };
 
     /**

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java?rev=1887253&r1=1887252&r2=1887253&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java Sat Mar  6 11:00:28 2021
@@ -383,4 +383,42 @@ class PDFontTest
 
         Files.delete(tempPdfFile.toPath());    
     }
+
+    /**
+     * PDFBOX-5115: U+00AD (soft hyphen) should work with WinAnsiEncoding. 
+     */
+    @Test
+    void testSoftHyphen() throws IOException
+    {
+        String text = "- \u00AD";
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        try (PDDocument doc = new PDDocument())
+        {
+            PDPage page = new PDPage();
+            doc.addPage(page);
+            PDFont font1 = PDType1Font.HELVETICA;
+            PDFont font2 = PDType0Font.load(doc, PDFontTest.class.getResourceAsStream(
+                    "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"));
+            try (PDPageContentStream cs = new PDPageContentStream(doc, page))
+            {
+                cs.beginText();
+                cs.newLineAtOffset(100, 500);
+                cs.setFont(font1, 10);
+                cs.showText(text);
+                cs.newLineAtOffset(0, 100);
+                cs.setFont(font2, 10);
+                cs.showText(text);
+                cs.endText();
+            }
+            doc.save(baos);
+        }
+        
+        try (PDDocument doc = Loader.loadPDF(baos.toByteArray()))
+        {
+            PDFTextStripper stripper = new PDFTextStripper();
+            stripper.setLineSeparator("\n");
+            String extractedText = stripper.getText(doc);
+            assertEquals(text + "\n" + text, extractedText.trim());
+        }
+    }
 }