You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2021/03/06 11:00:28 UTC
svn commit: r1887253 - in /pdfbox/trunk/pdfbox/src:
main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java
test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java
Author: tilman
Date: Sat Mar 6 11:00:28 2021
New Revision: 1887253
URL: http://svn.apache.org/viewvc?rev=1887253&view=rev
Log:
PDFBOX-5115: support soft hyphen
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java?rev=1887253&r1=1887252&r2=1887253&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java Sat Mar 6 11:00:28 2021
@@ -252,7 +252,7 @@ public class WinAnsiEncoding extends Enc
{060, "zero"},
// adding some additional mappings as defined in Appendix D of the pdf spec
{0240, "nbspace"},
- {0255, "hyphen"}
+ {0255, "sfthyphen"}
};
/**
Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java?rev=1887253&r1=1887252&r2=1887253&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/PDFontTest.java Sat Mar 6 11:00:28 2021
@@ -383,4 +383,42 @@ class PDFontTest
Files.delete(tempPdfFile.toPath());
}
+
+ /**
+ * PDFBOX-5115: U+00AD (soft hyphen) should work with WinAnsiEncoding.
+ */
+ @Test
+ void testSoftHyphen() throws IOException
+ {
+ String text = "- \u00AD";
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try (PDDocument doc = new PDDocument())
+ {
+ PDPage page = new PDPage();
+ doc.addPage(page);
+ PDFont font1 = PDType1Font.HELVETICA;
+ PDFont font2 = PDType0Font.load(doc, PDFontTest.class.getResourceAsStream(
+ "/org/apache/pdfbox/resources/ttf/LiberationSans-Regular.ttf"));
+ try (PDPageContentStream cs = new PDPageContentStream(doc, page))
+ {
+ cs.beginText();
+ cs.newLineAtOffset(100, 500);
+ cs.setFont(font1, 10);
+ cs.showText(text);
+ cs.newLineAtOffset(0, 100);
+ cs.setFont(font2, 10);
+ cs.showText(text);
+ cs.endText();
+ }
+ doc.save(baos);
+ }
+
+ try (PDDocument doc = Loader.loadPDF(baos.toByteArray()))
+ {
+ PDFTextStripper stripper = new PDFTextStripper();
+ stripper.setLineSeparator("\n");
+ String extractedText = stripper.getText(doc);
+ assertEquals(text + "\n" + text, extractedText.trim());
+ }
+ }
}