You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2017/06/21 06:33:12 UTC
svn commit: r1799390 -
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
Author: tilman
Date: Wed Jun 21 06:33:12 2017
New Revision: 1799390
URL: http://svn.apache.org/viewvc?rev=1799390&view=rev
Log:
PDFBOX-3833: don't treat prolonged sound mark as a diacritic
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java?rev=1799390&r1=1799389&r2=1799390&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java Wed Jun 21 06:33:12 2017
@@ -689,6 +689,14 @@ public final class TextPosition
{
return false;
}
+ if ("ー".equals(text))
+ {
+ // PDFBOX-3833: ー is not a real diacritic like ¨ or ˆ, it just changes the
+ // pronunciation of the previous sound, and is printed after the previous glyph
+ // http://www.japanesewithanime.com/2017/04/prolonged-sound-mark.html
+ // Ignoring it as diacritic avoids trouble if it slightly overlaps with the next glyph.
+ return false;
+ }
int type = Character.getType(text.charAt(0));
return type == Character.NON_SPACING_MARK ||
type == Character.MODIFIER_SYMBOL ||