You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2014/12/12 22:50:41 UTC
svn commit: r1645083 - in /pdfbox/trunk:
fontbox/src/main/java/org/apache/fontbox/ttf/
pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/
Author: jahewson
Date: Fri Dec 12 21:50:41 2014
New Revision: 1645083
URL: http://svn.apache.org/r1645083
Log:
PDFBOX-2524: Generate ToUnicode CMap for embedded CIDFontType2
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java (with props)
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java?rev=1645083&r1=1645082&r2=1645083&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CmapSubtable.java Fri Dec 12 21:50:41 2014
@@ -541,16 +541,16 @@ public class CmapSubtable
}
/**
- * Returns the character code for the given GID.
+ * Returns the character code stored for the given GID, or null if the GID is out of range.
*
* @param gid glyph id
* @return character code
*/
- public int getCharacterCode(int gid)
+ public Integer getCharacterCode(int gid)
{
if (gid < 0 || gid >= glyphIdToCharacterCode.length)
{
- return 0;
+ return null;
}
return glyphIdToCharacterCode[gid];
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java?rev=1645083&r1=1645082&r2=1645083&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDCIDFontType2Embedder.java Fri Dec 12 21:50:41 2014
@@ -17,6 +17,8 @@
package org.apache.pdfbox.pdmodel.font;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.IOException;
@@ -25,6 +27,7 @@ import org.apache.pdfbox.cos.COSDictiona
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.common.PDStream;
/**
* Embedded PDCIDFontType2 builder. Helper class to populate a PDCIDFontType2 and its parent
@@ -63,6 +66,51 @@ final class PDCIDFontType2Embedder exten
COSArray descendantFonts = new COSArray();
descendantFonts.add(cidFont);
dict.setItem(COSName.DESCENDANT_FONTS, descendantFonts);
+
+ // ToUnicode CMap
+ dict.setItem(COSName.TO_UNICODE, createToUnicodeCMap(document));
+ }
+
+    /**
+     * Builds the ToUnicode CMap stream which maps the glyph IDs of the font to Unicode text.
+     * If any mapped code point lies above U+FFFF, the document version is raised to at
+     * least 1.5.
+     *
+     * @param document the document in which the stream is created
+     * @return the ToUnicode CMap stream, with compression added
+     * @throws IOException if the CMap could not be written
+     */
+    private PDStream createToUnicodeCMap(PDDocument document) throws IOException
+    {
+        ToUnicodeWriter toUniWriter = new ToUnicodeWriter("Adobe", "Identity", 0);
+        boolean hasSurrogates = false;
+        // numGlyphs is a count, so the valid glyph IDs are 0 to numGlyphs - 1;
+        // GID 0 is deliberately not mapped (presumably because it is .notdef)
+        for (int gid = 1, max = ttf.getMaximumProfile().getNumGlyphs(); gid < max; gid++)
+        {
+            Integer codePoint = cmap.getCharacterCode(gid);
+            // skip composite glyph components that have no code point
+            if (codePoint != null)
+            {
+                if (codePoint > 0xFFFF)
+                {
+                    hasSurrogates = true;
+                }
+                toUniWriter.add(gid, new String(new int[]{ codePoint }, 0, 1));
+            }
+        }
+
+        // the CMap is written to memory first, then wrapped in a stream
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        toUniWriter.writeTo(out);
+        InputStream cMapStream = new ByteArrayInputStream(out.toByteArray());
+
+        // ToUnicode stream dictionary
+        PDStream stream = new PDStream(document, cMapStream, false);
+        stream.getStream().setItem(COSName.TYPE, COSName.CMAP);
+        stream.getStream().setName(COSName.CMAPNAME, toUniWriter.getName());
+        stream.getStream().setItem(COSName.CIDSYSTEMINFO, toCIDSystemInfo("Adobe", "Identity", 0));
+        stream.addCompression();
+
+        // surrogate code points, requires PDF 1.5
+        if (hasSurrogates)
+        {
+            float version = document.getDocument().getVersion();
+            if (version < 1.5)
+            {
+                document.getDocument().setVersion(1.5f);
+            }
+        }
+
+        return stream;
     }
private COSDictionary toCIDSystemInfo(String registry, String ordering, int supplement)
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java?rev=1645083&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java Fri Dec 12 21:50:41 2014
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.pdmodel.font;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import org.apache.pdfbox.util.Charsets;
+
+/**
+ * Writes ToUnicode Mapping Files: CMaps which map the 16-bit CIDs of a font to Unicode text.
+ *
+ * @author John Hewson
+ */
+final class ToUnicodeWriter
+{
+    // a single beginbfrange/endbfrange section may hold at most this many entries
+    private static final int MAX_ENTRIES_PER_OPERATOR = 100;
+
+    private final String registry, ordering;
+    private final int supplement;
+    // sorted by CID, so that runs of consecutive CIDs can be written as ranges
+    private final Map<Integer, String> cidToUnicode = new TreeMap<Integer, String>();
+    private int wMode;
+
+    /**
+     * Creates a new ToUnicode CMap writer with horizontal writing mode.
+     *
+     * @param registry character collection registry
+     * @param ordering character ordering
+     * @param supplement character supplement
+     */
+    public ToUnicodeWriter(String registry, String ordering, int supplement)
+    {
+        this.registry = registry;
+        this.ordering = ordering;
+        this.supplement = supplement;
+        this.wMode = 0;
+    }
+
+    /**
+     * Sets the WMode (writing mode) of this CMap.
+     *
+     * @param wMode 1 for vertical, 0 for horizontal (default)
+     */
+    public void setWMode(int wMode)
+    {
+        this.wMode = wMode;
+    }
+
+    /**
+     * Adds the given CID to Unicode mapping. A mapping which was added earlier for the same
+     * CID is replaced.
+     *
+     * @param cid CID, in the range 0 to 0xFFFF
+     * @param text Unicode text, up to 512 bytes (the length is not checked by this method)
+     * @throws IllegalArgumentException if the CID is out of range or the text is null or empty
+     */
+    public void add(int cid, String text)
+    {
+        if (cid < 0 || cid > 0xFFFF)
+        {
+            throw new IllegalArgumentException("CID is not valid");
+        }
+
+        if (text == null || text.isEmpty())
+        {
+            throw new IllegalArgumentException("Text is null or empty");
+        }
+
+        cidToUnicode.put(cid, text);
+    }
+
+    /**
+     * Returns the name of the CMap, built from the registry and ordering.
+     */
+    public String getName()
+    {
+        return registry + "-" + ordering + "-UCS";
+    }
+
+    /**
+     * Writes the CMap as ASCII to the given output stream. The stream is flushed, but it is
+     * not closed.
+     *
+     * @param out ASCII output stream
+     * @throws IOException if the stream could not be written
+     */
+    public void writeTo(OutputStream out) throws IOException
+    {
+        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, Charsets.US_ASCII));
+
+        writeLine(writer, "/CIDInit /ProcSet findresource begin");
+        writeLine(writer, "12 dict begin\n");
+
+        writeLine(writer, "begincmap");
+        writeLine(writer, "/CIDSystemInfo");
+        writeLine(writer, "<< /Registry (" + registry + ")");
+        writeLine(writer, " /Ordering (" + ordering + ")");
+        writeLine(writer, " /Supplement " + supplement);
+        writeLine(writer, ">> def\n");
+
+        writeLine(writer, "/CMapName /" + getName() + " def");
+        writeLine(writer, "/CMapType 2 def\n"); // 2 = ToUnicode
+
+        if (wMode != 0)
+        {
+            // the value of WMode is an integer, not a name, so no leading slash
+            writeLine(writer, "/WMode " + wMode + " def");
+        }
+
+        // ToUnicode always uses 16-bit CIDs
+        writeLine(writer, "1 begincodespacerange");
+        writeLine(writer, "<0000> <FFFF>");
+        writeLine(writer, "endcodespacerange\n");
+
+        // CID -> Unicode mappings, we use ranges to generate a smaller CMap
+        List<Integer> srcFrom = new ArrayList<Integer>();
+        List<Integer> srcTo = new ArrayList<Integer>();
+        List<String> dstString = new ArrayList<String>();
+
+        int srcPrev = -1;
+        String dstPrev = null;
+
+        for (Map.Entry<Integer, String> entry : cidToUnicode.entrySet())
+        {
+            int cid = entry.getKey();
+            String text = entry.getValue();
+
+            // dstPrev is null for the first entry, which always begins a new range
+            if (dstPrev != null && canExtendRange(srcPrev, dstPrev, cid, text))
+            {
+                // extend range
+                srcTo.set(srcTo.size() - 1, cid);
+            }
+            else
+            {
+                // begin range
+                srcFrom.add(cid);
+                srcTo.add(cid);
+                dstString.add(text);
+            }
+            srcPrev = cid;
+            dstPrev = text;
+        }
+
+        // limit of 100 entries per operator, the last section holds whatever remains
+        int total = srcFrom.size();
+        for (int start = 0; start < total; start += MAX_ENTRIES_PER_OPERATOR)
+        {
+            int count = Math.min(MAX_ENTRIES_PER_OPERATOR, total - start);
+            writer.write(count + " beginbfrange\n");
+            for (int index = start; index < start + count; index++)
+            {
+                writer.write('<');
+                writer.write(toHex(srcFrom.get(index)));
+                writer.write("> ");
+
+                writer.write('<');
+                writer.write(toHex(srcTo.get(index)));
+                writer.write("> ");
+
+                writer.write("<");
+                writer.write(stringToHex(dstString.get(index)));
+                writer.write(">\n");
+            }
+            writeLine(writer, "endbfrange\n");
+        }
+
+        // footer, registers the CMap as a resource and removes it from the operand stack
+        writeLine(writer, "endcmap");
+        writeLine(writer, "CMapName currentdict /CMap defineresource pop");
+        writeLine(writer, "end");
+        writeLine(writer, "end");
+
+        writer.flush();
+    }
+
+    /**
+     * Returns true if the mapping (cid, text) may be merged into the range which currently
+     * ends with the mapping (srcPrev, dstPrev).
+     */
+    private static boolean canExtendRange(int srcPrev, String dstPrev, int cid, String text)
+    {
+        // source codes must be consecutive and may differ in their low byte only
+        if (cid != srcPrev + 1 || (cid >> 8) != (srcPrev >> 8))
+        {
+            return false;
+        }
+
+        // a range increments a single code point, multi code point text cannot be part of it
+        if (dstPrev.codePointCount(0, dstPrev.length()) != 1 ||
+            text.codePointCount(0, text.length()) != 1)
+        {
+            return false;
+        }
+
+        // only the last byte of the destination string is incremented within a range, so the
+        // code points must be consecutive without overflowing the low byte
+        int prevCodePoint = dstPrev.codePointAt(0);
+        int codePoint = text.codePointAt(0);
+        return codePoint == prevCodePoint + 1 && (codePoint >> 8) == (prevCodePoint >> 8);
+    }
+
+    /**
+     * Writes the given text followed by a line feed.
+     */
+    private void writeLine(BufferedWriter writer, String text) throws IOException
+    {
+        writer.write(text);
+        writer.write('\n');
+    }
+
+    /**
+     * Returns the given number as upper case hex, zero padded to at least four digits.
+     */
+    private String toHex(int num)
+    {
+        return String.format("%04X", num);
+    }
+
+    /**
+     * Returns the UTF-16BE bytes of the given text as upper case hex, two digits per byte.
+     */
+    private String stringToHex(String text)
+    {
+        // use of non-BMP code points requires PDF 1.5 or later, otherwise we're limited to UCS-2
+        StringBuilder sb = new StringBuilder();
+        for (byte b : text.getBytes(Charsets.UTF_16BE))
+        {
+            sb.append(String.format("%02X", b));
+        }
+        return sb.toString();
+    }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java
------------------------------------------------------------------------------
svn:eol-style = native