You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pdfbox.apache.org by kl...@lhsystems.com on 2014/10/08 17:51:10 UTC
Contribution to pdfbox
Dear developer-Team,
I'm evaluating PDFbox 1.8.6 for the creation of a pdf file for an editor.
At one point I got stuck because the correct characters from the PostScript font were missing. The first characters I was missing were the German umlauts. From studying the code of the relevant Java class "PDType1AfmPfbFont", I found out that the encoding from the font file was not transferred to the PDF file.
I made a change to the class so that the encoding from the AFM file is transferred to the PDF file. I rebuilt the project and now I get the correct characters. I wonder whether you would like to include the changes in your project.
The changed class is here. I erased some special handling for the German Umlaut and added an encoding dictionary (red lines).
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.pdmodel.font;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.fontbox.afm.AFMParser;
import org.apache.fontbox.afm.CharMetric;
import org.apache.fontbox.afm.FontMetric;
import org.apache.fontbox.pfb.PfbParser;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.encoding.AFMEncoding;
import org.apache.pdfbox.encoding.DictionaryEncoding;
import org.apache.pdfbox.encoding.Encoding;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.common.PDStream;
/**
* This is implementation of the Type1 Font with a afm and a pfb file.
*
* @author <a href="mailto:m.g.n@gmx.de">Michael Niedermair</a>
* @version $Revision: 1.5 $
*/
public class PDType1AfmPfbFont extends PDType1Font
{
    /**
     * The buffer size used when reading the font files from disk.
     */
    private static final int BUFFERSIZE = 0xffff;

    /**
     * Number of entries in the widths list (one per single-byte character code).
     */
    private static final int MAX_WIDTHS = 256;

    /**
     * Width assigned to every character code for which the AFM supplies no positive width.
     */
    private static final float DEFAULT_WIDTH = 250f;

    /**
     * Character code of germandbls in WinAnsiEncoding (octal 0337).
     */
    private static final int GERMANDBLS_WINANSI_CODE = 223;

    /**
     * File extension of an AFM metrics file.
     */
    private static final String AFM_SUFFIX = ".afm";

    /**
     * The font metric.
     */
    private FontMetric metric;

    /**
     * The font encoding dictionary that is stored under /Encoding in the font dictionary, or
     * <code>null</code> if the AFM encoding contained no named character.
     */
    protected COSDictionary fontEncodingDic;

    /**
     * Create a new object from an afm file; the pfb file is expected next to it with the same base name.
     *
     * @param doc The PDF document that will hold the embedded font.
     * @param afmname The font filename.
     * @throws IOException If there is an error loading the data.
     */
    public PDType1AfmPfbFont(final PDDocument doc, final String afmname) throws IOException
    {
        super();
        InputStream afmin = null;
        InputStream pfbin = null;
        try
        {
            afmin = new BufferedInputStream(new FileInputStream(afmname), BUFFERSIZE);
            pfbin = new BufferedInputStream(new FileInputStream(toPfbName(afmname)), BUFFERSIZE);
            load(doc, afmin, pfbin);
        }
        finally
        {
            // Both streams were opened here, so they are released here, also when loading fails.
            closeQuietly(afmin);
            closeQuietly(pfbin);
        }
    }

    /**
     * Create a new object.
     *
     * @param doc The PDF document that will hold the embedded font.
     * @param afm The afm input.
     * @param pfb The pfb input.
     * @throws IOException If there is an error loading the data.
     */
    public PDType1AfmPfbFont(final PDDocument doc, final InputStream afm, final InputStream pfb) throws IOException
    {
        super();
        load(doc, afm, pfb);
    }

    /**
     * Derives the pfb filename from the afm filename: a trailing ".afm" (any letter case) is
     * removed and ".pfb" is appended. Only the end of the name is touched, so directory names
     * containing "afm" are left intact.
     *
     * @param afmname The afm filename.
     * @return The matching pfb filename.
     */
    private static String toPfbName(final String afmname)
    {
        String base = afmname;
        int suffixStart = afmname.length() - AFM_SUFFIX.length();
        if (suffixStart >= 0 && afmname.regionMatches(true, suffixStart, AFM_SUFFIX, 0, AFM_SUFFIX.length()))
        {
            base = afmname.substring(0, suffixStart);
        }
        return base + ".pfb";
    }

    /**
     * Closes a stream, ignoring a failure of the close itself.
     *
     * @param in The stream to close, may be <code>null</code>.
     */
    private static void closeQuietly(final InputStream in)
    {
        if (in == null)
        {
            return;
        }
        try
        {
            in.close();
        }
        catch (IOException ignored)
        {
            // A failed close of a read-only stream must not hide an exception thrown while loading.
        }
    }

    /**
     * This will load a afm and pfb to be embedding into a document.
     *
     * @param doc The PDF document that will hold the embedded font.
     * @param afm The afm input.
     * @param pfb The pfb input; it is closed by this method.
     * @throws IOException If there is an error loading the data.
     */
    private void load(final PDDocument doc, final InputStream afm, final InputStream pfb) throws IOException
    {
        fontEncodingDic = null;
        PDFontDescriptorDictionary fd = new PDFontDescriptorDictionary();
        setFontDescriptor(fd);

        // read the pfb
        PfbParser pfbparser;
        try
        {
            pfbparser = new PfbParser(pfb);
        }
        finally
        {
            pfb.close();
        }
        PDStream fontStream = new PDStream(doc, pfbparser.getInputStream(), false);
        fontStream.getStream().setInt("Length", pfbparser.size());
        int[] lengths = pfbparser.getLengths();
        for (int i = 0; i < lengths.length; i++)
        {
            // the segment lengths are stored as Length1, Length2, ...
            fontStream.getStream().setInt("Length" + (i + 1), lengths[i]);
        }
        fontStream.addCompression();
        fd.setFontFile(fontStream);

        // read the afm
        AFMParser parser = new AFMParser(afm);
        parser.parse();
        metric = parser.getResult();
        setFontEncoding(afmToDictionary(new AFMEncoding(metric)));

        // set the values
        setBaseFont(metric.getFontName());
        fd.setFontName(metric.getFontName());
        fd.setFontFamily(metric.getFamilyName());
        fd.setNonSymbolic(true);
        fd.setFontBoundingBox(new PDRectangle(metric.getFontBBox()));
        fd.setItalicAngle(metric.getItalicAngle());
        fd.setAscent(metric.getAscender());
        fd.setDescent(metric.getDescender());
        fd.setCapHeight(metric.getCapHeight());
        fd.setXHeight(metric.getXHeight());
        fd.setAverageWidth(metric.getAverageCharacterWidth());
        fd.setCharacterSet(metric.getCharacterSet());

        // widths: start with the default for every code, then overwrite from the AFM metrics
        List<Float> widths = new ArrayList<Float>(MAX_WIDTHS);
        for (int i = 0; i < MAX_WIDTHS; i++)
        {
            widths.add(DEFAULT_WIDTH);
        }
        for (CharMetric m : metric.getCharMetrics())
        {
            int n = m.getCharacterCode();
            // Codes outside 1..255 have no slot in the widths list and are skipped.
            if (n > 0 && n < MAX_WIDTHS && m.getWx() > 0)
            {
                float width = Math.round(m.getWx());
                widths.set(n, width);
                // germandbls has 2 character codes !! Don't ask me why .....
                // StandardEncoding = 0373 = 251
                // WinANSIEncoding = 0337 = 223
                if ("germandbls".equals(m.getName()) && n != GERMANDBLS_WINANSI_CODE)
                {
                    widths.set(GERMANDBLS_WINANSI_CODE, width);
                }
            }
        }
        setFirstChar(0);
        setLastChar(MAX_WIDTHS - 1);
        setWidths(widths);
    }

    /**
     * This will generate an Encoding from the AFM encoding and, as a side effect, store an
     * encoding dictionary with a Differences array of (code, name) pairs under /Encoding in the
     * font dictionary, so that the AFM encoding is exported to the pdf.
     *
     * Based on code from the pdfbox-forum posted by V0JT4, see also
     * https://sourceforge.net/forum/message.php?msg_id=4705274
     *
     * @param encoding The encoding read from the afm file.
     * @return The encoding built from a dictionary holding all 256 codes.
     * @throws java.io.IOException If a character name cannot be read from the encoding.
     */
    private DictionaryEncoding afmToDictionary(AFMEncoding encoding) throws java.io.IOException
    {
        // full table: a start code of 0 followed by one entry per character code
        COSArray array = new COSArray();
        // sparse table: explicit (code, name) pairs for the named codes only
        COSArray differEncoding = new COSArray();
        array.add(COSInteger.ZERO);
        for (int i = 0; i < MAX_WIDTHS; i++)
        {
            String name = encoding.getName(i);
            COSName cosName = COSName.getPDFName(name);
            if (name != null)
            {
                differEncoding.add(COSInteger.get(i));
                differEncoding.add(cosName);
            }
            // NOTE(review): for a code without a name this adds whatever getPDFName(null) returns;
            // confirm that DictionaryEncoding handles such entries as intended.
            array.add(cosName);
        }

        COSDictionary dictionary = new COSDictionary();
        // NOTE(review): this uses the /Name key; /Type may have been intended - confirm before changing.
        dictionary.setItem(COSName.NAME, COSName.ENCODING);
        dictionary.setItem(COSName.DIFFERENCES, array);
        dictionary.setItem(COSName.BASE_ENCODING, COSName.STANDARD_ENCODING);

        if (differEncoding.size() > 0)
        {
            fontEncodingDic = new COSDictionary();
            fontEncodingDic.setItem(COSName.TYPE, COSName.ENCODING);
            fontEncodingDic.setItem(COSName.DIFFERENCES, differEncoding);
            COSDictionary afont = (COSDictionary) getCOSObject();
            afont.setItem(COSName.ENCODING, fontEncodingDic);
        }
        return new DictionaryEncoding(dictionary);
    }

    /**
     * The encoding dictionary.
     *
     * @return The encoding dictionary stored in the font dictionary, or <code>null</code> if none was created.
     */
    public COSDictionary getFontEncodingDic()
    {
        return fontEncodingDic;
    }

    /**
     * Clears the inherited state and releases the font metric.
     */
    @Override
    public void clear()
    {
        super.clear();
        metric = null;
    }
}
Best regards
Klaus Graaf
Lufthansa Systems
Dr. Klaus Graaf
Schützenwall 1
D-22844 Norderstedt
Büro: +49-40-5070-6849
Fax: +49-40-5070-7880
Handy: +49-151-58920261
Internet: http://www.lhsystems.com<http://www.lhsystems.com/>
Email: mailto:Klaus.Graaf@lhsystems.com
Sitz der Gesellschaft / Corporate Headquarters: Lufthansa Systems AS GmbH, Norderstedt, Registereintragung / Registration: Amtsgericht Norderstedt 3688NO
Geschaeftsfuehrung / Management Board: Bernd Appel
Re: Contribution to pdfbox
Posted by John Hewson <jo...@jahewson.com>.
Hi Klaus
Thanks, can you open an issue on JIRA at https://issues.apache.org/jira/browse/pdfbox
and if possible attach the file as an SVN patch (see https://ariejan.net/2007/07/03/how-to-create-and-apply-a-patch-with-subversion/) via More > Attach Files.
We might want to add some similar code to the trunk as well as 1.8, but I’ll figure that out later.
Thanks
-- John
On 8 Oct 2014, at 08:51, klaus.graaf@lhsystems.com wrote:
> Dear developer-Team,
> I'm evaluating PDFbox 1.8.6 for the creation of a pdf file for an editor.
>
> At one point I got stuck, due to the missing of the correct characters from the PostScript font. The first characters I was missing were the German Umlaute. From studying the coding of the relevant Java class "PDType1AfmPfbFont" I found out, that the encoding from the font file, was not transfer to the pdf-file.
>
> I made a change to the class, so the encoding from the afm-file will be transferred to the pdf-file. I rebuild the project and now I get the correct characters. I wonder, if you would like to put the changes to your project.
>
>
> The changed class is here. I erased some special handling for the German Umlaut and added an encoding dictionary (red lines).
>
> /*
> * Licensed to the Apache Software Foundation (ASF) under one or more
> * contributor license agreements. See the NOTICE file distributed with
> * this work for additional information regarding copyright ownership.
> * The ASF licenses this file to You under the Apache License, Version 2.0
> * (the "License"); you may not use this file except in compliance with
> * the License. You may obtain a copy of the License at
> *
> * http://www.apache.org/licenses/LICENSE-2.0
> *
> * Unless required by applicable law or agreed to in writing, software
> * distributed under the License is distributed on an "AS IS" BASIS,
> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> * See the License for the specific language governing permissions and
> * limitations under the License.
> */
> package org.apache.pdfbox.pdmodel.font;
>
> import java.io.BufferedInputStream;
> import java.io.FileInputStream;
> import java.io.IOException;
> import java.io.InputStream;
> import java.util.ArrayList;
> import java.util.Iterator;
> import java.util.List;
>
> import org.apache.fontbox.afm.AFMParser;
> import org.apache.fontbox.afm.CharMetric;
> import org.apache.fontbox.afm.FontMetric;
> import org.apache.fontbox.pfb.PfbParser;
> import org.apache.pdfbox.cos.COSArray;
> import org.apache.pdfbox.cos.COSDictionary;
> import org.apache.pdfbox.cos.COSInteger;
> import org.apache.pdfbox.cos.COSName;
> import org.apache.pdfbox.cos.COSNumber;
> import org.apache.pdfbox.encoding.AFMEncoding;
> import org.apache.pdfbox.encoding.DictionaryEncoding;
> import org.apache.pdfbox.encoding.Encoding;
> import org.apache.pdfbox.pdmodel.PDDocument;
> import org.apache.pdfbox.pdmodel.common.PDRectangle;
> import org.apache.pdfbox.pdmodel.common.PDStream;
>
> /**
> * This is implementation of the Type1 Font with a afm and a pfb file.
> *
> * @author <a href="mailto:m.g.n@gmx.de">Michael Niedermair</a>
> * @version $Revision: 1.5 $
> */
> public class PDType1AfmPfbFont extends PDType1Font
> {
> /**
> * the buffersize.
> */
> private static final int BUFFERSIZE = 0xffff;
>
> /**
> * The font metric.
> */
> private FontMetric metric;
>
> /**
> * The font encoding dictionary.
> */
> protected COSDictionary fontEncodingDic;
>
> /**
> * Create a new object.
> *
> * @param doc The PDF document that will hold the embedded font.
> * @param afmname The font filename.
> * @throws IOException If there is an error loading the data.
> */
> public PDType1AfmPfbFont(final PDDocument doc, final String afmname) throws IOException
> {
>
> super();
> InputStream afmin = new BufferedInputStream(new FileInputStream(afmname), BUFFERSIZE);
> String pfbname = afmname.replaceAll(".AFM", "").replaceAll(".afm", "") + ".pfb";
> InputStream pfbin = new BufferedInputStream(new FileInputStream(pfbname), BUFFERSIZE);
> load(doc, afmin, pfbin);
> }
>
> /**
> * Create a new object.
> *
> * @param doc The PDF document that will hold the embedded font.
> * @param afm The afm input.
> * @param pfb The pfb input.
> * @throws IOException If there is an error loading the data.
> */
> public PDType1AfmPfbFont(final PDDocument doc, final InputStream afm, final InputStream pfb) throws IOException
> {
> super();
> load(doc, afm, pfb);
> }
>
> /**
> * This will load a afm and pfb to be embedding into a document.
> *
> * @param doc The PDF document that will hold the embedded font.
> * @param afm The afm input.
> * @param pfb The pfb input.
> * @throws IOException If there is an error loading the data.
> */
> private void load(final PDDocument doc, final InputStream afm, final InputStream pfb) throws IOException
> {
> fontEncodingDic = null;
> PDFontDescriptorDictionary fd = new PDFontDescriptorDictionary();
> setFontDescriptor(fd);
>
> // read the pfb
> PfbParser pfbparser = new PfbParser(pfb);
> pfb.close();
>
> PDStream fontStream = new PDStream(doc, pfbparser.getInputStream(), false);
> fontStream.getStream().setInt("Length", pfbparser.size());
> for (int i = 0; i < pfbparser.getLengths().length; i++)
> {
> fontStream.getStream().setInt("Length" + (i + 1), pfbparser.getLengths()[i]);
> }
> fontStream.addCompression();
> fd.setFontFile(fontStream);
>
> // read the afm
> AFMParser parser = new AFMParser(afm);
> parser.parse();
> metric = parser.getResult();
> setFontEncoding(afmToDictionary(new AFMEncoding(metric)));
>
> // set the values
> setBaseFont(metric.getFontName());
> fd.setFontName(metric.getFontName());
> fd.setFontFamily(metric.getFamilyName());
> fd.setNonSymbolic(true);
> fd.setFontBoundingBox(new PDRectangle(metric.getFontBBox()));
> fd.setItalicAngle(metric.getItalicAngle());
> fd.setAscent(metric.getAscender());
> fd.setDescent(metric.getDescender());
> fd.setCapHeight(metric.getCapHeight());
> fd.setXHeight(metric.getXHeight());
> fd.setAverageWidth(metric.getAverageCharacterWidth());
> fd.setCharacterSet(metric.getCharacterSet());
>
> // get firstchar, lastchar
> int firstchar = 255;
> int lastchar = 0;
>
> // widths
> List<CharMetric> listmetric = metric.getCharMetrics();
> Encoding encoding = getFontEncoding();
> int maxWidths = 256;
> List<Float> widths = new ArrayList<Float>(maxWidths);
> int zero = 250;
> Iterator<CharMetric> iter = listmetric.iterator();
> for (int i = 0; i < maxWidths; i++)
> {
> widths.add((float)zero);
> }
> while (iter.hasNext())
> {
> CharMetric m = iter.next();
> int n = m.getCharacterCode();
> if (n > 0)
> {
> firstchar = Math.min(firstchar, n);
> lastchar = Math.max(lastchar, n);
> if (m.getWx() > 0)
> {
> int width = Math.round(m.getWx());
> widths.set(n, (float)width);
> // germandbls has 2 character codes !! Don't ask me why .....
> // StandardEncoding = 0373 = 251
> // WinANSIEncoding = 0337 = 223
> if (m.getName().equals("germandbls") && n != 223)
> {
> widths.set(0337, (float)width);
> }
> }
> }
> }
> setFirstChar(0);
> setLastChar(255);
> setWidths(widths);
> }
>
> /*
> * This will generate a Encoding from the AFM-Encoding, because the AFM-Enconding isn't exported to the pdf and
> * consequently the StandardEncoding is used so that any special character is missing I've copied the code from the
> * pdfbox-forum posted by V0JT4 and made some additions concerning german umlauts see also
> * https://sourceforge.net/forum/message.php?msg_id=4705274
> */
> private DictionaryEncoding afmToDictionary(AFMEncoding encoding) throws java.io.IOException
> {
> COSArray array = new COSArray();
> COSArray differEncoding = new COSArray();
> array.add(COSInteger.ZERO);
> for (int i = 0; i < 256; i++)
> {
> String name = encoding.getName(i);
> COSName cosName = COSName.getPDFName(name);
> if ( name!=null )
> {
> COSNumber cosDifferCode = COSNumber.get(new Integer(i).toString());
> COSName cosDifferName = COSName.getPDFName(name);
> differEncoding.add( cosDifferCode );
> differEncoding.add( cosDifferName );
> }
> array.add(cosName);
> }
>
> COSDictionary dictionary = new COSDictionary();
> dictionary.setItem(COSName.NAME, COSName.ENCODING);
> dictionary.setItem(COSName.DIFFERENCES, array);
> dictionary.setItem(COSName.BASE_ENCODING, COSName.STANDARD_ENCODING);
>
> if ( differEncoding.size()> 0 )
> {
> fontEncodingDic = new COSDictionary();
> fontEncodingDic.setItem(COSName.TYPE, COSName.ENCODING);
> fontEncodingDic.setItem(COSName.DIFFERENCES, differEncoding);
>
> COSDictionary afont = (COSDictionary) getCOSObject();
> afont.setItem(COSName.ENCODING, fontEncodingDic);
> }
>
> return new DictionaryEncoding(dictionary);
> }
> /**
> * The encoding dirctionary
> *
> * @return The dictionary of encoding
> *
> */
> public COSDictionary getFontEncodingDic()
> {
> return fontEncodingDic;
> }
> @Override
> public void clear()
> {
> super.clear();
> metric = null;
> }
> }
>
>
>
> Best regard
> Klaus Graaf
>
> Lufthansa Systems
> Dr. Klaus Graaf
> Schützenwall 1
> D-22844 Norderstedt
>
> Büro: +49-40-5070-6849
> Fax: +49-40-5070-7880
> Handy: +49-151-58920261
> Internet: http://www.lhsystems.com<http://www.lhsystems.com/>
> Email: mailto:Klaus.Graaf@lhsystems.com
>
>
>
> Sitz der Gesellschaft / Corporate Headquarters: Lufthansa Systems AS GmbH, Norderstedt, Registereintragung / Registration: Amtsgericht Norderstedt 3688NO
> Geschaeftsfuehrung / Management Board: Bernd Appel
>
>