You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pdfbox.apache.org by "Andreas Lehmkühler (JIRA)" <ji...@apache.org> on 2015/03/04 12:27:04 UTC
[jira] [Closed] (PDFBOX-936) No HTML Header using PDFText2HTML
[ https://issues.apache.org/jira/browse/PDFBOX-936?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Andreas Lehmkühler closed PDFBOX-936.
-------------------------------------
Resolution: Fixed
Fix Version/s: 1.6.0
Assignee: Andreas Lehmkühler
Works fine at least since 1.6.0 (I didn't check earlier versions).
> No HTML Header using PDFText2HTML
> ---------------------------------
>
> Key: PDFBOX-936
> URL: https://issues.apache.org/jira/browse/PDFBOX-936
> Project: PDFBox
> Issue Type: Bug
> Components: Utilities
> Affects Versions: 1.3.1
> Environment: Ubuntu 10.10 / Netbeans / Java version "1.6.0_22"
> Reporter: Clement Igonet
> Assignee: Andreas Lehmkühler
> Fix For: 1.6.0
>
>
> The following code should output an HTML string with this header:
> <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
> "http://www.w3.org/TR/html4/loose.dtd">
> <html><head><title></title>
> ... but it does not!
> Here is the test code:
> package fr.def.iss.vd2.mod_instruction_gui.view;
> import java.io.ByteArrayInputStream;
> import java.io.ByteArrayOutputStream;
> import java.io.IOException;
> import java.io.OutputStreamWriter;
> import java.io.Writer;
> import org.apache.pdfbox.exceptions.COSVisitorException;
> import org.apache.pdfbox.pdmodel.PDDocument;
> import org.apache.pdfbox.pdmodel.PDPage;
> import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
> import org.apache.pdfbox.pdmodel.font.PDFont;
> import org.apache.pdfbox.pdmodel.font.PDType1Font;
> import org.apache.pdfbox.util.PDFText2HTML;
> public class Test {
> public static void main(final String[] args) {
> byte[] buf = rawText2Pdf("Hell world");
> String html = pdf2Html(buf);
> System.out.println("html:" + html);
> }
> public static byte[] rawText2Pdf(String text) {
> ByteArrayOutputStream os = null;
> try {
> os = new ByteArrayOutputStream();
> PDDocument document =
> new PDDocument();
> PDPage page = new PDPage();
> document.addPage(page);
> PDFont font =
> PDType1Font.HELVETICA_BOLD;
> PDPageContentStream contentStream =
> new PDPageContentStream(
> document, page);
> contentStream.beginText();
> contentStream.setFont(font, 12);
> contentStream.moveTextPositionByAmount(
> 100, 700);
> contentStream.drawString(text);
> contentStream.endText();
> contentStream.close();
> document.save(os);
> document.close();
> } catch (COSVisitorException ex) {
> ex.printStackTrace();
> } catch (IOException ex) {
> ex.printStackTrace();
> }
> byte[] result = null;
> if (os != null) {
> result = os.toByteArray();
> }
> return result;
> }
> public static String pdf2Html(byte[] pdf) {
> String result = null;
> ByteArrayOutputStream os = null;
> PDFText2HTML stripper = null;
> StringBuilder buf = new StringBuilder();
> try {
> stripper = new PDFText2HTML("utf-8");
> ByteArrayInputStream is =
> new ByteArrayInputStream(pdf);
> PDDocument document =
> PDDocument.load(is);
> os = new ByteArrayOutputStream();
> Writer writer =
> new OutputStreamWriter(os, "utf-8");
> stripper.writeText(document, writer);
> writer.close();
> os.close();
> result = buf.toString()
> + stripper.getText(document);
> } catch (IOException ex) {
> ex.printStackTrace();
> }
> return result;
> }
> }
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: dev-help@pdfbox.apache.org