You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pdfbox.apache.org by "Andreas Lehmkühler (JIRA)" <ji...@apache.org> on 2015/02/18 11:45:11 UTC

[jira] [Comment Edited] (PDFBOX-2688) sun.java2d.Disposer leak when using pdf to image conversion in a server(tomcat)

    [ https://issues.apache.org/jira/browse/PDFBOX-2688?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14324878#comment-14324878 ] 

Andreas Lehmkühler edited comment on PDFBOX-2688 at 2/18/15 10:45 AM:
----------------------------------------------------------------------

The following single-file servlet application, running in Tomcat, should reproduce the problem.

The problem can be reproduced with ApacheBench (ab), for example:

ab -n 100000 -c 10 http://localhost:8980/appContext/PDFToImage

{code}
package pdfbox2688;

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;

import javax.imageio.ImageIO;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;

/**
 * Single-file servlet used to reproduce PDFBOX-2688.
 * <p>
 * Every GET request downloads a sample PDF into a temporary file, renders its
 * first page with PDFBox and answers with the result encoded as PNG. The
 * temporary file is removed again before the request finishes.
 */
@WebServlet("/PDFToImage")
public class PDFToImageConversionServlet extends HttpServlet {
	private static final long serialVersionUID = 1L;

	/**
	 * @see HttpServlet#HttpServlet()
	 */
	public PDFToImageConversionServlet() {
		super();
	}

	/**
	 * Downloads the sample PDF, rasterizes its first page and writes it to the
	 * response as {@code image/png}.
	 * <p>
	 * Responds with 502 when the PDF could not be downloaded and with 500 when
	 * the page could not be rendered, instead of returning an empty response
	 * or failing inside {@code ImageIO.write} on a {@code null} image.
	 *
	 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
	 */
	@Override
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		File pdfFile = getPDFFile();
		if (pdfFile == null) {
			response.sendError(HttpServletResponse.SC_BAD_GATEWAY, "Could not download the sample PDF");
			return;
		}
		try {
			BufferedImage image = rasterizeUsingPdfBox(pdfFile);
			if (image == null) {
				// rasterizeUsingPdfBox() reports failures by returning null
				response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, "Could not render the PDF page");
				return;
			}
			// Encode into memory first so that nothing is written to the
			// response if the PNG encoding fails half-way.
			ByteArrayOutputStream baos = new ByteArrayOutputStream();
			ImageIO.write(image, "png", baos);
			response.setContentType("image/png");
			response.getOutputStream().write(baos.toByteArray());
			response.flushBuffer();
		} finally {
			// One temp file is created per request; without this a load test
			// with thousands of requests fills up the temp directory.
			pdfFile.delete();
		}
	}

	/**
	 * Renders the first page of the given PDF as an ARGB image, passing 300 as
	 * the resolution argument to {@code PDPage.convertToImage}.
	 *
	 * @param pdfFile the PDF file to load
	 * @return the rendered page, or {@code null} if loading or rendering failed
	 *         (the failure is written to the servlet log)
	 * @throws IOException if closing the document fails
	 */
	private BufferedImage rasterizeUsingPdfBox(File pdfFile) throws IOException {
		BufferedImage image = null;
		PDDocument document = null;
		PDPage page = null;
		try {
			document = PDDocument.loadNonSeq(pdfFile, null);
			@SuppressWarnings("unchecked")
			List<PDPage> pages = document.getDocumentCatalog().getAllPages();
			page = pages.get(0);
			int imageType = BufferedImage.TYPE_INT_ARGB;
			image = page.convertToImage(imageType, 300);
		} catch (Exception e) {
			// Best effort: the caller turns the null result into an error response.
			log("Could not rasterize " + pdfFile, e);
		} finally {
			if (document != null) {
				try {
					if (page != null) {
						page.clear();
					}
				} finally {
					// Always close the document, even if clearing the page fails.
					document.close();
				}
			}
		}
		return image;
	}

	/**
	 * Downloads the sample PDF into a new temporary file in the default
	 * temporary-file directory ({@code java.io.tmpdir}).
	 * <p>
	 * The caller owns the returned file and is responsible for deleting it.
	 * A partially written file is deleted here when the download fails.
	 *
	 * @return the downloaded file, or {@code null} if the download failed
	 *         (the failure is written to the servlet log)
	 * @throws IOException never thrown by this implementation; kept so that the
	 *         signature stays unchanged
	 */
	private File getPDFFile() throws IOException {
		InputStream is = null;
		FileOutputStream pdfOS = null;
		File pdfFile = null;
		boolean complete = false;
		try {
			URL url = new URL("http://www.xmlpdf.com/manualfiles/hello-world.pdf");
			// Alternative sample:
			// "https://github.com/mozilla/pdf.js/raw/master/examples/helloworld/helloworld.pdf"
			is = url.openStream();

			// Use the platform temp directory instead of a hard-coded "/tmp"
			// so that the servlet also works on Windows.
			pdfFile = File.createTempFile("Testpdf", ".pdf");
			pdfOS = new FileOutputStream(pdfFile);

			byte[] buf = new byte[4096];
			int n;
			while ((n = is.read(buf)) >= 0) {
				pdfOS.write(buf, 0, n);
			}
			// Close explicitly inside the try block so that a failing close
			// marks the download as failed.
			pdfOS.close();
			complete = true;
			return pdfFile;
		} catch (Exception e) {
			log("Could not download the sample PDF", e);
			return null;
		} finally {
			closeQuietly(is);
			closeQuietly(pdfOS);
			if (!complete && pdfFile != null) {
				// Do not leave a partial download behind.
				pdfFile.delete();
			}
		}
	}

	/**
	 * Closes the given resource and ignores any failure, so that one failing
	 * close cannot prevent the remaining cleanup from running.
	 *
	 * @param resource the resource to close, may be {@code null}
	 */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource != null) {
			try {
				resource.close();
			} catch (IOException ignored) {
				// nothing sensible can be done about a failing close during cleanup
			}
		}
	}

}
{code}


was (Author: akhanal):
The following single-file servlet application, running in Tomcat, should reproduce the problem.

The problem can be reproduced with ApacheBench (ab), for example:

ab -n 100000 -c 10 http://localhost:8980/appContext/PDFToImage


package pdfbox2688;

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;

import javax.imageio.ImageIO;
import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;

/**
 * Single-file servlet used to reproduce PDFBOX-2688.
 * <p>
 * Every GET request downloads a sample PDF into a temporary file, renders its
 * first page with PDFBox and answers with the result encoded as PNG. The
 * temporary file is removed again before the request finishes.
 */
@WebServlet("/PDFToImage")
public class PDFToImageConversionServlet extends HttpServlet {
	private static final long serialVersionUID = 1L;

	/**
	 * @see HttpServlet#HttpServlet()
	 */
	public PDFToImageConversionServlet() {
		super();
	}

	/**
	 * Downloads the sample PDF, rasterizes its first page and writes it to the
	 * response as {@code image/png}.
	 * <p>
	 * Responds with 502 when the PDF could not be downloaded and with 500 when
	 * the page could not be rendered, instead of returning an empty response
	 * or failing inside {@code ImageIO.write} on a {@code null} image.
	 *
	 * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
	 */
	@Override
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		File pdfFile = getPDFFile();
		if (pdfFile == null) {
			response.sendError(HttpServletResponse.SC_BAD_GATEWAY, "Could not download the sample PDF");
			return;
		}
		try {
			BufferedImage image = rasterizeUsingPdfBox(pdfFile);
			if (image == null) {
				// rasterizeUsingPdfBox() reports failures by returning null
				response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, "Could not render the PDF page");
				return;
			}
			// Encode into memory first so that nothing is written to the
			// response if the PNG encoding fails half-way.
			ByteArrayOutputStream baos = new ByteArrayOutputStream();
			ImageIO.write(image, "png", baos);
			response.setContentType("image/png");
			response.getOutputStream().write(baos.toByteArray());
			response.flushBuffer();
		} finally {
			// One temp file is created per request; without this a load test
			// with thousands of requests fills up the temp directory.
			pdfFile.delete();
		}
	}

	/**
	 * Renders the first page of the given PDF as an ARGB image, passing 300 as
	 * the resolution argument to {@code PDPage.convertToImage}.
	 *
	 * @param pdfFile the PDF file to load
	 * @return the rendered page, or {@code null} if loading or rendering failed
	 *         (the failure is written to the servlet log)
	 * @throws IOException if closing the document fails
	 */
	private BufferedImage rasterizeUsingPdfBox(File pdfFile) throws IOException {
		BufferedImage image = null;
		PDDocument document = null;
		PDPage page = null;
		try {
			document = PDDocument.loadNonSeq(pdfFile, null);
			@SuppressWarnings("unchecked")
			List<PDPage> pages = document.getDocumentCatalog().getAllPages();
			page = pages.get(0);
			int imageType = BufferedImage.TYPE_INT_ARGB;
			image = page.convertToImage(imageType, 300);
		} catch (Exception e) {
			// Best effort: the caller turns the null result into an error response.
			log("Could not rasterize " + pdfFile, e);
		} finally {
			if (document != null) {
				try {
					if (page != null) {
						page.clear();
					}
				} finally {
					// Always close the document, even if clearing the page fails.
					document.close();
				}
			}
		}
		return image;
	}

	/**
	 * Downloads the sample PDF into a new temporary file in the default
	 * temporary-file directory ({@code java.io.tmpdir}).
	 * <p>
	 * The caller owns the returned file and is responsible for deleting it.
	 * A partially written file is deleted here when the download fails.
	 *
	 * @return the downloaded file, or {@code null} if the download failed
	 *         (the failure is written to the servlet log)
	 * @throws IOException never thrown by this implementation; kept so that the
	 *         signature stays unchanged
	 */
	private File getPDFFile() throws IOException {
		InputStream is = null;
		FileOutputStream pdfOS = null;
		File pdfFile = null;
		boolean complete = false;
		try {
			URL url = new URL("http://www.xmlpdf.com/manualfiles/hello-world.pdf");
			// Alternative sample:
			// "https://github.com/mozilla/pdf.js/raw/master/examples/helloworld/helloworld.pdf"
			is = url.openStream();

			// Use the platform temp directory instead of a hard-coded "/tmp"
			// so that the servlet also works on Windows.
			pdfFile = File.createTempFile("Testpdf", ".pdf");
			pdfOS = new FileOutputStream(pdfFile);

			byte[] buf = new byte[4096];
			int n;
			while ((n = is.read(buf)) >= 0) {
				pdfOS.write(buf, 0, n);
			}
			// Close explicitly inside the try block so that a failing close
			// marks the download as failed.
			pdfOS.close();
			complete = true;
			return pdfFile;
		} catch (Exception e) {
			log("Could not download the sample PDF", e);
			return null;
		} finally {
			closeQuietly(is);
			closeQuietly(pdfOS);
			if (!complete && pdfFile != null) {
				// Do not leave a partial download behind.
				pdfFile.delete();
			}
		}
	}

	/**
	 * Closes the given resource and ignores any failure, so that one failing
	 * close cannot prevent the remaining cleanup from running.
	 *
	 * @param resource the resource to close, may be {@code null}
	 */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource != null) {
			try {
				resource.close();
			} catch (IOException ignored) {
				// nothing sensible can be done about a failing close during cleanup
			}
		}
	}

}


> sun.java2d.Disposer leak when using pdf to image conversion in a server(tomcat)
> -------------------------------------------------------------------------------
>
>                 Key: PDFBOX-2688
>                 URL: https://issues.apache.org/jira/browse/PDFBOX-2688
>             Project: PDFBox
>          Issue Type: Bug
>    Affects Versions: 1.8.8
>            Reporter: Ankit Khanal
>
> I am running with 6GB of heap space and running PDF to PNG conversion in a servlet container(tomcat). This happens only when running thousands of requests for conversion.
> JVM memory statistics shows heap space never going above 1GB and non-heap memory is also constant but the linux process or windows process seems to consume around 8GB of memory.
> Heap dump shows that the largest object is sun.java2d.Disposer and is around 200MB.
> It seems that the leaked memory is native memory used by Java2D that is not accounted for in the heap memory statistics; the growth of the sun.java2d.Disposer memory is proportional to the growth of the process memory (as shown by the Linux 'top' command).
> {code}
> 		BufferedImage image = null;
> 		ByteArrayInputStream pdfStream = getpdfbytesfromExistingDoc();
> 		PDDocument document = null;
> 		PDPage page = null;
> 		COSDocument cosDoc = null;
> 		PDFParser parser = null;
> 		try {
> 			parser = new PDFParser(pdfStream);
> 			parser.parse();
> 			cosDoc = parser.getDocument();
> 			document = new PDDocument(cosDoc);
> 			@SuppressWarnings("unchecked")
> 			List<PDPage> pages = document.getDocumentCatalog().getAllPages();
> 			page = pages.get(0);
> 			int imageType = BufferedImage.TYPE_INT_ARGB;
> 			image = page.convertToImage(imageType, 72);
> 		} finally {
> 			if (cosDoc != null) {
> 				cosDoc.close();
> 			}
> 			if (parser != null) {
> 				parser.clearResources();
> 			}
> 			if (document != null) {
> 				if (page != null) {
> 					page.clear();
> 				}
> 				document.close();
> 			}
> 		}
> 		return image;
> 	}
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@pdfbox.apache.org
For additional commands, e-mail: dev-help@pdfbox.apache.org