You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pdfbox.apache.org by "Ludovic Davoine (JIRA)" <ji...@apache.org> on 2014/06/10 14:04:01 UTC

[jira] [Created] (PDFBOX-2128) CMYK images are not supported correctly in the PDJpeg class

Ludovic Davoine created PDFBOX-2128:
---------------------------------------

             Summary: CMYK images are not supported correctly in the PDJpeg class
                 Key: PDFBOX-2128
                 URL: https://issues.apache.org/jira/browse/PDFBOX-2128
             Project: PDFBox
          Issue Type: Bug
          Components: PDModel
    Affects Versions: 1.8.5
         Environment: Windows 7 Professional
Running jvm: Java HotSpot(TM) 64-Bit Server VM - 1.6.0_26-b03 - 20.1-b02 - Sun Microsystems Inc
            Reporter: Ludovic Davoine
             Fix For: 1.8.5


I have a PDF with CMYK images inside and I need to extract the images in RGB format. But the PDJpeg class does not seem to work correctly; the colors of the extracted images are wrong.  Example:

- Original image in the PDF : http://ludoda.free.fr/IMAGE_IN_PDF.jpg
- Extracted image: http://ludoda.free.fr/IMAGE_EXTRACTED.jpg


You can download the PDF : http://ludoda.free.fr/PORSCHE_CMYK.PDF

and try my simple Test Case (I'm using PDFbox 1.8.5): 

import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDJpeg;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;

/**
 * Reproduction case for PDFBOX-2128: walks every page of a PDF, and for each
 * JPEG image XObject writes it out twice - once through
 * {@code PDJpeg.write2file} and once by encoding the result of
 * {@code PDJpeg.getRGBImage} with {@link ImageIO} - so the two outputs can be
 * compared visually.
 *
 * <p>Input and output locations are hard-coded Windows paths; adjust them
 * before running.
 */
public class TestCase {

    /**
     * Runs the extraction and reports progress on standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        try
        {
            System.out.println("START EXTRACTING IMAGES...");
            read_pdf();
            System.out.println("COMPLETE");
        }
        catch (IOException ex)
        {
            System.out.println("" + ex);
        }
    }

    /**
     * Loads the sample PDF and dumps every JPEG image XObject found in the
     * page resources. Output files are numbered with a counter that runs
     * across all pages, starting at 1.
     *
     * @throws IOException if the document cannot be loaded, an image cannot
     *         be written, or no ImageIO writer accepts the decoded image
     */
    public static void read_pdf() throws IOException
    {
        PDDocument document = PDDocument.load("C:\\temp\\PORSCHE_CMYK.pdf");
        try
        {
            @SuppressWarnings("unchecked")
            List<PDPage> pages = document.getDocumentCatalog().getAllPages();
            int i = 1;

            for (PDPage page : pages)
            {
                PDResources resources = page.getResources();
                if (resources == null)
                {
                    // Page carries no resource dictionary, so there is nothing to extract.
                    continue;
                }

                Map<String, PDXObject> pageImages = resources.getXObjects();
                if (pageImages == null)
                {
                    continue;
                }

                for (PDXObject xobject : pageImages.values())
                {
                    // Only JPEG images are under test; other XObjects (forms,
                    // non-JPEG images) are skipped instead of failing the cast.
                    if (!(xobject instanceof PDJpeg))
                    {
                        continue;
                    }
                    PDJpeg image = (PDJpeg) xobject;

                    // Test 1 : write2file
                    image.write2file("C:\\workspace\\JAVA_PDFTools\\temp\\image" + i);

                    // Test 2: getRGBImage
                    BufferedImage bimage = image.getRGBImage();
                    File outputfile = new File("C:\\workspace\\JAVA_PDFTools\\temp\\image" + i + "_buffered.jpg");
                    // ImageIO.write signals "no suitable writer" by returning false
                    // rather than throwing, so surface that case explicitly.
                    if (!ImageIO.write(bimage, "jpg", outputfile))
                    {
                        throw new IOException("No ImageIO writer could encode " + outputfile);
                    }
                    i++;
                }
            }
        }
        finally
        {
            // Always release the document, even when extraction fails part-way.
            document.close();
        }
    }
}





--
This message was sent by Atlassian JIRA
(v6.2#6252)