You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/05/29 11:07:12 UTC

svn commit: r1598223 - /pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java

Author: tilman
Date: Thu May 29 09:07:11 2014
New Revision: 1598223

URL: http://svn.apache.org/r1598223
Log:
PDFBOX-2101: save JPEG directly, as suggested by Jeremias Maerki

Modified:
    pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java

Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java?rev=1598223&r1=1598222&r2=1598223&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Thu May 29 09:07:11 2014
@@ -20,9 +20,14 @@ import java.awt.image.BufferedImage;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.io.IOUtils;
 
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
@@ -52,6 +57,14 @@ public class ExtractImages
     private static final String ADDKEY = "-addkey";
     private static final String NONSEQ = "-nonSeq";
 
+    private static final List<String> DCT_FILTERS = new ArrayList<String>(); // passed to getPartiallyFilteredStream()
+
+    static
+    {
+        DCT_FILTERS.add( COSName.DCT_DECODE.getName() ); // NOTE(review): presumably the full DCT filter name - confirm
+        DCT_FILTERS.add( COSName.DCT_DECODE_ABBREVIATION.getName() ); // NOTE(review): presumably its short form - confirm
+    }
+
     private ExtractImages()
     {
     }
@@ -216,6 +229,19 @@ public class ExtractImages
         }
         resources.clear();
     }
+    
+    private void writeJpeg2OutputStream(PDImageXObject xobj, OutputStream out) throws IOException
+    {
+        InputStream data = xobj.getPDStream().getPartiallyFilteredStream(DCT_FILTERS);
+        try
+        {
+            IOUtils.copy(data, out); // write the stream's bytes straight to out, no ImageIO re-encoding
+        }
+        finally
+        {
+            IOUtils.closeQuietly(data); // always release the stream, also when read/write throws
+        }
+    }
 
     /**
      * Writes the image to a file with the filename + an appropriate suffix, like "Image.jpg".
@@ -236,6 +262,10 @@ public class ExtractImages
                 {
                     TIFFInputStream.writeToOutputStream(xobj, out);
                 }
+                else if ("jpg".equals(xobj.getSuffix()))
+                {
+                    writeJpeg2OutputStream(xobj, out);
+                }
                 else
                 {
                     ImageIOUtil.writeImage(image, xobj.getSuffix(), out);