You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/06/11 14:43:40 UTC

svn commit: r1601874 - /pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java

Author: tilman
Date: Wed Jun 11 12:43:40 2014
New Revision: 1601874

URL: http://svn.apache.org/r1601874
Log:
PDFBOX-2128: add parameter directJPEG to force 1:1 extraction of JPEGs

Modified:
    pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java

Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java?rev=1601874&r1=1601873&r2=1601874&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Wed Jun 11 12:43:40 2014
@@ -62,6 +62,7 @@ public class ExtractImages
     private static final String PREFIX = "-prefix";
     private static final String ADDKEY = "-addkey";
     private static final String NONSEQ = "-nonSeq";
+    private static final String DIRECTJPEG = "-directJPEG";
 
     private static final List<String> DCT_FILTERS = new ArrayList<String>();
 
@@ -101,6 +102,7 @@ public class ExtractImages
             String prefix = null;
             boolean addKey = false;
             boolean useNonSeqParser = false;
+            boolean directJPEG = false;
             for( int i=0; i<args.length; i++ )
             {
                 if( args[i].equals( PASSWORD ) )
@@ -129,6 +131,10 @@ public class ExtractImages
                 {
                     useNonSeqParser = true;
                 }
+                else if( args[i].equals( DIRECTJPEG ) )
+                {
+                    directJPEG = true;
+                }
                 else
                 {
                     if( pdfFile == null )
@@ -180,7 +186,7 @@ public class ExtractImages
                         PDPage page = (PDPage)iter.next();
                         PDResources resources = page.getResources();
                         // extract all XObjectImages which are part of the page resources
-                        processResources(resources, prefix, addKey);
+                        processResources(resources, prefix, addKey, directJPEG);
                     }
                 }
                 finally
@@ -194,7 +200,8 @@ public class ExtractImages
         }
     }
 
-    private void processResources(PDResources resources, String prefix, boolean addKey) throws IOException
+    private void processResources(PDResources resources, String prefix, 
+            boolean addKey, boolean directJPEG) throws IOException
     {
         if (resources == null)
         {
@@ -231,7 +238,7 @@ public class ExtractImages
                     imageCounter++;
 
                     System.out.println( "Writing image:" + name );
-                    write2file( image, name );
+                    write2file( image, name, directJPEG );
                     image.clear(); // PDFBOX-2101 get rid of cache ASAP
                 }
                 // maybe there are more images embedded in a form object
@@ -239,7 +246,7 @@ public class ExtractImages
                 {
                     PDFormXObject xObjectForm = (PDFormXObject)xobject;
                     PDResources formResources = xObjectForm.getResources();
-                    processResources(formResources, prefix, addKey);
+                    processResources(formResources, prefix, addKey, directJPEG);
                 }
             }
         }
@@ -261,7 +268,7 @@ public class ExtractImages
      * @param filename the filename
      * @throws IOException When somethings wrong with the corresponding file.
      */
-    private void write2file(PDImageXObject xobj, String filename) throws IOException
+    private void write2file(PDImageXObject xobj, String filename, boolean directJPEG) throws IOException
     {
         if (xobj.getSuffix() == null || xobj.getSuffix().isEmpty())
         {
@@ -284,10 +291,11 @@ public class ExtractImages
                 else if ("jpg".equals(xobj.getSuffix()))
                 {
                     String colorSpaceName = xobj.getColorSpace().getName();
-                    if (PDDeviceGray.INSTANCE.getName().equals(colorSpaceName) ||
+                    if (directJPEG ||
+                            PDDeviceGray.INSTANCE.getName().equals(colorSpaceName) ||
                             PDDeviceRGB.INSTANCE.getName().equals(colorSpaceName))
                     {
-                        // RGB and Gray colorspace:
+                        // directJPEG option, RGB or Gray colorspace:
                         // get and write the unmodified JPEG stream
                         writeJpeg2OutputStream(xobj, out);
                     }
@@ -325,6 +333,7 @@ public class ExtractImages
             "  -prefix  <image-prefix>      Image prefix(default to pdf name)\n" +
             "  -addkey                      add the internal image key to the file name\n" +
             "  -nonSeq                      Enables the new non-sequential parser\n" +
+            "  -directJPEG                  Forces the direct extraction of JPEG images regardless of colorspace\n" +
             "  <PDF file>                   The PDF document to use\n"
             );
         System.exit( 1 );