You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2017/07/25 15:26:18 UTC

svn commit: r1802967 - /pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java

Author: tilman
Date: Tue Jul 25 15:26:18 2017
New Revision: 1802967

URL: http://svn.apache.org/viewvc?rev=1802967&view=rev
Log:
PDFBOX-3878: refactor duplicated code and process patterns in addition to xobject forms

Modified:
    pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java

Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java?rev=1802967&r1=1802966&r2=1802967&view=diff
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java (original)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java Tue Jul 25 15:26:18 2017
@@ -16,7 +16,14 @@
  */
 package org.apache.pdfbox.examples.util;
 
+import java.io.File;
+import java.io.IOException;
 import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pdfbox.contentstream.PDContentStream;
+import org.apache.pdfbox.contentstream.operator.Operator;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.pdfparser.PDFStreamParser;
 import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
@@ -26,12 +33,8 @@ import org.apache.pdfbox.pdmodel.PDResou
 import org.apache.pdfbox.pdmodel.common.PDStream;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
 import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
-import org.apache.pdfbox.contentstream.operator.Operator;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
+import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
 
 /**
  * This is an example on how to remove all text from PDF document.
@@ -73,7 +76,11 @@ public final class RemoveAllText
                 }
                 for (PDPage page : document.getPages())
                 {
-                    removeAllTextTokens(page, document);
+                    List<Object> newTokens = createTokensWithoutText(page);
+                    PDStream newContents = new PDStream(document);
+                    writeTokensToStream(newContents, newTokens);
+                    page.setContents(newContents);
+                    processResources(page.getResources());
                 }
                 document.save(args[1]);
             }
@@ -82,75 +89,63 @@ public final class RemoveAllText
 
     private static void processResources(PDResources resources) throws IOException
     {
-        Iterable<COSName> names = resources.getXObjectNames();
-        for (COSName name : names)
+        for (COSName name : resources.getXObjectNames())
         {
             PDXObject xobject = resources.getXObject(name);
             if (xobject instanceof PDFormXObject)
             {
-                removeAllTextTokens((PDFormXObject) xobject);
+                PDFormXObject formXObject = (PDFormXObject) xobject;
+                writeTokensToStream(formXObject.getContentStream(),
+                        createTokensWithoutText(formXObject));
+                processResources(formXObject.getResources());
             }
         }
-    }
-
-    private static void removeAllTextTokens(PDPage page, PDDocument document) throws IOException
-    {
-        PDFStreamParser parser = new PDFStreamParser(page);
-        parser.parse();
-        List<Object> tokens = parser.getTokens();
-        List<Object> newTokens = new ArrayList<>();
-        for (Object token : tokens)
+        for (COSName name : resources.getPatternNames())
         {
-            if (token instanceof Operator)
+            PDAbstractPattern pattern = resources.getPattern(name);
+            if (pattern instanceof PDTilingPattern)
             {
-                String opname = ((Operator) token).getName();
-                if ("TJ".equals(opname) || "Tj".equals(opname))
-                {
-                    // remove the one argument to this operator
-                    newTokens.remove(newTokens.size() - 1);
-                    continue;
-                }
+                PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
+                writeTokensToStream(tilingPattern.getContentStream(),
+                        createTokensWithoutText(tilingPattern));
+                processResources(tilingPattern.getResources());
             }
-            newTokens.add(token);
         }
-        PDStream newContents = new PDStream(document);
+    }
+
+    private static void writeTokensToStream(PDStream newContents, List<Object> newTokens) throws IOException
+    {
         try (OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE))
         {
             ContentStreamWriter writer = new ContentStreamWriter(out);
             writer.writeTokens(newTokens);
         }
-        page.setContents(newContents);
-        processResources(page.getResources());
     }
 
-    private static void removeAllTextTokens(PDFormXObject xobject) throws IOException
+    private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException
     {
-        PDStream stream = xobject.getContentStream();
-        PDFStreamParser parser = new PDFStreamParser(xobject);
-        parser.parse();
-        List<Object> tokens = parser.getTokens();
+        PDFStreamParser parser = new PDFStreamParser(contentStream);
+        Object token = parser.parseNextToken();
         List<Object> newTokens = new ArrayList<>();
-        for (Object token : tokens)
+        while (token != null)
         {
             if (token instanceof Operator)
             {
                 Operator op = (Operator) token;
                 if ("TJ".equals(op.getName()) || "Tj".equals(op.getName()) ||
-                     "'".equals(op.getName()) || "\"".equals(op.getName()))
+                    "'".equals(op.getName()) || "\"".equals(op.getName()))
                 {
                     // remove the one argument to this operator
                     newTokens.remove(newTokens.size() - 1);
+                    
+                    token = parser.parseNextToken();
                     continue;
                 }
             }
             newTokens.add(token);
+            token = parser.parseNextToken();
         }
-        try (OutputStream out = stream.createOutputStream(COSName.FLATE_DECODE))
-        {
-            ContentStreamWriter writer = new ContentStreamWriter(out);
-            writer.writeTokens(newTokens);
-        }
-        processResources(xobject.getResources());
+        return newTokens;
     }
 
     /**