You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2017/07/25 15:26:18 UTC
svn commit: r1802967 -
/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java
Author: tilman
Date: Tue Jul 25 15:26:18 2017
New Revision: 1802967
URL: http://svn.apache.org/viewvc?rev=1802967&view=rev
Log:
PDFBOX-3878: refactor duplicated code and process tiling patterns in addition to form XObjects
Modified:
pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java
Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java?rev=1802967&r1=1802966&r2=1802967&view=diff
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java (original)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java Tue Jul 25 15:26:18 2017
@@ -16,7 +16,14 @@
*/
package org.apache.pdfbox.examples.util;
+import java.io.File;
+import java.io.IOException;
import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.pdfbox.contentstream.PDContentStream;
+import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
@@ -26,12 +33,8 @@ import org.apache.pdfbox.pdmodel.PDResou
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
-import org.apache.pdfbox.contentstream.operator.Operator;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
+import org.apache.pdfbox.pdmodel.graphics.pattern.PDTilingPattern;
/**
* This is an example on how to remove all text from PDF document.
@@ -73,7 +76,11 @@ public final class RemoveAllText
}
for (PDPage page : document.getPages())
{
- removeAllTextTokens(page, document);
+ List<Object> newTokens = createTokensWithoutText(page);
+ PDStream newContents = new PDStream(document);
+ writeTokensToStream(newContents, newTokens);
+ page.setContents(newContents);
+ processResources(page.getResources());
}
document.save(args[1]);
}
@@ -82,75 +89,63 @@ public final class RemoveAllText
private static void processResources(PDResources resources) throws IOException
{
- Iterable<COSName> names = resources.getXObjectNames();
- for (COSName name : names)
+ for (COSName name : resources.getXObjectNames())
{
PDXObject xobject = resources.getXObject(name);
if (xobject instanceof PDFormXObject)
{
- removeAllTextTokens((PDFormXObject) xobject);
+ PDFormXObject formXObject = (PDFormXObject) xobject;
+ writeTokensToStream(formXObject.getContentStream(),
+ createTokensWithoutText(formXObject));
+ processResources(formXObject.getResources());
}
}
- }
-
- private static void removeAllTextTokens(PDPage page, PDDocument document) throws IOException
- {
- PDFStreamParser parser = new PDFStreamParser(page);
- parser.parse();
- List<Object> tokens = parser.getTokens();
- List<Object> newTokens = new ArrayList<>();
- for (Object token : tokens)
+ for (COSName name : resources.getPatternNames())
{
- if (token instanceof Operator)
+ PDAbstractPattern pattern = resources.getPattern(name);
+ if (pattern instanceof PDTilingPattern)
{
- String opname = ((Operator) token).getName();
- if ("TJ".equals(opname) || "Tj".equals(opname))
- {
- // remove the one argument to this operator
- newTokens.remove(newTokens.size() - 1);
- continue;
- }
+ PDTilingPattern tilingPattern = (PDTilingPattern) pattern;
+ writeTokensToStream(tilingPattern.getContentStream(),
+ createTokensWithoutText(tilingPattern));
+ processResources(tilingPattern.getResources());
}
- newTokens.add(token);
}
- PDStream newContents = new PDStream(document);
+ }
+
+ private static void writeTokensToStream(PDStream newContents, List<Object> newTokens) throws IOException
+ {
try (OutputStream out = newContents.createOutputStream(COSName.FLATE_DECODE))
{
ContentStreamWriter writer = new ContentStreamWriter(out);
writer.writeTokens(newTokens);
}
- page.setContents(newContents);
- processResources(page.getResources());
}
- private static void removeAllTextTokens(PDFormXObject xobject) throws IOException
+ private static List<Object> createTokensWithoutText(PDContentStream contentStream) throws IOException
{
- PDStream stream = xobject.getContentStream();
- PDFStreamParser parser = new PDFStreamParser(xobject);
- parser.parse();
- List<Object> tokens = parser.getTokens();
+ PDFStreamParser parser = new PDFStreamParser(contentStream);
+ Object token = parser.parseNextToken();
List<Object> newTokens = new ArrayList<>();
- for (Object token : tokens)
+ while (token != null)
{
if (token instanceof Operator)
{
Operator op = (Operator) token;
if ("TJ".equals(op.getName()) || "Tj".equals(op.getName()) ||
- "'".equals(op.getName()) || "\"".equals(op.getName()))
+ "'".equals(op.getName()) || "\"".equals(op.getName()))
{
// remove the one argument to this operator
newTokens.remove(newTokens.size() - 1);
+
+ token = parser.parseNextToken();
continue;
}
}
newTokens.add(token);
+ token = parser.parseNextToken();
}
- try (OutputStream out = stream.createOutputStream(COSName.FLATE_DECODE))
- {
- ContentStreamWriter writer = new ContentStreamWriter(out);
- writer.writeTokens(newTokens);
- }
- processResources(xobject.getResources());
+ return newTokens;
}
/**