You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@pdfbox.apache.org by Ruhong Cai <Ru...@smartbear.com> on 2013/06/10 20:30:21 UTC
RE: pdfbox throws an exception when reading the attached pdf file.
When we run PDFBox on the attached PDF file, we get the following error. Thanks for the help!
java.io.IOException
at org.apache.pdfbox.filter.FlateFilter.decode(FlateFilter.java:138)
at org.apache.pdfbox.cos.COSStream.doDecode(COSStream.java:301)
at org.apache.pdfbox.cos.COSStream.doDecode(COSStream.java:221)
at org.apache.pdfbox.cos.COSStream.getUnfilteredStream(COSStream.java:156)
at org.apache.pdfbox.pdmodel.common.COSStreamArray.getUnfilteredStream(COSStreamArray.java:196)
at org.apache.pdfbox.pdfparser.PDFStreamParser.<init>(PDFStreamParser.java:108)
at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:253)
at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:237)
at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:217)
at PDFTest.main(PDFTest.java:57)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:120)
Caused by: java.util.zip.DataFormatException: unknown compression method
at java.util.zip.Inflater.inflateBytes(Native Method)
at java.util.zip.Inflater.inflate(Inflater.java:238)
at java.util.zip.Inflater.inflate(Inflater.java:256)
at org.apache.pdfbox.filter.FlateFilter.decompress(FlateFilter.java:169)
at org.apache.pdfbox.filter.FlateFilter.decode(FlateFilter.java:98)
... 14 more
Here is our code:
==========================================================================
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.PDFStreamEngine;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.util.TextPosition;
import java.io.IOException;
import java.util.Arrays;
import java.io.File;
public class PDFTest {
/**
 * Loads the sample PDF from its hard-coded location on disk.
 *
 * <p>This method does not close the returned document; that is left to the caller.
 *
 * @return the document produced by {@code PDDocument.load}
 * @throws Exception if loading fails; the original failure is attached as the cause
 */
private static PDDocument getPDF() throws Exception {
    try {
        return PDDocument.load(new File("C:\\tmp\\YPG-235.pdf"));
    } catch (Exception e) {
        // Chain the original exception so its type and stack trace are not lost.
        throw new Exception(e.getMessage(), e);
    }
}
/**
 * Loads the sample PDF, runs a text stripper over the content stream of its first page,
 * and records one {@link TextRect} per reported text position in a {@link TextRectArray}.
 *
 * <p>Failures are reported on standard output followed by a stack trace; the document is
 * closed in all cases once it has been opened.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if closing the document fails
 */
public static void main(String args[]) throws Exception
{
    PDDocument doc = null;
    try {
        doc = getPDF();
        // Page numbers are 1-based for humans; the page list is 0-based.
        PDPage page = (PDPage) doc.getDocumentCatalog().getAllPages().get(1 - 1);
        final TextRectArray trects = new TextRectArray();
        PDFStreamEngine grabber = new PDFTextStripper() {
            {
                setSortByPosition(true);
            }

            @Override
            protected void processTextPosition( TextPosition text ) {
                // All coordinates are scaled by a fixed factor of 1.5.
                float x = 1.5f * text.getXDirAdj();
                float h = 1.5f * Math.abs(text.getHeightDir());
                float y = 1.5f * text.getYDirAdj(); // their y is bottom left of the character
                float w = 1.5f * text.getWidthDirAdj();
                // Shift y up by the height so the rectangle is anchored at its top edge.
                TextRect rect = new TextRect(text.getCharacter(), x, y - h, w, h);
                trects.add(rect);
            }
        };
        PDStream content = page.getContents();
        if (content != null) {
            // Reuse the stream we just null-checked instead of fetching it again.
            grabber.processStream(page, page.findResources(), content.getStream());
        }
    } catch (Exception e) {
        // Not every exception carries a cause; fall back to the exception itself
        // so that error reporting cannot fail with a NullPointerException.
        Throwable reported = (e.getCause() != null) ? e.getCause() : e;
        System.out.println("Error occur: " + reported.getMessage() );
        e.printStackTrace();
    } finally {
        // Release the resources held by the document.
        if (doc != null) {
            doc.close();
        }
    }
}
/**
 * Growable store of text rectangles kept as parallel primitive arrays.
 *
 * <p>For entry {@code i}, {@code starts[i]} is the offset in {@code text} at which that
 * entry's characters begin, and {@code xs[i]}, {@code ys[i]}, {@code ws[i]}, {@code hs[i]}
 * hold its position and extent. {@code size} counts the entries stored so far and
 * {@code capacity} is the current length of the arrays.
 */
static class TextRectArray {
    StringBuilder text;
    int size;
    int capacity;
    int[] starts;
    float[] xs;
    float[] ys;
    float[] ws;
    float[] hs;
    int initialCapacity = 1024;

    /** Creates an empty store whose arrays and text buffer start at {@code initialCapacity}. */
    public TextRectArray() {
        final int initial = initialCapacity;
        starts = new int[initial];
        xs = new float[initial];
        ys = new float[initial];
        ws = new float[initial];
        hs = new float[initial];
        text = new StringBuilder(initial);
        capacity = initial;
        size = 0;
    }

    /**
     * Appends one rectangle, enlarging the arrays first when they are full.
     *
     * @param rect the rectangle whose text and geometry are copied into this store
     */
    public void add(TextRect rect) {
        if (size == capacity) {
            grow();
        }
        final int slot = size;
        // Record where this entry's characters will begin before appending them.
        starts[slot] = text.length();
        xs[slot] = rect.getX();
        ys[slot] = rect.getY();
        ws[slot] = rect.getW();
        hs[slot] = rect.getH();
        size = slot + 1;
        text.append(rect.getText());
    }

    /**
     * Doubles the length of every parallel array (working from a minimum of 16).
     *
     * @throws IllegalStateException if doubling overflows {@code int}
     */
    private void grow() {
        final int doubled = 2 * Math.max(16, capacity);
        if (doubled <= 0) {
            // The multiplication wrapped around.
            throw new IllegalStateException("this isn't meant to handle so many, currently " + size );
        }
        // The StringBuilder resizes itself; only the arrays need copying.
        starts = Arrays.copyOf(starts, doubled);
        xs = Arrays.copyOf(xs, doubled);
        ys = Arrays.copyOf(ys, doubled);
        ws = Arrays.copyOf(ws, doubled);
        hs = Arrays.copyOf(hs, doubled);
        capacity = doubled;
    }
}
/**
 * Mutable holder for a piece of text together with a rectangle described by
 * {@code x}, {@code y}, width {@code w} and height {@code h}.
 */
static class TextRect {
    public String text;
    public float x;
    public float y;
    public float w;
    public float h;

    /**
     * Creates a rectangle by passing each argument through the matching setter.
     *
     * @param text the text associated with the rectangle
     * @param x the x coordinate
     * @param y the y coordinate
     * @param w the width
     * @param h the height
     */
    public TextRect(String text, float x, float y, float w, float h) {
        setText(text);
        setX(x);
        setY(y);
        setW(w);
        setH(h);
    }

    // ---- accessors ----

    /** @return the stored text */
    public String getText() {
        return this.text;
    }

    /** @return the x coordinate */
    public float getX() {
        return this.x;
    }

    /** @return the y coordinate */
    public float getY() {
        return this.y;
    }

    /** @return the width */
    public float getW() {
        return this.w;
    }

    /** @return the height */
    public float getH() {
        return this.h;
    }

    // ---- mutators ----

    /** @param text the new text */
    public void setText(String text) {
        this.text = text;
    }

    /** @param x the new x coordinate */
    public void setX(float x) {
        this.x = x;
    }

    /** @param y the new y coordinate */
    public void setY(float y) {
        this.y = y;
    }

    /** @param w the new width */
    public void setW(float w) {
        this.w = w;
    }

    /** @param h the new height */
    public void setH(float h) {
        this.h = h;
    }
}
}