You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@pdfbox.apache.org by Ruhong Cai <Ru...@smartbear.com> on 2013/06/10 20:30:21 UTC

RE: pdfbox throws an exception when reading the attached pdf file.

 

 When we run PDFBox on the attached PDF file, we get the following error. Thanks for the help!

	java.io.IOException
	at org.apache.pdfbox.filter.FlateFilter.decode(FlateFilter.java:138)
	at org.apache.pdfbox.cos.COSStream.doDecode(COSStream.java:301)
	at org.apache.pdfbox.cos.COSStream.doDecode(COSStream.java:221)
	at org.apache.pdfbox.cos.COSStream.getUnfilteredStream(COSStream.java:156)
	at org.apache.pdfbox.pdmodel.common.COSStreamArray.getUnfilteredStream(COSStreamArray.java:196)
	at org.apache.pdfbox.pdfparser.PDFStreamParser.<init>(PDFStreamParser.java:108)
	at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:253)
	at org.apache.pdfbox.util.PDFStreamEngine.processSubStream(PDFStreamEngine.java:237)
	at org.apache.pdfbox.util.PDFStreamEngine.processStream(PDFStreamEngine.java:217)
	at PDFTest.main(PDFTest.java:57)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
	at java.lang.reflect.Method.invoke(Method.java:597)
	at com.intellij.rt.execution.application.AppMain.main(AppMain.java:120)
Caused by: java.util.zip.DataFormatException: unknown compression method
	at java.util.zip.Inflater.inflateBytes(Native Method)
	at java.util.zip.Inflater.inflate(Inflater.java:238)
	at java.util.zip.Inflater.inflate(Inflater.java:256)
	at org.apache.pdfbox.filter.FlateFilter.decompress(FlateFilter.java:169)
	at org.apache.pdfbox.filter.FlateFilter.decode(FlateFilter.java:98)
	... 14 more

Here is our code:
==========================================================================
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.PDFStreamEngine;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.pdfbox.util.TextPosition;

import java.io.IOException;
import java.util.Arrays;
import java.io.File;

public class PDFTest {
    /**
     * Loads the sample document from its hard-coded location on disk.
     *
     * @return the document returned by {@code PDDocument.load}
     * @throws Exception if loading fails; the message is that of the underlying
     *                   failure, and the underlying failure is attached as the cause
     */
    private static PDDocument getPDF() throws Exception    {
        try {
            return PDDocument.load(new File("C:\\tmp\\YPG-235.pdf"));
        } catch(Exception e) {
            // Chain the original exception so its type and stack trace are not lost;
            // the message is kept unchanged for callers that print it.
            throw new Exception(e.getMessage(), e);
        }
    }
    /**
     * Loads the sample PDF, feeds the content stream of its first page through a
     * {@code PDFTextStripper} subclass and collects one {@code TextRect} per
     * reported text position.
     *
     * Failures are reported on standard output together with a stack trace rather
     * than being propagated.
     *
     * @param args command-line arguments (unused)
     * @throws Exception declared for compatibility; failures inside the body are caught and reported
     */
    public static void  main(String args[])    throws Exception
    {
        PDDocument doc = null;
        try {
            doc = getPDF();
            // First page of the document.
            PDPage page = (PDPage) doc.getDocumentCatalog().getAllPages().get(0);

            final TextRectArray trects = new TextRectArray();
            PDFStreamEngine grabber = new PDFTextStripper() {
                {
                    setSortByPosition(true);
                }
                @Override
                protected void processTextPosition( TextPosition text ) {
                    // All coordinates and sizes are scaled by 1.5 before being stored.
                    float x = 1.5f * text.getXDirAdj();
                    float h = 1.5f * Math.abs(text.getHeightDir());
                    float y = 1.5f * text.getYDirAdj(); // their y is bottom left of the character
                    float w = 1.5f * text.getWidthDirAdj();
                    // Shift y up by the height so the rectangle is anchored at its top edge.
                    TextRect rect = new TextRect(text.getCharacter(), x, y-h, w, h);
                    trects.add(rect);
                }
            };
            PDStream content = page.getContents();
            if (content != null) {
                grabber.processStream(page, page.findResources(), content.getStream());
            }

        } catch (Exception e) {
            // An exception is not guaranteed to carry a cause; fall back to the
            // exception itself so that reporting the error cannot itself fail.
            Throwable root = (e.getCause() != null) ? e.getCause() : e;
            System.out.println("Error occur: " + root.getMessage() );
            e.printStackTrace();
        } finally {
            // Release the document's resources whether or not processing succeeded.
            if (doc != null) {
                try {
                    doc.close();
                } catch (IOException closeFailure) {
                    closeFailure.printStackTrace();
                }
            }
        }
    }
    /**
     * Growable, column-oriented store of {@link TextRect} values.
     *
     * The text of every added rectangle is appended to a single
     * {@link StringBuilder}; {@code starts[i]} records the builder length at the
     * moment entry {@code i} was added, and the geometry of entry {@code i} lives
     * in {@code xs[i]}, {@code ys[i]}, {@code ws[i]} and {@code hs[i]}.
     */
    static class TextRectArray {
        /** Concatenated text of all entries, in insertion order. */
        StringBuilder text;
        /** Number of entries stored so far. */
        int size;
        /** Number of entries the parallel arrays can currently hold. */
        int capacity;
        /** Offset into {@link #text} at which each entry's text begins. */
        int[] starts;
        float[] xs;
        float[] ys;
        float[] ws;
        float[] hs;
        /** Capacity used for the builder and every array at construction time. */
        int initialCapacity = 1024;

        /** Creates an empty store sized for {@code initialCapacity} entries. */
        public TextRectArray() {
            final int cap = initialCapacity;
            this.size = 0;
            this.text = new StringBuilder(cap); // usually average 1 char per
            this.capacity = cap;
            this.starts = new int[cap];
            this.xs = new float[cap];
            this.ys = new float[cap];
            this.ws = new float[cap];
            this.hs = new float[cap];
        }

        /**
         * Appends one rectangle, enlarging the backing arrays first when they are full.
         *
         * @param rect the rectangle whose text and geometry are recorded
         */
        public void add(TextRect rect) {
            if (capacity == size) {
                grow();
            }
            final int slot = size;
            starts[slot] = text.length();
            xs[slot] = rect.getX();
            ys[slot] = rect.getY();
            ws[slot] = rect.getW();
            hs[slot] = rect.getH();
            size = slot + 1;
            text.append(rect.getText());
        }

        /**
         * Doubles the capacity of every parallel array (starting from at least 16).
         *
         * @throws IllegalStateException if the doubled capacity is not positive
         */
        private void grow() {
            final int base = (capacity > 16) ? capacity : 16;
            final int doubled = base * 2;
            if (doubled <= 0) { // int arithmetic wrapped around
                throw new IllegalStateException("this isn't meant to handle so many, currently " + size );
            }

            // The StringBuilder resizes itself; only the arrays need copying.
            starts = Arrays.copyOf(starts, doubled);
            xs = Arrays.copyOf(xs, doubled);
            ys = Arrays.copyOf(ys, doubled);
            ws = Arrays.copyOf(ws, doubled);
            hs = Arrays.copyOf(hs, doubled);
            capacity = doubled;
        }
    }
    /**
     * A piece of text together with a rectangle described by {@code x}, {@code y},
     * width {@code w} and height {@code h}.
     */
    static class TextRect {
        public String text;
        public float x, y, w, h;

        /**
         * Creates a rectangle with the given text and geometry.
         *
         * The fields are assigned directly instead of through the public setters,
         * so that a subclass overriding a setter is never invoked before its own
         * construction has completed.
         *
         * @param text the text associated with the rectangle
         * @param x    the x coordinate
         * @param y    the y coordinate
         * @param w    the width
         * @param h    the height
         */
        public TextRect(String text, float x, float y, float w, float h) {
            this.text = text;
            this.x = x;
            this.y = y;
            this.w = w;
            this.h = h;
        }

        /** @return the text associated with this rectangle */
        public String getText() {
            return text;
        }

        /** @param text the new text */
        public void setText(String text) {
            this.text = text;
        }

        /** @return the x coordinate */
        public float getX() {
            return x;
        }

        /** @param x the new x coordinate */
        public void setX(float x) {
            this.x = x;
        }

        /** @return the y coordinate */
        public float getY() {
            return y;
        }

        /** @param y the new y coordinate */
        public void setY(float y) {
            this.y = y;
        }

        /** @return the width */
        public float getW() {
            return w;
        }

        /** @param w the new width */
        public void setW(float w) {
            this.w = w;
        }

        /** @return the height */
        public float getH() {
            return h;
        }

        /** @param h the new height */
        public void setH(float h) {
            this.h = h;
        }
    }
}