You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@pdfbox.apache.org by Jimran <ji...@gmail.com> on 2010/01/07 10:31:49 UTC
java.io.IOException: expected='/' actual='%'-37

Hi Developer,

* I need to search for certain words such as Thumb, FitH, and Linearized in the given
input PDF file by extending the BaseParser class.

* I have overridden some methods of BaseParser.

* When I call parseCOSName() on my PdfBaseParser class, it throws the following exception:

java.io.IOException: expected='/' actual='%'-37
org.pdfbox.io.PushBackInputStream@bcb23e

at com.elsevier.vtool.PdfBaseParser.parseCOSName(PdfBaseParser.java:28)

at com.elsevier.vtool.PdfBaseParser.<init>(PdfBaseParser.java:19)

* That is, the exception clearly says that the PDF character content should start with
'/'.

* Please give me an idea of how to call the parseCOSName() method without this exception.
That is, do I have to override or call some other method to avoid this
exception? Please confirm.

* Thanks in advance

* The following is my code,
package com.elsevier.vtool;

import java.io.IOException;
import java.io.InputStream;

import org.pdfbox.cos.COSDocument;
import org.pdfbox.cos.COSName;
import org.pdfbox.pdfparser.BaseParser;

*

public* *class* PdfBaseParser *extends* BaseParser {

//private COSDocument document = null;

*private* *boolean* linearized;

*private* *boolean* thumbnail;

*private* *boolean* fitwidth;

*public* PdfBaseParser(InputStream input) *throws* IOException {

*super*(input);

parseCOSName();

}

*protected* COSName parseCOSName() {

COSName retval = *null*;

*try* {

*int* c = pdfSource.read();
*

if*( (*char*)c != '/')

{

*throw* *new* IOException("expected='/' actual='" + (*char*)c + "'-" + c + "
" + pdfSource );

}

// *costruisce* *il* *nome
*

StringBuffer buffer = *new* StringBuffer();

c = pdfSource.read();

*while*( c != -1 )

{

*char* ch = (*char*)c;

*if*(ch == '#')

{

*char* ch1 = (*char*)pdfSource.read();

*char* ch2 = (*char*)pdfSource.read();

// Prior to PDF v1.2, the # was not a special character. Also,

// it has been observed that various PDF tools do not follow the

// *spec* with respect to the # escape, even though they report

// PDF versions of 1.2 or later. The solution here is that we

// interpret the # as an escape only when it is followed by two

// valid hex digits.

//

*if* (*isHexDigit*(ch1) && *isHexDigit*(ch2))

{

String hex = "" + ch1 + ch2;

*try
*

{

buffer.append( (*char*) Integer.*parseInt*(hex, 16));

}

*catch* (NumberFormatException e)

{

*throw* *new* IOException("Error: expected hex number, actual='" + hex + "'"
);

}

c = pdfSource.read();

}

*else
*

{

pdfSource.unread(ch2);

c = ch1;

buffer.append( ch );

}

}

*else* *if* (isEndOfName(ch))

{

*break*;

}

*else
*

{

buffer.append( ch );

c = pdfSource.read();

}

}

*if* (c != -1)

{

pdfSource.unread(c);

}

//Start of additional code

*if* (buffer.toString().equals("Linearized"))

{

setLinearized();

}

*if* (buffer.toString().equals("Thumb"))

{

setThumbnail();

}

*if* (buffer.toString().equals("FitH"))

{

setFitwidth();

}

//End of additional code

retval = COSName.*getPDFName*( buffer.toString() );

} *catch* (IOException e) {

e.printStackTrace();

}

*return* retval;

}

*public* *void* setLinearized()

{

linearized = *true*;

}

*public* *boolean* getLinearized()

{

*return* linearized;

}

*public* *void* setThumbnail()

{

thumbnail = *true*;

}

*public* *boolean* getThumbnail()

{

*return* thumbnail;

}

*public* *void* setFitwidth()

{

fitwidth = *true*;

}

*public* *boolean* getFitwidth()

{

*return* fitwidth;

}

*private* *static* *boolean* isHexDigit(*char* ch)

{

*return* (ch >= '0' && ch <= '9') ||

(ch >= 'a' && ch <= 'f') ||

(ch >= 'A' && ch <= 'F');

// the line below can lead to problems with certain versions of the IBM JIT
compiler

// (and is slower anyway)

//return (HEXDIGITS.indexOf(*ch*) != -1);

}

}


-- 
With Regards,
JavaImran