You are viewing a plain text version of this content. The canonical link for it is here.
Posted to users@pdfbox.apache.org by Jimran <ji...@gmail.com> on 2010/01/07 10:31:49 UTC
java.io.IOException: expected='/' actual='%'-37
Hi Developer,
* I need to search for certain words, such as Thumb, FitH and Linearized, in a given
input PDF file by extending the BaseParser class.
* I have overridden some methods of BaseParser.
* When I call parseCOSName() on my PdfBaseParser class, it throws the following exception:*
java.io.IOException*: expected='/' actual='%'-37
org.pdfbox.io.PushBackInputStream@bcb23e
at com.elsevier.vtool.PdfBaseParser.parseCOSName(*PdfBaseParser.java:28*)
at com.elsevier.vtool.PdfBaseParser.<init>(*PdfBaseParser.java:19*)
* So the exception clearly says that the PDF content being parsed should start with
'/'.
* Please give me an idea of how to call the parseCOSName() method without getting this
exception. That is, do I have to override it differently, or call some other method first,
to avoid this exception? Please confirm.
* Thanks in advance
* The following is my code,
*
package* com.elsevier.vtool;
*import* java.io.IOException;
*
import* java.io.InputStream;
*
import* *org.pdfbox.cos.COSDocument*;
*
import* org.pdfbox.cos.COSName;
*
import* org.pdfbox.pdfparser.BaseParser;
*
public* *class* PdfBaseParser *extends* BaseParser {
//private COSDocument document = null;
*private* *boolean* linearized;
*private* *boolean* thumbnail;
*private* *boolean* fitwidth;
*public* PdfBaseParser(InputStream input) *throws* IOException {
*super*(input);
parseCOSName();
}
*protected* COSName parseCOSName() {
COSName retval = *null*;
*try* {
*int* c = pdfSource.read();
*
if*( (*char*)c != '/')
{
*throw* *new* IOException("expected='/' actual='" + (*char*)c + "'-" + c + "
" + pdfSource );
}
// *costruisce* *il* *nome
*
StringBuffer buffer = *new* StringBuffer();
c = pdfSource.read();
*while*( c != -1 )
{
*char* ch = (*char*)c;
*if*(ch == '#')
{
*char* ch1 = (*char*)pdfSource.read();
*char* ch2 = (*char*)pdfSource.read();
// Prior to PDF v1.2, the # was not a special character. Also,
// it has been observed that various PDF tools do not follow the
// *spec* with respect to the # escape, even though they report
// PDF versions of 1.2 or later. The solution here is that we
// interpret the # as an escape only when it is followed by two
// valid hex digits.
//
*if* (*isHexDigit*(ch1) && *isHexDigit*(ch2))
{
String hex = "" + ch1 + ch2;
*try
*
{
buffer.append( (*char*) Integer.*parseInt*(hex, 16));
}
*catch* (NumberFormatException e)
{
*throw* *new* IOException("Error: expected hex number, actual='" + hex + "'"
);
}
c = pdfSource.read();
}
*else
*
{
pdfSource.unread(ch2);
c = ch1;
buffer.append( ch );
}
}
*else* *if* (isEndOfName(ch))
{
*break*;
}
*else
*
{
buffer.append( ch );
c = pdfSource.read();
}
}
*if* (c != -1)
{
pdfSource.unread(c);
}
//Start of additional code
*if* (buffer.toString().equals("Linearized"))
{
setLinearized();
}
*if* (buffer.toString().equals("Thumb"))
{
setThumbnail();
}
*if* (buffer.toString().equals("FitH"))
{
setFitwidth();
}
//End of additional code
retval = COSName.*getPDFName*( buffer.toString() );
} *catch* (IOException e) {
e.printStackTrace();
}
*return* retval;
}
*public* *void* setLinearized()
{
linearized = *true*;
}
*public* *boolean* getLinearized()
{
*return* linearized;
}
*public* *void* setThumbnail()
{
thumbnail = *true*;
}
*public* *boolean* getThumbnail()
{
*return* thumbnail;
}
*public* *void* setFitwidth()
{
fitwidth = *true*;
}
*public* *boolean* getFitwidth()
{
*return* fitwidth;
}
*private* *static* *boolean* isHexDigit(*char* ch)
{
*return* (ch >= '0' && ch <= '9') ||
(ch >= 'a' && ch <= 'f') ||
(ch >= 'A' && ch <= 'F');
// the line below can lead to problems with certain versions of the IBM JIT
compiler
// (and is slower anyway)
//return (HEXDIGITS.indexOf(*ch*) != -1);
}
}
--
With Regards,
JavaImran