You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2012/02/18 19:04:25 UTC
svn commit: r1290834 -
/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
Author: leleueri
Date: Sat Feb 18 18:04:25 2012
New Revision: 1290834
URL: http://svn.apache.org/viewvc?rev=1290834&view=rev
Log:
[PDFBOX-1161] Addition of a control on the end of xxxrange & xxxchar operators
Modified:
pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1290834&r1=1290833&r2=1290834&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sat Feb 18 18:04:25 2012
@@ -37,613 +37,654 @@ import org.apache.fontbox.util.ResourceL
*/
public class CMapParser
{
- private static final String BEGIN_CODESPACE_RANGE = "begincodespacerange";
- private static final String BEGIN_BASE_FONT_CHAR = "beginbfchar";
- private static final String BEGIN_BASE_FONT_RANGE = "beginbfrange";
- private static final String BEGIN_CID_CHAR = "begincidchar";
- private static final String BEGIN_CID_RANGE = "begincidrange";
- private static final String USECMAP = "usecmap";
-
- private static final String WMODE = "WMode";
- private static final String CMAP_NAME = "CMapName";
- private static final String CMAP_VERSION = "CMapVersion";
- private static final String CMAP_TYPE = "CMapType";
- private static final String REGISTRY = "Registry";
- private static final String ORDERING = "Ordering";
- private static final String SUPPLEMENT = "Supplement";
-
- private static final String MARK_END_OF_DICTIONARY = ">>";
- private static final String MARK_END_OF_ARRAY = "]";
-
-
- private byte[] tokenParserByteBuffer = new byte[512];
-
- /**
- * Creates a new instance of CMapParser.
- */
- public CMapParser()
- {
- }
-
- /**
- * Parse a CMAP file on the file system.
- *
- * @param file The file to parse.
- *
- * @return A parsed CMAP file.
- *
- * @throws IOException If there is an issue while parsing the CMAP.
- */
- public CMap parse( File file ) throws IOException
- {
- String rootDir = file.getParent() + File.separator;
- FileInputStream input = null;
- try
- {
- input = new FileInputStream( file );
- return parse( rootDir, input );
- }
- finally
- {
- if( input != null )
- {
- input.close();
- }
- }
-
- }
-
- /**
- * This will parse the stream and create a cmap object.
- *
- * @param resourceRoot The root path to the cmap file. This will be used
- * to find referenced cmap files. It can be null.
- * @param input The CMAP stream to parse.
- *
- * @return The parsed stream as a java object.
- *
- * @throws IOException If there is an error parsing the stream.
- */
- public CMap parse( String resourceRoot, InputStream input ) throws IOException
- {
- PushbackInputStream cmapStream = new PushbackInputStream( input );
- CMap result = new CMap();
- Object previousToken = null;
- Object token = null;
- while( (token = parseNextToken( cmapStream )) != null )
- {
- if( token instanceof Operator )
- {
- Operator op = (Operator)token;
- if( op.op.equals( USECMAP ) )
- {
- LiteralName useCmapName = (LiteralName)previousToken;
- InputStream useStream = ResourceLoader.loadResource( resourceRoot + useCmapName.name );
- if( useStream == null )
- {
- throw new IOException( "Error: Could not find referenced cmap stream " + useCmapName.name );
- }
- CMap useCMap = parse( resourceRoot, useStream );
- result.useCmap( useCMap );
- }
- else if( op.op.equals( BEGIN_CODESPACE_RANGE ) )
- {
- Number cosCount = (Number)previousToken;
- for( int j=0; j<cosCount.intValue(); j++ )
- {
- byte[] startRange = (byte[])parseNextToken( cmapStream );
- byte[] endRange = (byte[])parseNextToken( cmapStream );
- CodespaceRange range = new CodespaceRange();
- range.setStart( startRange );
- range.setEnd( endRange );
- result.addCodespaceRange( range );
- }
- }
- else if( op.op.equals( BEGIN_BASE_FONT_CHAR ) )
- {
- Number cosCount = (Number)previousToken;
- for( int j=0; j<cosCount.intValue(); j++ )
- {
- byte[] inputCode = (byte[])parseNextToken( cmapStream );
- Object nextToken = parseNextToken( cmapStream );
- if( nextToken instanceof byte[] )
- {
- byte[] bytes = (byte[])nextToken;
- String value = createStringFromBytes( bytes );
- result.addMapping( inputCode, value );
- }
- else if( nextToken instanceof LiteralName )
- {
- result.addMapping( inputCode, ((LiteralName)nextToken).name );
- }
- else
- {
- throw new IOException( "Error parsing CMap beginbfchar, expected{COSString " +
- "or COSName} and not " + nextToken );
- }
- }
- }
- else if( op.op.equals( BEGIN_BASE_FONT_RANGE ) )
- {
- Number cosCount = (Number)previousToken;
-
- for( int j=0; j<cosCount.intValue(); j++ )
- {
- byte[] startCode = (byte[])parseNextToken( cmapStream );
- byte[] endCode = (byte[])parseNextToken( cmapStream );
- Object nextToken = parseNextToken( cmapStream );
- List<byte[]> array = null;
- byte[] tokenBytes = null;
- if( nextToken instanceof List<?> )
- {
- array = (List<byte[]>)nextToken;
- tokenBytes = array.get( 0 );
- }
- else
- {
- tokenBytes = (byte[])nextToken;
- }
-
- String value = null;
-
- int arrayIndex = 0;
- boolean done = false;
- while( !done )
- {
- if( compare( startCode, endCode ) >= 0 )
- {
- done = true;
- }
- value = createStringFromBytes( tokenBytes );
- result.addMapping( startCode, value );
- increment( startCode );
-
- if( array == null )
- {
- increment( tokenBytes );
- }
- else
- {
- arrayIndex++;
- if( arrayIndex < array.size() )
- {
- tokenBytes = (byte[])array.get( arrayIndex );
- }
- }
- }
- }
- }
- else if( op.op.equals( BEGIN_CID_CHAR ) )
- {
- Number cosCount = (Number)previousToken;
- for( int j=0; j<cosCount.intValue(); j++ )
- {
- byte[] inputCode = (byte[])parseNextToken( cmapStream );
- int mappedCode = (Integer)parseNextToken( cmapStream );
- String mappedStr = createStringFromBytes(inputCode);
- result.addCIDMapping(mappedCode, mappedStr);
- }
- }
- else if( op.op.equals( BEGIN_CID_RANGE ) )
- {
- int numberOfLines = (Integer)previousToken;
- for (int n=0; n < numberOfLines;n++) {
- byte[] startCode = (byte[])parseNextToken( cmapStream );
- int start = createIntFromBytes(startCode);
- byte[] endCode = (byte[])parseNextToken( cmapStream );
- int end = createIntFromBytes(endCode);
- int mappedCode = (Integer)parseNextToken( cmapStream );
- if (startCode.length <= 2 && endCode.length <= 2) {
- result.addCIDRange(
- (char) start, (char) end, mappedCode);
- } else {
- // TODO: Is this even possible?
- int endOfMappings = mappedCode + end-start;
- while (mappedCode<=endOfMappings) {
- String mappedStr = createStringFromBytes(startCode);
- result.addCIDMapping(mappedCode++, mappedStr);
- increment(startCode);
- }
- }
- }
- }
- }
- else if (token instanceof LiteralName){
- LiteralName literal = (LiteralName)token;
- if (WMODE.equals(literal.name))
- {
- Object next = parseNextToken(cmapStream);
- if (next instanceof Integer)
- {
- result.setWMode((Integer)next);
- }
- }
- else if (CMAP_NAME.equals(literal.name))
- {
- Object next = parseNextToken(cmapStream);
- if (next instanceof LiteralName)
- {
- result.setName(((LiteralName)next).name);
- }
- }
- else if (CMAP_VERSION.equals(literal.name))
- {
- Object next = parseNextToken(cmapStream);
- if (next instanceof Number)
- {
- result.setVersion(((Number)next).toString());
- }
- else if (next instanceof String)
- {
- result.setVersion((String)next);
- }
- }
- else if (CMAP_TYPE.equals(literal.name))
- {
- Object next = parseNextToken(cmapStream);
- if (next instanceof Integer)
- {
- result.setType((Integer)next);
- }
- }
- else if (REGISTRY.equals(literal.name))
- {
- Object next = parseNextToken(cmapStream);
- if (next instanceof String)
- {
- result.setRegistry((String)next);
- }
- }
- else if (ORDERING.equals(literal.name))
- {
- Object next = parseNextToken(cmapStream);
- if (next instanceof String)
- {
- result.setOrdering((String)next);
- }
- }
- else if (SUPPLEMENT.equals(literal.name))
- {
- Object next = parseNextToken(cmapStream);
- if (next instanceof Integer)
- {
- result.setSupplement((Integer)next);
- }
- }
- }
- previousToken = token;
- }
- return result;
- }
-
- private Object parseNextToken( PushbackInputStream is ) throws IOException
- {
- Object retval = null;
- int nextByte = is.read();
- //skip whitespace
- while( nextByte == 0x09 || nextByte == 0x20 || nextByte == 0x0D || nextByte == 0x0A )
- {
- nextByte = is.read();
- }
- switch( nextByte )
- {
- case '%':
- {
- //header operations, for now return the entire line
- //may need to smarter in the future
- StringBuffer buffer = new StringBuffer();
- buffer.append( (char)nextByte );
- readUntilEndOfLine( is, buffer );
- retval = buffer.toString();
- break;
- }
- case '(':
- {
- StringBuffer buffer = new StringBuffer();
- int stringByte = is.read();
-
- while( stringByte != -1 && stringByte != ')' )
- {
- buffer.append( (char)stringByte );
- stringByte = is.read();
- }
- retval = buffer.toString();
- break;
- }
- case '>':
- {
- int secondCloseBrace = is.read();
- if( secondCloseBrace == '>' )
- {
- retval = MARK_END_OF_DICTIONARY;
- }
- else
- {
- throw new IOException( "Error: expected the end of a dictionary.");
- }
- break;
- }
- case ']':
- {
- retval = MARK_END_OF_ARRAY;
- break;
- }
- case '[':
- {
- List<Object> list = new ArrayList<Object>();
-
- Object nextToken = parseNextToken( is );
- while( nextToken != null && nextToken != MARK_END_OF_ARRAY )
- {
- list.add( nextToken );
- nextToken = parseNextToken( is );
- }
- retval = list;
- break;
- }
- case '<':
- {
- int theNextByte = is.read();
- if( theNextByte == '<' )
- {
- Map<String,Object> result = new HashMap<String,Object>();
- //we are reading a dictionary
- Object key = parseNextToken( is );
- while( key instanceof LiteralName && key != MARK_END_OF_DICTIONARY )
- {
- Object value = parseNextToken( is );
- result.put( ((LiteralName)key).name, value );
- key = parseNextToken( is );
- }
- retval = result;
- }
- else
- {
- //won't read more than 512 bytes
-
- int multiplyer = 16;
- int bufferIndex = -1;
- while( theNextByte != -1 && theNextByte != '>' )
- {
- int intValue = 0;
- if( theNextByte >= '0' && theNextByte <= '9' )
- {
- intValue = theNextByte - '0';
- }
- else if( theNextByte >= 'A' && theNextByte <= 'F' )
- {
- intValue = 10 + theNextByte - 'A';
- }
- else if( theNextByte >= 'a' && theNextByte <= 'f' )
- {
- intValue = 10 + theNextByte - 'a';
- }
- else if( theNextByte == 0x20 )
- {
- // skipping whitespaces
- theNextByte = is.read();
- continue;
- }
- else
- {
- throw new IOException( "Error: expected hex character and not " +
- (char)theNextByte + ":" + theNextByte );
- }
- intValue *= multiplyer;
- if( multiplyer == 16 )
- {
- bufferIndex++;
- tokenParserByteBuffer[bufferIndex] = 0;
- multiplyer = 1;
- }
- else
- {
- multiplyer = 16;
- }
- tokenParserByteBuffer[bufferIndex]+= intValue;
- theNextByte = is.read();
- }
- byte[] finalResult = new byte[bufferIndex+1];
- System.arraycopy(tokenParserByteBuffer,0,finalResult, 0, bufferIndex+1);
- retval = finalResult;
- }
- break;
- }
- case '/':
- {
- StringBuffer buffer = new StringBuffer();
- int stringByte = is.read();
-
- while( !isWhitespaceOrEOF( stringByte ) )
- {
- buffer.append( (char)stringByte );
- stringByte = is.read();
- }
- retval = new LiteralName( buffer.toString() );
- break;
- }
- case -1:
- {
- //EOF return null;
- break;
- }
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- {
- StringBuffer buffer = new StringBuffer();
- buffer.append( (char)nextByte );
- nextByte = is.read();
-
- while( !isWhitespaceOrEOF( nextByte ) &&
- (Character.isDigit( (char)nextByte )||
- nextByte == '.' ) )
- {
- buffer.append( (char)nextByte );
- nextByte = is.read();
- }
- is.unread( nextByte );
- String value = buffer.toString();
- if( value.indexOf( '.' ) >=0 )
- {
- retval = new Double( value );
- }
- else
- {
- retval = new Integer( value );
- }
- break;
- }
- default:
- {
- StringBuffer buffer = new StringBuffer();
- buffer.append( (char)nextByte );
- nextByte = is.read();
-
- while( !isWhitespaceOrEOF( nextByte ) )
- {
- buffer.append( (char)nextByte );
- nextByte = is.read();
- }
- retval = new Operator( buffer.toString() );
-
- break;
- }
- }
- return retval;
- }
-
- private void readUntilEndOfLine( InputStream is, StringBuffer buf ) throws IOException
- {
- int nextByte = is.read();
- while( nextByte != -1 && nextByte != 0x0D && nextByte != 0x0A )
- {
- buf.append( (char)nextByte );
- nextByte = is.read();
- }
- }
-
- private boolean isWhitespaceOrEOF( int aByte )
- {
- return aByte == -1 || aByte == 0x20 || aByte == 0x0D || aByte == 0x0A;
- }
-
-
- private void increment( byte[] data )
- {
- increment( data, data.length-1 );
- }
-
- private void increment( byte[] data, int position )
- {
- if( position > 0 && (data[position]+256)%256 == 255 )
- {
- data[position]=0;
- increment( data, position-1);
- }
- else
- {
- data[position] = (byte)(data[position]+1);
- }
- }
-
- private int createIntFromBytes(byte[] bytes)
- {
- int intValue = (bytes[0]+256)%256;
- if (bytes.length == 2)
- {
- intValue <<= 8;
- intValue += (bytes[1]+256)%256;
- }
- return intValue;
- }
-
- private String createStringFromBytes( byte[] bytes ) throws IOException
- {
- String retval = null;
- if( bytes.length == 1 )
- {
- retval = new String( bytes, "ISO-8859-1" );
- }
- else
- {
- retval = new String( bytes, "UTF-16BE" );
- }
- return retval;
- }
-
- private int compare( byte[] first, byte[] second )
- {
- int retval = 1;
- int firstLength = first.length;
- for( int i=0; i<firstLength; i++ )
- {
- if( first[i] == second[i] )
- {
- continue;
- }
- else if( ((first[i]+256)%256) < ((second[i]+256)%256) )
- {
- retval = -1;
- break;
- }
- else
- {
- retval = 1;
- break;
- }
- }
- return retval;
- }
-
- /**
- * Internal class.
- */
- private class LiteralName
- {
- private String name;
- private LiteralName( String theName )
- {
- name = theName;
- }
- }
-
- /**
- * Internal class.
- */
- private class Operator
- {
- private String op;
- private Operator( String theOp )
- {
- op = theOp;
- }
- }
-
- /**
- * A simple class to test parsing of cmap files.
- *
- * @param args Some command line arguments.
- *
- * @throws Exception If there is an error parsing the file.
- */
- public static void main( String[] args ) throws Exception
- {
- if( args.length != 1 )
- {
- System.err.println( "usage: java org.pdfbox.cmapparser.CMapParser <CMAP File>" );
- System.exit( -1 );
- }
- CMapParser parser = new CMapParser( );
- File cmapFile = new File( args[0] );
- CMap result = parser.parse( cmapFile );
- System.out.println( "Result:" + result );
- }
+ // Operators that open a counted section: parse() takes the token read just before
+ // the operator as the number of entries that follow.
+ private static final String BEGIN_CODESPACE_RANGE = "begincodespacerange";
+ private static final String BEGIN_BASE_FONT_CHAR = "beginbfchar";
+ private static final String BEGIN_BASE_FONT_RANGE = "beginbfrange";
+ private static final String BEGIN_CID_CHAR = "begincidchar";
+ private static final String BEGIN_CID_RANGE = "begincidrange";
+ // Operator whose preceding literal name identifies another cmap to load and merge.
+ private static final String USECMAP = "usecmap";
+
+ // Operators that close the counted sections above. parse() accepts the matching one
+ // in place of an entry, i.e. before the announced count has been reached.
+ private static final String END_CODESPACE_RANGE = "endcodespacerange";
+ private static final String END_BASE_FONT_CHAR = "endbfchar";
+ private static final String END_BASE_FONT_RANGE = "endbfrange";
+ private static final String END_CID_CHAR = "endcidchar";
+ private static final String END_CID_RANGE = "endcidrange";
+
+ // Literal names whose following token is copied into the resulting CMap by parse().
+ private static final String WMODE = "WMode";
+ private static final String CMAP_NAME = "CMapName";
+ private static final String CMAP_VERSION = "CMapVersion";
+ private static final String CMAP_TYPE = "CMapType";
+ private static final String REGISTRY = "Registry";
+ private static final String ORDERING = "Ordering";
+ private static final String SUPPLEMENT = "Supplement";
+
+ // Marker tokens returned by parseNextToken(). MARK_END_OF_ARRAY is compared there by
+ // reference (!=), so it must stay this exact String instance.
+ private static final String MARK_END_OF_DICTIONARY = ">>";
+ private static final String MARK_END_OF_ARRAY = "]";
+
+
+ // Scratch buffer parseNextToken() decodes <hex> strings into before copying the
+ // used prefix to a fresh array; being an instance field, it is shared by all calls
+ // made on the same parser instance.
+ private byte[] tokenParserByteBuffer = new byte[512];
+
+ /**
+ * Creates a new instance of CMapParser.
+ *
+ * The constructor body is empty: the only per-instance state declared in this
+ * class, the 512-byte hex-string scratch buffer, is allocated by its field
+ * initializer.
+ */
+ public CMapParser()
+ {
+ }
+
+ /**
+  * Parse a CMAP file on the file system.
+  *
+  * The directory that contains the file is handed to
+  * {@link #parse(String, InputStream)} as the resource root, so that cmaps
+  * referenced through <code>usecmap</code> are looked up next to the file.
+  *
+  * @param file The file to parse.
+  *
+  * @return A parsed CMAP file.
+  *
+  * @throws IOException If there is an issue while parsing the CMAP.
+  */
+ public CMap parse( File file ) throws IOException
+ {
+     // File.getParent() returns null when the path has no directory part (for
+     // example "Identity-H"); concatenating it used to yield the bogus root
+     // "null" + separator. An empty root leaves the referenced name untouched.
+     String parent = file.getParent();
+     String rootDir = parent == null ? "" : parent + File.separator;
+     FileInputStream input = new FileInputStream( file );
+     try
+     {
+         return parse( rootDir, input );
+     }
+     finally
+     {
+         // always release the file handle, even when parsing fails
+         input.close();
+     }
+ }
+
+ /**
+  * This will parse the stream and create a cmap object.
+  *
+  * Tokens are read one at a time. An operator token selects one of the
+  * section handlers below; for the counted sections the token read just
+  * before the operator is used as the number of entries. A literal name
+  * token that matches one of the known header keys has its following token
+  * stored in the result. Every other token is only remembered as the
+  * "previous token" for the next iteration.
+  *
+  * @param resourceRoot The root path to the cmap file.  This will be used
+  *                     to find referenced cmap files.  It can be null.
+  * @param input The CMAP stream to parse.
+  *
+  * @return The parsed stream as a java object.
+  *
+  * @throws IOException If there is an error parsing the stream.
+  */
+ public CMap parse( String resourceRoot, InputStream input ) throws IOException
+ {
+     PushbackInputStream cmapStream = new PushbackInputStream( input );
+     CMap result = new CMap();
+     Object previousToken = null;
+     Object token = null;
+     while( (token = parseNextToken( cmapStream )) != null )
+     {
+         if( token instanceof Operator )
+         {
+             String op = ((Operator)token).op;
+             if( op.equals( USECMAP ) )
+             {
+                 parseUseCMap( resourceRoot, (LiteralName)previousToken, result );
+             }
+             else if( op.equals( BEGIN_CODESPACE_RANGE ) )
+             {
+                 parseCodespaceRanges( cmapStream, ((Number)previousToken).intValue(), result );
+             }
+             else if( op.equals( BEGIN_BASE_FONT_CHAR ) )
+             {
+                 parseBaseFontChars( cmapStream, ((Number)previousToken).intValue(), result );
+             }
+             else if( op.equals( BEGIN_BASE_FONT_RANGE ) )
+             {
+                 parseBaseFontRanges( cmapStream, ((Number)previousToken).intValue(), result );
+             }
+             else if( op.equals( BEGIN_CID_CHAR ) )
+             {
+                 parseCidChars( cmapStream, ((Number)previousToken).intValue(), result );
+             }
+             else if( op.equals( BEGIN_CID_RANGE ) )
+             {
+                 // read as a Number like every other section (it used to be cast to Integer)
+                 parseCidRanges( cmapStream, ((Number)previousToken).intValue(), result );
+             }
+         }
+         else if( token instanceof LiteralName )
+         {
+             parseHeaderEntry( cmapStream, (LiteralName)token, result );
+         }
+         previousToken = token;
+     }
+     return result;
+ }
+
+ /**
+  * Tells whether a token read where a section entry was expected closes the section.
+  *
+  * @param token The token that was read in place of the first value of an entry.
+  * @param endOperator The operator that legitimately closes the current section.
+  *
+  * @return true if the token is the expected end operator, false if it is not an operator.
+  *
+  * @throws IOException If the token is any other operator.
+  */
+ private boolean isSectionEnd( Object token, String endOperator ) throws IOException
+ {
+     if( !(token instanceof Operator) )
+     {
+         return false;
+     }
+     String op = ((Operator)token).op;
+     if( !op.equals( endOperator ) )
+     {
+         // every END_* constant is "end" + section name, e.g. "endbfchar" -> "~bfchar"
+         throw new IOException( "Error : ~" + endOperator.substring( 3 )
+                 + " contains an unexpected operator : " + op );
+     }
+     return true;
+ }
+
+ /**
+  * Handles <code>usecmap</code>: loads the cmap named by the preceding literal name and merges it.
+  */
+ private void parseUseCMap( String resourceRoot, LiteralName useCmapName, CMap result ) throws IOException
+ {
+     // resourceRoot is documented as nullable; concatenating null would look up "null<name>"
+     String root = resourceRoot == null ? "" : resourceRoot;
+     InputStream useStream = ResourceLoader.loadResource( root + useCmapName.name );
+     if( useStream == null )
+     {
+         throw new IOException( "Error: Could not find referenced cmap stream " + useCmapName.name );
+     }
+     try
+     {
+         CMap useCMap = parse( resourceRoot, useStream );
+         result.useCmap( useCMap );
+     }
+     finally
+     {
+         // the stream was opened here, so it is released here (it used to leak)
+         useStream.close();
+     }
+ }
+
+ /**
+  * Reads up to <code>count</code> start/end pairs of a codespacerange section.
+  */
+ private void parseCodespaceRanges( PushbackInputStream cmapStream, int count, CMap result ) throws IOException
+ {
+     for( int j = 0; j < count; j++ )
+     {
+         Object nextToken = parseNextToken( cmapStream );
+         if( isSectionEnd( nextToken, END_CODESPACE_RANGE ) )
+         {
+             break;
+         }
+         byte[] startRange = (byte[])nextToken;
+         byte[] endRange = (byte[])parseNextToken( cmapStream );
+         CodespaceRange range = new CodespaceRange();
+         range.setStart( startRange );
+         range.setEnd( endRange );
+         result.addCodespaceRange( range );
+     }
+ }
+
+ /**
+  * Reads up to <code>count</code> code/value pairs of a bfchar section; the value is
+  * either a hex string or a literal name.
+  */
+ private void parseBaseFontChars( PushbackInputStream cmapStream, int count, CMap result ) throws IOException
+ {
+     for( int j = 0; j < count; j++ )
+     {
+         Object nextToken = parseNextToken( cmapStream );
+         if( isSectionEnd( nextToken, END_BASE_FONT_CHAR ) )
+         {
+             break;
+         }
+         byte[] inputCode = (byte[])nextToken;
+         nextToken = parseNextToken( cmapStream );
+         if( nextToken instanceof byte[] )
+         {
+             result.addMapping( inputCode, createStringFromBytes( (byte[])nextToken ) );
+         }
+         else if( nextToken instanceof LiteralName )
+         {
+             result.addMapping( inputCode, ((LiteralName)nextToken).name );
+         }
+         else
+         {
+             throw new IOException( "Error parsing CMap beginbfchar, expected{COSString " +
+                     "or COSName} and not " + nextToken );
+         }
+     }
+ }
+
+ /**
+  * Reads up to <code>count</code> entries of a bfrange section. Each entry is a start
+  * code, an end code and either one destination (incremented for every code of the
+  * range) or an array holding one destination per code.
+  */
+ private void parseBaseFontRanges( PushbackInputStream cmapStream, int count, CMap result ) throws IOException
+ {
+     for( int j = 0; j < count; j++ )
+     {
+         Object nextToken = parseNextToken( cmapStream );
+         if( isSectionEnd( nextToken, END_BASE_FONT_RANGE ) )
+         {
+             break;
+         }
+         byte[] startCode = (byte[])nextToken;
+         byte[] endCode = (byte[])parseNextToken( cmapStream );
+         nextToken = parseNextToken( cmapStream );
+         List<byte[]> array = null;
+         byte[] tokenBytes = null;
+         if( nextToken instanceof List<?> )
+         {
+             // parseNextToken() builds the list from arbitrary tokens; a non hex-string
+             // element still fails with a ClassCastException when it is read below
+             @SuppressWarnings("unchecked")
+             List<byte[]> destinations = (List<byte[]>)nextToken;
+             if( destinations.isEmpty() )
+             {
+                 // used to escape as an IndexOutOfBoundsException from get( 0 )
+                 throw new IOException( "Error parsing CMap beginbfrange, the destination array is empty" );
+             }
+             array = destinations;
+             tokenBytes = array.get( 0 );
+         }
+         else
+         {
+             tokenBytes = (byte[])nextToken;
+         }
+
+         int arrayIndex = 0;
+         boolean done = false;
+         while( !done )
+         {
+             // the mapping for the end code itself is still added below before the loop stops
+             if( compare( startCode, endCode ) >= 0 )
+             {
+                 done = true;
+             }
+             String value = createStringFromBytes( tokenBytes );
+             result.addMapping( startCode, value );
+             increment( startCode );
+
+             if( array == null )
+             {
+                 increment( tokenBytes );
+             }
+             else
+             {
+                 arrayIndex++;
+                 if( arrayIndex < array.size() )
+                 {
+                     tokenBytes = array.get( arrayIndex );
+                 }
+             }
+         }
+     }
+ }
+
+ /**
+  * Reads up to <code>count</code> code/CID pairs of a cidchar section.
+  */
+ private void parseCidChars( PushbackInputStream cmapStream, int count, CMap result ) throws IOException
+ {
+     for( int j = 0; j < count; j++ )
+     {
+         Object nextToken = parseNextToken( cmapStream );
+         if( isSectionEnd( nextToken, END_CID_CHAR ) )
+         {
+             break;
+         }
+         byte[] inputCode = (byte[])nextToken;
+         int mappedCode = (Integer)parseNextToken( cmapStream );
+         String mappedStr = createStringFromBytes( inputCode );
+         result.addCIDMapping( mappedCode, mappedStr );
+     }
+ }
+
+ /**
+  * Reads up to <code>count</code> start/end/CID triples of a cidrange section.
+  */
+ private void parseCidRanges( PushbackInputStream cmapStream, int count, CMap result ) throws IOException
+ {
+     for( int n = 0; n < count; n++ )
+     {
+         Object nextToken = parseNextToken( cmapStream );
+         if( isSectionEnd( nextToken, END_CID_RANGE ) )
+         {
+             break;
+         }
+         byte[] startCode = (byte[])nextToken;
+         int start = createIntFromBytes( startCode );
+         byte[] endCode = (byte[])parseNextToken( cmapStream );
+         int end = createIntFromBytes( endCode );
+         int mappedCode = (Integer)parseNextToken( cmapStream );
+         if( startCode.length <= 2 && endCode.length <= 2 )
+         {
+             result.addCIDRange( (char) start, (char) end, mappedCode );
+         }
+         else
+         {
+             // TODO: Is this even possible?
+             int endOfMappings = mappedCode + end - start;
+             while( mappedCode <= endOfMappings )
+             {
+                 String mappedStr = createStringFromBytes( startCode );
+                 result.addCIDMapping( mappedCode++, mappedStr );
+                 increment( startCode );
+             }
+         }
+     }
+ }
+
+ /**
+  * Stores the token that follows a known header key in the result. For a known key the
+  * following token is always consumed, and silently dropped when it has an unexpected
+  * type; for an unknown name nothing is read.
+  */
+ private void parseHeaderEntry( PushbackInputStream cmapStream, LiteralName literal, CMap result )
+     throws IOException
+ {
+     String key = literal.name;
+     if( WMODE.equals( key ) )
+     {
+         Object next = parseNextToken( cmapStream );
+         if( next instanceof Integer )
+         {
+             result.setWMode( (Integer)next );
+         }
+     }
+     else if( CMAP_NAME.equals( key ) )
+     {
+         Object next = parseNextToken( cmapStream );
+         if( next instanceof LiteralName )
+         {
+             result.setName( ((LiteralName)next).name );
+         }
+     }
+     else if( CMAP_VERSION.equals( key ) )
+     {
+         Object next = parseNextToken( cmapStream );
+         if( next instanceof Number )
+         {
+             result.setVersion( ((Number)next).toString() );
+         }
+         else if( next instanceof String )
+         {
+             result.setVersion( (String)next );
+         }
+     }
+     else if( CMAP_TYPE.equals( key ) )
+     {
+         Object next = parseNextToken( cmapStream );
+         if( next instanceof Integer )
+         {
+             result.setType( (Integer)next );
+         }
+     }
+     else if( REGISTRY.equals( key ) )
+     {
+         Object next = parseNextToken( cmapStream );
+         if( next instanceof String )
+         {
+             result.setRegistry( (String)next );
+         }
+     }
+     else if( ORDERING.equals( key ) )
+     {
+         Object next = parseNextToken( cmapStream );
+         if( next instanceof String )
+         {
+             result.setOrdering( (String)next );
+         }
+     }
+     else if( SUPPLEMENT.equals( key ) )
+     {
+         Object next = parseNextToken( cmapStream );
+         if( next instanceof Integer )
+         {
+             result.setSupplement( (Integer)next );
+         }
+     }
+ }
+
+ /**
+  * Reads the next token from the stream.
+  *
+  * Leading whitespace (tab, space, CR, LF) is skipped; the first remaining byte
+  * selects the token type:
+  * <ul>
+  * <li><code>%</code> - the rest of the line, returned as a String</li>
+  * <li><code>(</code> - the bytes up to the next <code>)</code>, returned as a String</li>
+  * <li><code>&gt;&gt;</code> and <code>]</code> - the MARK_END_OF_DICTIONARY / MARK_END_OF_ARRAY markers</li>
+  * <li><code>[</code> - a List of the tokens up to the closing <code>]</code></li>
+  * <li><code>&lt;&lt;</code> - a Map of literal-name keys to value tokens</li>
+  * <li><code>&lt;</code> - a hex string, returned as a byte[]</li>
+  * <li><code>/</code> - a LiteralName</li>
+  * <li>a digit - an Integer, or a Double when the text contains a '.'</li>
+  * <li>anything else - an Operator</li>
+  * </ul>
+  *
+  * @param is The stream to read from.
+  *
+  * @return The token, or null at the end of the stream.
+  *
+  * @throws IOException If the stream cannot be read or holds a malformed token.
+  */
+ private Object parseNextToken( PushbackInputStream is ) throws IOException
+ {
+     Object retval = null;
+     int nextByte = is.read();
+     //skip whitespace
+     while( nextByte == 0x09 || nextByte == 0x20 || nextByte == 0x0D || nextByte == 0x0A )
+     {
+         nextByte = is.read();
+     }
+     switch( nextByte )
+     {
+         case '%':
+         {
+             //header operations, for now return the entire line
+             //may need to smarter in the future
+             // stays a StringBuffer because readUntilEndOfLine() takes one
+             StringBuffer buffer = new StringBuffer();
+             buffer.append( (char)nextByte );
+             readUntilEndOfLine( is, buffer );
+             retval = buffer.toString();
+             break;
+         }
+         case '(':
+         {
+             StringBuilder buffer = new StringBuilder();
+             int stringByte = is.read();
+
+             while( stringByte != -1 && stringByte != ')' )
+             {
+                 buffer.append( (char)stringByte );
+                 stringByte = is.read();
+             }
+             retval = buffer.toString();
+             break;
+         }
+         case '>':
+         {
+             int secondCloseBrace = is.read();
+             if( secondCloseBrace == '>' )
+             {
+                 retval = MARK_END_OF_DICTIONARY;
+             }
+             else
+             {
+                 throw new IOException( "Error: expected the end of a dictionary.");
+             }
+             break;
+         }
+         case ']':
+         {
+             retval = MARK_END_OF_ARRAY;
+             break;
+         }
+         case '[':
+         {
+             List<Object> list = new ArrayList<Object>();
+
+             // reference comparison on purpose: a "(]" string token must not end the array
+             Object nextToken = parseNextToken( is );
+             while( nextToken != null && nextToken != MARK_END_OF_ARRAY )
+             {
+                 list.add( nextToken );
+                 nextToken = parseNextToken( is );
+             }
+             retval = list;
+             break;
+         }
+         case '<':
+         {
+             int theNextByte = is.read();
+             if( theNextByte == '<' )
+             {
+                 Map<String,Object> result = new HashMap<String,Object>();
+                 //we are reading a dictionary
+                 // the first key that is not a literal name (normally ">>") ends it
+                 Object key = parseNextToken( is );
+                 while( key instanceof LiteralName )
+                 {
+                     Object value = parseNextToken( is );
+                     result.put( ((LiteralName)key).name, value );
+                     key = parseNextToken( is );
+                 }
+                 retval = result;
+             }
+             else
+             {
+                 // hex string: two digits per byte, a trailing single digit is the high nibble
+                 int multiplyer = 16;
+                 int bufferIndex = -1;
+                 while( theNextByte != -1 && theNextByte != '>' )
+                 {
+                     int intValue = 0;
+                     if( theNextByte >= '0' && theNextByte <= '9' )
+                     {
+                         intValue = theNextByte - '0';
+                     }
+                     else if( theNextByte >= 'A' && theNextByte <= 'F' )
+                     {
+                         intValue = 10 + theNextByte - 'A';
+                     }
+                     else if( theNextByte >= 'a' && theNextByte <= 'f' )
+                     {
+                         intValue = 10 + theNextByte - 'a';
+                     }
+                     else if( theNextByte == 0x20 || theNextByte == 0x09
+                             || theNextByte == 0x0D || theNextByte == 0x0A )
+                     {
+                         // skip the same whitespace set as between tokens; only 0x20 used
+                         // to be accepted, so a hex string wrapped over two lines failed
+                         theNextByte = is.read();
+                         continue;
+                     }
+                     else
+                     {
+                         throw new IOException( "Error: expected hex character and not " +
+                             (char)theNextByte + ":" + theNextByte );
+                     }
+                     intValue *= multiplyer;
+                     if( multiplyer == 16 )
+                     {
+                         bufferIndex++;
+                         if( bufferIndex == tokenParserByteBuffer.length )
+                         {
+                             // grow the scratch buffer instead of overflowing it on
+                             // hex strings longer than its current size
+                             byte[] grown = new byte[tokenParserByteBuffer.length * 2];
+                             System.arraycopy( tokenParserByteBuffer, 0, grown, 0,
+                                     tokenParserByteBuffer.length );
+                             tokenParserByteBuffer = grown;
+                         }
+                         tokenParserByteBuffer[bufferIndex] = 0;
+                         multiplyer = 1;
+                     }
+                     else
+                     {
+                         multiplyer = 16;
+                     }
+                     tokenParserByteBuffer[bufferIndex]+= intValue;
+                     theNextByte = is.read();
+                 }
+                 byte[] finalResult = new byte[bufferIndex+1];
+                 System.arraycopy(tokenParserByteBuffer,0,finalResult, 0, bufferIndex+1);
+                 retval = finalResult;
+             }
+             break;
+         }
+         case '/':
+         {
+             StringBuilder buffer = new StringBuilder();
+             int stringByte = is.read();
+
+             while( !isWhitespaceOrEOF( stringByte ) )
+             {
+                 buffer.append( (char)stringByte );
+                 stringByte = is.read();
+             }
+             retval = new LiteralName( buffer.toString() );
+             break;
+         }
+         case -1:
+         {
+             //EOF return null;
+             break;
+         }
+         case '0':
+         case '1':
+         case '2':
+         case '3':
+         case '4':
+         case '5':
+         case '6':
+         case '7':
+         case '8':
+         case '9':
+         {
+             StringBuilder buffer = new StringBuilder();
+             buffer.append( (char)nextByte );
+             nextByte = is.read();
+
+             while( !isWhitespaceOrEOF( nextByte ) &&
+                 (Character.isDigit( (char)nextByte )||
+                 nextByte == '.' ) )
+             {
+                 buffer.append( (char)nextByte );
+                 nextByte = is.read();
+             }
+             // give the terminating byte back, but never at EOF: unread( -1 ) stores
+             // (byte)-1, which the next call would read as 0xFF and turn into a bogus operator
+             if( nextByte != -1 )
+             {
+                 is.unread( nextByte );
+             }
+             String value = buffer.toString();
+             try
+             {
+                 if( value.indexOf( '.' ) >=0 )
+                 {
+                     retval = Double.valueOf( value );
+                 }
+                 else
+                 {
+                     retval = Integer.valueOf( value );
+                 }
+             }
+             catch( NumberFormatException e )
+             {
+                 // e.g. "1.2.3" or an integer that does not fit into an int: report it as
+                 // the parse error it is instead of leaking an unchecked exception
+                 IOException ioe = new IOException( "Error: invalid number " + value );
+                 ioe.initCause( e );
+                 throw ioe;
+             }
+             break;
+         }
+         default:
+         {
+             StringBuilder buffer = new StringBuilder();
+             buffer.append( (char)nextByte );
+             nextByte = is.read();
+
+             while( !isWhitespaceOrEOF( nextByte ) )
+             {
+                 buffer.append( (char)nextByte );
+                 nextByte = is.read();
+             }
+             retval = new Operator( buffer.toString() );
+
+             break;
+         }
+     }
+     return retval;
+ }
+
+ /**
+  * Appends the bytes of the current line to the buffer, one char per byte.
+  * Reading stops at the end of the stream or at the first CR or LF; that
+  * terminating byte is consumed but not appended.
+  *
+  * @param is The stream to read from.
+  * @param buf The buffer that receives the line content.
+  *
+  * @throws IOException If the stream cannot be read.
+  */
+ private void readUntilEndOfLine( InputStream is, StringBuffer buf ) throws IOException
+ {
+     for( int current = is.read();
+          current != -1 && current != 0x0D && current != 0x0A;
+          current = is.read() )
+     {
+         buf.append( (char)current );
+     }
+ }
+
+ /**
+  * Tells whether a value returned by <code>read()</code> ends the name, number or
+  * operator token that is currently being collected.
+  *
+  * @param aByte The value read from the stream, -1 at the end of the stream.
+  *
+  * @return true for the end of the stream, tab, space, CR and LF.
+  */
+ private boolean isWhitespaceOrEOF( int aByte )
+ {
+     // 0x09 (tab) was missing here although the whitespace skipper at the top of
+     // parseNextToken() accepts it, so a tab after a name or an operator became
+     // part of that token.
+     return aByte == -1 || aByte == 0x09 || aByte == 0x20 || aByte == 0x0D || aByte == 0x0A;
+ }
+
+
+ /**
+  * Adds one, in place, to the big-endian unsigned number stored in the array.
+  *
+  * @param data The bytes to increment.
+  */
+ private void increment( byte[] data )
+ {
+     int lastIndex = data.length - 1;
+     increment( data, lastIndex );
+ }
+
+ /**
+  * Adds one, in place, to the byte at the given position and carries into the
+  * bytes before it. The first byte never carries further: 0xFF there wraps to 0x00.
+  *
+  * @param data The bytes to increment.
+  * @param position The index of the least significant byte to start at.
+  */
+ private void increment( byte[] data, int position )
+ {
+     int index = position;
+     // every 0xFF byte after the first one rolls over and passes the carry to the left
+     while( index > 0 && (data[index]+256)%256 == 255 )
+     {
+         data[index] = 0;
+         index--;
+     }
+     data[index] = (byte)(data[index]+1);
+ }
+
+ /**
+  * Reads the bytes as one big-endian unsigned number.
+  *
+  * The previous version used the second byte only when the array held exactly
+  * two bytes and otherwise returned the first byte alone, although the cidrange
+  * handling also calls it for codes longer than two bytes. All bytes are used
+  * now; the result is unchanged for one and two byte codes. Only the low 32 bits
+  * are kept for arrays longer than four bytes, and an empty array yields 0.
+  *
+  * @param bytes The code to convert.
+  *
+  * @return The numeric value of the code.
+  */
+ private int createIntFromBytes(byte[] bytes)
+ {
+     int intValue = 0;
+     for( int i = 0; i < bytes.length; i++ )
+     {
+         // & 0xFF: bytes are signed in Java, the code bytes are not
+         intValue = (intValue << 8) | (bytes[i] & 0xFF);
+     }
+     return intValue;
+ }
+
+ /**
+  * Decodes a code or a destination value into a String: a single byte is
+  * decoded as ISO-8859-1, any other length as UTF-16BE.
+  *
+  * @param bytes The bytes to decode.
+  *
+  * @return The decoded text.
+  *
+  * @throws IOException If the charset is not supported by the runtime.
+  */
+ private String createStringFromBytes( byte[] bytes ) throws IOException
+ {
+     String charsetName = bytes.length == 1 ? "ISO-8859-1" : "UTF-16BE";
+     return new String( bytes, charsetName );
+ }
+
+ /**
+  * Compares two codes byte by byte, treating every byte as unsigned.
+  *
+  * Only the positions present in both arrays are compared. The previous version
+  * returned 1 for equal arrays and ran past the end of <code>second</code> when it
+  * was the shorter one; equal (common) bytes now yield 0. The only caller tests
+  * the result with <code>&gt;= 0</code>, which behaves the same for 0 and 1.
+  *
+  * @param first The first code.
+  * @param second The second code.
+  *
+  * @return -1 if first is smaller, 1 if it is greater, 0 if the compared bytes are equal.
+  */
+ private int compare( byte[] first, byte[] second )
+ {
+     int length = Math.min( first.length, second.length );
+     for( int i = 0; i < length; i++ )
+     {
+         int left = first[i] & 0xFF;
+         int right = second[i] & 0xFF;
+         if( left != right )
+         {
+             // the first differing byte decides
+             return left < right ? -1 : 1;
+         }
+     }
+     return 0;
+ }
+
+ /**
+  * Token type for a name written as <code>/Name</code> in the cmap; it holds the
+  * text that followed the slash.
+  *
+  * Declared static because it never touches the enclosing parser, and immutable
+  * because the name is only ever set by the constructor.
+  */
+ private static final class LiteralName
+ {
+     private final String name;
+
+     private LiteralName( String theName )
+     {
+         name = theName;
+     }
+
+     /**
+      * Makes the token readable in the parse error messages that append it.
+      */
+     @Override
+     public String toString()
+     {
+         return "/" + name;
+     }
+ }
+
+ /**
+  * Token type for any bare word that is not a number, such as
+  * <code>beginbfchar</code> or <code>usecmap</code>; it holds the word as read.
+  *
+  * Declared static because it never touches the enclosing parser, and immutable
+  * because the text is only ever set by the constructor.
+  */
+ private static final class Operator
+ {
+     private final String op;
+
+     private Operator( String theOp )
+     {
+         op = theOp;
+     }
+
+     /**
+      * Makes the token readable in the parse error messages that append it.
+      */
+     @Override
+     public String toString()
+     {
+         return op;
+     }
+ }
+
+ /**
+  * A simple class to test parsing of cmap files: parses the file named by the
+  * single argument and prints the result.
+  *
+  * @param args Some command line arguments; exactly one, the cmap file, is expected.
+  *
+  * @throws Exception If there is an error parsing the file.
+  */
+ public static void main( String[] args ) throws Exception
+ {
+     if( args.length != 1 )
+     {
+         // the usage text used to name org.pdfbox.cmapparser.CMapParser, which is not
+         // where this file lives (org/apache/fontbox/cmap)
+         System.err.println( "usage: java org.apache.fontbox.cmap.CMapParser <CMAP File>" );
+         System.exit( -1 );
+     }
+     CMapParser parser = new CMapParser( );
+     File cmapFile = new File( args[0] );
+     CMap result = parser.parse( cmapFile );
+     System.out.println( "Result:" + result );
+ }
}
\ No newline at end of file