You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2012/02/18 19:04:25 UTC

svn commit: r1290834 - /pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java

Author: leleueri
Date: Sat Feb 18 18:04:25 2012
New Revision: 1290834

URL: http://svn.apache.org/viewvc?rev=1290834&view=rev
Log:
[PDFBOX-1161] Add a check for the closing operator (endxxxrange / endxxxchar) while parsing the xxxrange & xxxchar sections

Modified:
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java?rev=1290834&r1=1290833&r2=1290834&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/cmap/CMapParser.java Sat Feb 18 18:04:25 2012
@@ -37,613 +37,654 @@ import org.apache.fontbox.util.ResourceL
  */
 public class CMapParser
 {
-    private static final String BEGIN_CODESPACE_RANGE = "begincodespacerange";
-    private static final String BEGIN_BASE_FONT_CHAR = "beginbfchar";
-    private static final String BEGIN_BASE_FONT_RANGE = "beginbfrange";
-    private static final String BEGIN_CID_CHAR = "begincidchar";
-    private static final String BEGIN_CID_RANGE = "begincidrange";
-    private static final String USECMAP = "usecmap";
-    
-    private static final String WMODE = "WMode";
-    private static final String CMAP_NAME = "CMapName";
-    private static final String CMAP_VERSION = "CMapVersion";
-    private static final String CMAP_TYPE = "CMapType";
-    private static final String REGISTRY = "Registry";
-    private static final String ORDERING = "Ordering";
-    private static final String SUPPLEMENT = "Supplement";
-    
-    private static final String MARK_END_OF_DICTIONARY = ">>";
-    private static final String MARK_END_OF_ARRAY = "]";
-    
-    
-    private byte[] tokenParserByteBuffer = new byte[512];
-
-    /**
-     * Creates a new instance of CMapParser.
-     */
-    public CMapParser()
-    {
-    }
-    
-    /**
-     * Parse a CMAP file on the file system.
-     * 
-     * @param file The file to parse.
-     * 
-     * @return A parsed CMAP file.
-     * 
-     * @throws IOException If there is an issue while parsing the CMAP.
-     */
-    public CMap parse( File file ) throws IOException
-    {
-        String rootDir = file.getParent() + File.separator;
-        FileInputStream input = null;
-        try
-        {
-            input = new FileInputStream( file );
-            return parse( rootDir, input );
-        }
-        finally
-        {
-            if( input != null )
-            {
-                input.close();
-            }
-        }
-        
-    }
-
-    /**
-     * This will parse the stream and create a cmap object.
-     *
-     * @param resourceRoot The root path to the cmap file.  This will be used
-     *                     to find referenced cmap files.  It can be null.
-     * @param input The CMAP stream to parse.
-     * 
-     * @return The parsed stream as a java object.
-     *
-     * @throws IOException If there is an error parsing the stream.
-     */
-    public CMap parse( String resourceRoot, InputStream input ) throws IOException
-    {
-        PushbackInputStream cmapStream = new PushbackInputStream( input );
-        CMap result = new CMap();
-        Object previousToken = null;
-        Object token = null;
-        while( (token = parseNextToken( cmapStream )) != null )
-        {
-            if( token instanceof Operator )
-            {
-                Operator op = (Operator)token;
-                if( op.op.equals( USECMAP ) )
-                {
-                    LiteralName useCmapName = (LiteralName)previousToken;
-                    InputStream useStream = ResourceLoader.loadResource( resourceRoot + useCmapName.name );
-                    if( useStream == null )
-                    {
-                        throw new IOException( "Error: Could not find referenced cmap stream " + useCmapName.name );
-                    }
-                    CMap useCMap = parse( resourceRoot, useStream );
-                    result.useCmap( useCMap );
-                }
-                else if( op.op.equals( BEGIN_CODESPACE_RANGE ) )
-                {
-                    Number cosCount = (Number)previousToken;
-                    for( int j=0; j<cosCount.intValue(); j++ )
-                    {
-                        byte[] startRange = (byte[])parseNextToken( cmapStream );
-                        byte[] endRange = (byte[])parseNextToken( cmapStream );
-                        CodespaceRange range = new CodespaceRange();
-                        range.setStart( startRange );
-                        range.setEnd( endRange );
-                        result.addCodespaceRange( range );
-                    }
-                }
-                else if( op.op.equals( BEGIN_BASE_FONT_CHAR ) )
-                {
-                    Number cosCount = (Number)previousToken;
-                    for( int j=0; j<cosCount.intValue(); j++ )
-                    {
-                        byte[] inputCode = (byte[])parseNextToken( cmapStream );
-                        Object nextToken = parseNextToken( cmapStream );
-                        if( nextToken instanceof byte[] )
-                        {
-                            byte[] bytes = (byte[])nextToken;
-                            String value = createStringFromBytes( bytes );
-                            result.addMapping( inputCode, value );
-                        }
-                        else if( nextToken instanceof LiteralName )
-                        {
-                            result.addMapping( inputCode, ((LiteralName)nextToken).name );
-                        }
-                        else
-                        {
-                            throw new IOException( "Error parsing CMap beginbfchar, expected{COSString " +
-                                                   "or COSName} and not " + nextToken );
-                        }
-                    }
-                }
-                else if( op.op.equals( BEGIN_BASE_FONT_RANGE ) )
-                {
-                    Number cosCount = (Number)previousToken;
-                    
-                    for( int j=0; j<cosCount.intValue(); j++ )
-                    {
-                        byte[] startCode = (byte[])parseNextToken( cmapStream );
-                        byte[] endCode = (byte[])parseNextToken( cmapStream );
-                        Object nextToken = parseNextToken( cmapStream );
-                        List<byte[]> array = null;
-                        byte[] tokenBytes = null;
-                        if( nextToken instanceof List<?> )
-                        {
-                            array = (List<byte[]>)nextToken;
-                            tokenBytes = array.get( 0 );
-                        }
-                        else
-                        {
-                            tokenBytes = (byte[])nextToken;
-                        }
-                        
-                        String value = null;
-                        
-                        int arrayIndex = 0;
-                        boolean done = false;
-                        while( !done )
-                        {
-                            if( compare( startCode, endCode ) >= 0 )
-                            {
-                                done = true;
-                            }
-                            value = createStringFromBytes( tokenBytes );
-                            result.addMapping( startCode, value );
-                            increment( startCode );
-                            
-                            if( array == null )
-                            {
-                                increment( tokenBytes );
-                            }
-                            else
-                            {
-                                arrayIndex++;
-                                if( arrayIndex < array.size() )
-                                {
-                                    tokenBytes = (byte[])array.get( arrayIndex );
-                                }
-                            }
-                        }
-                    }
-                }
-                else if( op.op.equals( BEGIN_CID_CHAR ) )
-                {
-                    Number cosCount = (Number)previousToken;
-                    for( int j=0; j<cosCount.intValue(); j++ )
-                    {
-                        byte[] inputCode = (byte[])parseNextToken( cmapStream );
-                        int mappedCode = (Integer)parseNextToken( cmapStream );
-                        String mappedStr = createStringFromBytes(inputCode);
-                        result.addCIDMapping(mappedCode, mappedStr);
-                    }
-                }
-                else if( op.op.equals( BEGIN_CID_RANGE ) )
-                {
-                    int numberOfLines = (Integer)previousToken;
-                    for (int n=0; n < numberOfLines;n++) {
-                        byte[] startCode = (byte[])parseNextToken( cmapStream );
-                        int start = createIntFromBytes(startCode);
-                        byte[] endCode = (byte[])parseNextToken( cmapStream );
-                        int end = createIntFromBytes(endCode);
-                        int mappedCode = (Integer)parseNextToken( cmapStream );
-                        if (startCode.length <= 2 && endCode.length <= 2) {
-                            result.addCIDRange(
-                                    (char) start, (char) end, mappedCode);
-                        } else {
-                            // TODO: Is this even possible?
-                            int endOfMappings = mappedCode + end-start;
-                            while (mappedCode<=endOfMappings) {
-                                String mappedStr = createStringFromBytes(startCode);
-                                result.addCIDMapping(mappedCode++, mappedStr);
-                                increment(startCode);
-                            }
-                        }
-                    }
-                }
-            }
-            else if (token instanceof LiteralName){
-                LiteralName literal = (LiteralName)token;
-                if (WMODE.equals(literal.name)) 
-                {
-                    Object next = parseNextToken(cmapStream);
-                    if (next instanceof Integer)
-                    {
-                        result.setWMode((Integer)next);
-                    }
-                }
-                else if (CMAP_NAME.equals(literal.name)) 
-                {
-                    Object next = parseNextToken(cmapStream);
-                    if (next instanceof LiteralName)
-                    {
-                        result.setName(((LiteralName)next).name);
-                    }
-                }
-                else if (CMAP_VERSION.equals(literal.name)) 
-                {
-                    Object next = parseNextToken(cmapStream);
-                    if (next instanceof Number)
-                    {
-                        result.setVersion(((Number)next).toString());
-                    }
-                    else if (next instanceof String)
-                    {
-                        result.setVersion((String)next);
-                    }
-                }
-                else if (CMAP_TYPE.equals(literal.name)) 
-                {
-                    Object next = parseNextToken(cmapStream);
-                    if (next instanceof Integer)
-                    {
-                        result.setType((Integer)next);
-                    }
-                }
-                else if (REGISTRY.equals(literal.name)) 
-                {
-                    Object next = parseNextToken(cmapStream);
-                    if (next instanceof String)
-                    {
-                        result.setRegistry((String)next);
-                    }
-                }
-                else if (ORDERING.equals(literal.name)) 
-                {
-                    Object next = parseNextToken(cmapStream);
-                    if (next instanceof String)
-                    {
-                        result.setOrdering((String)next);
-                    }
-                }
-                else if (SUPPLEMENT.equals(literal.name)) 
-                {
-                    Object next = parseNextToken(cmapStream);
-                    if (next instanceof Integer)
-                    {
-                        result.setSupplement((Integer)next);
-                    }
-                }
-            }
-            previousToken = token;
-        }
-        return result;
-    }
-    
-    private Object parseNextToken( PushbackInputStream is ) throws IOException
-    {
-        Object retval = null;
-        int nextByte = is.read();
-        //skip whitespace
-        while( nextByte == 0x09 || nextByte == 0x20 || nextByte == 0x0D || nextByte == 0x0A )
-        {
-            nextByte = is.read();
-        }
-        switch( nextByte )
-        {
-            case '%':
-            {
-                //header operations, for now return the entire line 
-                //may need to smarter in the future
-                StringBuffer buffer = new StringBuffer();
-                buffer.append( (char)nextByte );
-                readUntilEndOfLine( is, buffer );
-                retval = buffer.toString();
-                break;
-            }
-            case '(':
-            {
-                StringBuffer buffer = new StringBuffer();
-                int stringByte = is.read();
-                
-                while( stringByte != -1 && stringByte != ')' )
-                {
-                    buffer.append( (char)stringByte );
-                    stringByte = is.read();
-                }
-                retval = buffer.toString();
-                break;
-            }
-            case '>':
-            {
-                int secondCloseBrace = is.read();
-                if( secondCloseBrace == '>' )
-                {
-                    retval = MARK_END_OF_DICTIONARY;
-                }
-                else
-                {
-                    throw new IOException( "Error: expected the end of a dictionary.");
-                }
-                break;
-            }
-            case ']':
-            {
-                retval = MARK_END_OF_ARRAY;
-                break;
-            }
-            case '[':
-            {
-                List<Object> list = new ArrayList<Object>();
-                
-                Object nextToken = parseNextToken( is ); 
-                while( nextToken != null && nextToken != MARK_END_OF_ARRAY )
-                {
-                    list.add( nextToken );
-                    nextToken = parseNextToken( is );
-                }
-                retval = list;
-                break;
-            }
-            case '<':
-            {
-                int theNextByte = is.read();
-                if( theNextByte == '<' )
-                {
-                    Map<String,Object> result = new HashMap<String,Object>();
-                    //we are reading a dictionary
-                    Object key = parseNextToken( is ); 
-                    while( key instanceof LiteralName && key != MARK_END_OF_DICTIONARY )
-                    {
-                        Object value = parseNextToken( is );
-                        result.put( ((LiteralName)key).name, value );
-                        key = parseNextToken( is );
-                    }
-                    retval = result;
-                }
-                else
-                {
-                    //won't read more than 512 bytes
-                    
-                    int multiplyer = 16;
-                    int bufferIndex = -1;
-                    while( theNextByte != -1 && theNextByte != '>' )
-                    {
-                        int intValue = 0;
-                        if( theNextByte >= '0' && theNextByte <= '9' )
-                        {
-                            intValue = theNextByte - '0';
-                        }
-                        else if( theNextByte >= 'A' && theNextByte <= 'F' )
-                        {
-                            intValue = 10 + theNextByte - 'A';
-                        }
-                        else if( theNextByte >= 'a' && theNextByte <= 'f' )
-                        {
-                            intValue = 10 + theNextByte - 'a';
-                        }
-                        else if( theNextByte == 0x20 )
-                        {
-                            // skipping whitespaces
-                            theNextByte = is.read();
-                            continue;
-                        }
-                        else
-                        {
-                            throw new IOException( "Error: expected hex character and not " + 
-                                (char)theNextByte + ":" + theNextByte );
-                        }
-                        intValue *= multiplyer;
-                        if( multiplyer == 16 )
-                        {
-                            bufferIndex++;
-                            tokenParserByteBuffer[bufferIndex] = 0;
-                            multiplyer = 1;
-                        }
-                        else
-                        {
-                            multiplyer = 16;
-                        }
-                        tokenParserByteBuffer[bufferIndex]+= intValue;
-                        theNextByte = is.read();
-                    }
-                    byte[] finalResult = new byte[bufferIndex+1];
-                    System.arraycopy(tokenParserByteBuffer,0,finalResult, 0, bufferIndex+1);
-                    retval = finalResult;
-                }
-                break;
-            }
-            case '/':
-            {
-                StringBuffer buffer = new StringBuffer();
-                int stringByte = is.read();
-                
-                while( !isWhitespaceOrEOF( stringByte ) )
-                {
-                    buffer.append( (char)stringByte );
-                    stringByte = is.read();
-                }
-                retval = new LiteralName( buffer.toString() );
-                break;
-            }
-            case -1:
-            {
-                //EOF return null;
-                break;
-            }
-            case '0':
-            case '1':
-            case '2':
-            case '3':
-            case '4':
-            case '5':
-            case '6':
-            case '7':
-            case '8':
-            case '9':
-            {
-                StringBuffer buffer = new StringBuffer();
-                buffer.append( (char)nextByte );
-                nextByte = is.read();
-                
-                while( !isWhitespaceOrEOF( nextByte ) &&
-                        (Character.isDigit( (char)nextByte )||
-                         nextByte == '.' ) )
-                {
-                    buffer.append( (char)nextByte );
-                    nextByte = is.read();
-                }
-                is.unread( nextByte );
-                String value = buffer.toString();
-                if( value.indexOf( '.' ) >=0 )
-                {
-                    retval = new Double( value );
-                }
-                else
-                {
-                    retval = new Integer( value );
-                }
-                break;
-            }
-            default:
-            {
-                StringBuffer buffer = new StringBuffer();
-                buffer.append( (char)nextByte );
-                nextByte = is.read();
-                
-                while( !isWhitespaceOrEOF( nextByte ) )
-                {
-                    buffer.append( (char)nextByte );
-                    nextByte = is.read();
-                }
-                retval = new Operator( buffer.toString() );                        
-                
-                break;
-            }
-        }
-        return retval;
-    }
-    
-    private void readUntilEndOfLine( InputStream is, StringBuffer buf ) throws IOException
-    {
-        int nextByte = is.read();
-        while( nextByte != -1 && nextByte != 0x0D && nextByte != 0x0A )
-        {
-            buf.append( (char)nextByte );
-            nextByte = is.read();
-        }
-    }
-    
-    private boolean isWhitespaceOrEOF( int aByte )
-    {
-        return aByte == -1 || aByte == 0x20 || aByte == 0x0D || aByte == 0x0A; 
-    }
-    
-
-    private void increment( byte[] data )
-    {
-        increment( data, data.length-1 );
-    }
-
-    private void increment( byte[] data, int position )
-    {
-        if( position > 0 && (data[position]+256)%256 == 255 )
-        {
-            data[position]=0;
-            increment( data, position-1);
-        }
-        else
-        {
-            data[position] = (byte)(data[position]+1);
-        }
-    }
-    
-    private int createIntFromBytes(byte[] bytes) 
-    {
-        int intValue = (bytes[0]+256)%256;
-        if (bytes.length == 2) 
-        {
-            intValue <<= 8;
-            intValue += (bytes[1]+256)%256;
-        }
-        return intValue;
-    }
-    
-    private String createStringFromBytes( byte[] bytes ) throws IOException
-    {
-        String retval = null;
-        if( bytes.length == 1 )
-        {
-            retval = new String( bytes, "ISO-8859-1" );
-        }
-        else
-        {
-            retval = new String( bytes, "UTF-16BE" );
-        }
-        return retval;
-    }
-
-    private int compare( byte[] first, byte[] second )
-    {
-        int retval = 1;
-        int firstLength = first.length;
-        for( int i=0; i<firstLength; i++ )
-        {
-            if( first[i] == second[i] )
-            {
-                continue;
-            }
-            else if( ((first[i]+256)%256) < ((second[i]+256)%256) )
-            {
-                retval = -1;
-                break;
-            }
-            else
-            {
-                retval = 1;
-                break;
-            }
-        }
-        return retval;
-    }
-    
-    /**
-     * Internal class.
-     */
-    private class LiteralName
-    {
-        private String name;
-        private LiteralName( String theName )
-        {
-            name = theName;
-        }
-    }
-    
-    /**
-     * Internal class.
-     */
-    private class Operator
-    {
-        private String op;
-        private Operator( String theOp )
-        {
-            op = theOp;
-        }
-    }
-    
-    /**
-     * A simple class to test parsing of cmap files.
-     * 
-     * @param args Some command line arguments.
-     * 
-     * @throws Exception If there is an error parsing the file.
-     */
-    public static void main( String[] args ) throws Exception
-    {
-        if( args.length != 1 )
-        {
-            System.err.println( "usage: java org.pdfbox.cmapparser.CMapParser <CMAP File>" );
-            System.exit( -1 );
-        }
-        CMapParser parser = new CMapParser(  );
-        File cmapFile = new File( args[0] );
-        CMap result = parser.parse( cmapFile );
-        System.out.println( "Result:" + result );
-    }
+	private static final String BEGIN_CODESPACE_RANGE = "begincodespacerange";
+	private static final String BEGIN_BASE_FONT_CHAR = "beginbfchar";
+	private static final String BEGIN_BASE_FONT_RANGE = "beginbfrange";
+	private static final String BEGIN_CID_CHAR = "begincidchar";
+	private static final String BEGIN_CID_RANGE = "begincidrange";
+	private static final String USECMAP = "usecmap";
+
+	private static final String END_CODESPACE_RANGE = "endcodespacerange";
+	private static final String END_BASE_FONT_CHAR = "endbfchar";
+	private static final String END_BASE_FONT_RANGE = "endbfrange";
+	private static final String END_CID_CHAR = "endcidchar";    
+	private static final String END_CID_RANGE = "endcidrange";
+
+	private static final String WMODE = "WMode";
+	private static final String CMAP_NAME = "CMapName";
+	private static final String CMAP_VERSION = "CMapVersion";
+	private static final String CMAP_TYPE = "CMapType";
+	private static final String REGISTRY = "Registry";
+	private static final String ORDERING = "Ordering";
+	private static final String SUPPLEMENT = "Supplement";
+
+	private static final String MARK_END_OF_DICTIONARY = ">>";
+	private static final String MARK_END_OF_ARRAY = "]";
+
+
+	private byte[] tokenParserByteBuffer = new byte[512];
+
+	/**
+	 * Creates a new instance of CMapParser.
+	 */
+	public CMapParser()
+	{
+	}
+
+	/**
+	 * Parse a CMAP file on the file system.
+	 * 
+	 * @param file The file to parse.
+	 * 
+	 * @return A parsed CMAP file.
+	 * 
+	 * @throws IOException If there is an issue while parsing the CMAP.
+	 */
+	public CMap parse( File file ) throws IOException
+	{
+		String rootDir = file.getParent() + File.separator;
+		FileInputStream input = null;
+		try
+		{
+			input = new FileInputStream( file );
+			return parse( rootDir, input );
+		}
+		finally
+		{
+			if( input != null )
+			{
+				input.close();
+			}
+		}
+
+	}
+
+	/**
+	 * This will parse the stream and create a cmap object.
+	 *
+	 * @param resourceRoot The root path to the cmap file.  This will be used
+	 *                     to find referenced cmap files.  It can be null.
+	 * @param input The CMAP stream to parse.
+	 * 
+	 * @return The parsed stream as a java object.
+	 *
+	 * @throws IOException If there is an error parsing the stream.
+	 */
+	public CMap parse( String resourceRoot, InputStream input ) throws IOException
+	{
+		PushbackInputStream cmapStream = new PushbackInputStream( input );
+		CMap result = new CMap();
+		Object previousToken = null;
+		Object token = null;
+		while( (token = parseNextToken( cmapStream )) != null )
+		{
+			if( token instanceof Operator )
+			{
+				Operator op = (Operator)token;
+				if( op.op.equals( USECMAP ) )
+				{
+					LiteralName useCmapName = (LiteralName)previousToken;
+					InputStream useStream = ResourceLoader.loadResource( resourceRoot + useCmapName.name );
+					if( useStream == null )
+					{
+						throw new IOException( "Error: Could not find referenced cmap stream " + useCmapName.name );
+					}
+					CMap useCMap = parse( resourceRoot, useStream );
+					result.useCmap( useCMap );
+				}
+				else if( op.op.equals( BEGIN_CODESPACE_RANGE ) )
+				{
+					Number cosCount = (Number)previousToken;
+					for( int j=0; j<cosCount.intValue(); j++ )
+					{
+						Object nextToken = parseNextToken( cmapStream );
+						if (nextToken instanceof Operator) {
+							if (!((Operator)nextToken).op.equals( END_CODESPACE_RANGE )) {
+								throw new IOException("Error : ~codespacerange contains an unexpected operator : " + ((Operator)nextToken).op);
+							}
+							break;
+						}
+						byte[] startRange = (byte[])nextToken;
+						byte[] endRange = (byte[])parseNextToken( cmapStream );
+						CodespaceRange range = new CodespaceRange();
+						range.setStart( startRange );
+						range.setEnd( endRange );
+						result.addCodespaceRange( range );
+					}
+				}
+				else if( op.op.equals( BEGIN_BASE_FONT_CHAR ) )
+				{
+					Number cosCount = (Number)previousToken;
+					for( int j=0; j<cosCount.intValue(); j++ )
+					{
+						Object nextToken = parseNextToken( cmapStream );
+						if (nextToken instanceof Operator) {
+							if (!((Operator)nextToken).op.equals( END_BASE_FONT_CHAR )) {
+								throw new IOException("Error : ~bfchar contains an unexpected operator : " + ((Operator)nextToken).op);
+							}
+							break;
+						}
+						byte[] inputCode = (byte[])nextToken;
+						nextToken = parseNextToken( cmapStream );
+						if( nextToken instanceof byte[] )
+						{
+							byte[] bytes = (byte[])nextToken;
+							String value = createStringFromBytes( bytes );
+							result.addMapping( inputCode, value );
+						}
+						else if( nextToken instanceof LiteralName )
+						{
+							result.addMapping( inputCode, ((LiteralName)nextToken).name );
+						}
+						else
+						{
+							throw new IOException( "Error parsing CMap beginbfchar, expected{COSString " +
+									"or COSName} and not " + nextToken );
+						}
+					}
+				}
+				else if( op.op.equals( BEGIN_BASE_FONT_RANGE ) )
+				{
+					Number cosCount = (Number)previousToken;
+
+					for( int j=0; j<cosCount.intValue(); j++ )
+					{
+						Object nextToken = parseNextToken( cmapStream );
+						if (nextToken instanceof Operator) {
+							if (!((Operator)nextToken).op.equals( END_BASE_FONT_RANGE )) {
+								throw new IOException("Error : ~bfrange contains an unexpected operator : " + ((Operator)nextToken).op);
+							}
+							break;
+						}
+						byte[] startCode = (byte[])nextToken;
+						byte[] endCode = (byte[])parseNextToken( cmapStream );
+						nextToken = parseNextToken( cmapStream );
+						List<byte[]> array = null;
+						byte[] tokenBytes = null;
+						if( nextToken instanceof List<?> )
+						{
+							array = (List<byte[]>)nextToken;
+							tokenBytes = array.get( 0 );
+						}
+						else
+						{
+							tokenBytes = (byte[])nextToken;
+						}
+
+						String value = null;
+
+						int arrayIndex = 0;
+						boolean done = false;
+						while( !done )
+						{
+							if( compare( startCode, endCode ) >= 0 )
+							{
+								done = true;
+							}
+							value = createStringFromBytes( tokenBytes );
+							result.addMapping( startCode, value );
+							increment( startCode );
+
+							if( array == null )
+							{
+								increment( tokenBytes );
+							}
+							else
+							{
+								arrayIndex++;
+								if( arrayIndex < array.size() )
+								{
+									tokenBytes = (byte[])array.get( arrayIndex );
+								}
+							}
+						}
+					}
+				}
+				else if( op.op.equals( BEGIN_CID_CHAR ) )
+				{
+					Number cosCount = (Number)previousToken;
+					for( int j=0; j<cosCount.intValue(); j++ )
+					{
+						Object nextToken = parseNextToken( cmapStream );
+						if (nextToken instanceof Operator) {
+							if (!((Operator)nextToken).op.equals( END_CID_CHAR )) {
+								throw new IOException("Error : ~cidchar contains an unexpected operator : " + ((Operator)nextToken).op);
+							}
+							break;
+						}
+						byte[] inputCode = (byte[])nextToken;
+						int mappedCode = (Integer)parseNextToken( cmapStream );
+						String mappedStr = createStringFromBytes(inputCode);
+						result.addCIDMapping(mappedCode, mappedStr);
+					}
+				}
+				else if( op.op.equals( BEGIN_CID_RANGE ) )
+				{
+					int numberOfLines = (Integer)previousToken;
+					for (int n=0; n < numberOfLines;n++) {
+						Object nextToken = parseNextToken( cmapStream );
+						if (nextToken instanceof Operator) {
+							if (!((Operator)nextToken).op.equals( END_CID_RANGE )) {
+								throw new IOException("Error : ~cidrange contains an unexpected operator : " + ((Operator)nextToken).op);
+							}
+							break;
+						}
+						byte[] startCode = (byte[])nextToken;
+						int start = createIntFromBytes(startCode);
+						byte[] endCode = (byte[])parseNextToken( cmapStream );
+						int end = createIntFromBytes(endCode);
+						int mappedCode = (Integer)parseNextToken( cmapStream );
+						if (startCode.length <= 2 && endCode.length <= 2) {
+							result.addCIDRange(
+									(char) start, (char) end, mappedCode);
+						} else {
+							// TODO: Is this even possible?
+							int endOfMappings = mappedCode + end-start;
+							while (mappedCode<=endOfMappings) {
+								String mappedStr = createStringFromBytes(startCode);
+								result.addCIDMapping(mappedCode++, mappedStr);
+								increment(startCode);
+							}
+						}
+					}
+				}
+			}
+			else if (token instanceof LiteralName){
+				LiteralName literal = (LiteralName)token;
+				if (WMODE.equals(literal.name)) 
+				{
+					Object next = parseNextToken(cmapStream);
+					if (next instanceof Integer)
+					{
+						result.setWMode((Integer)next);
+					}
+				}
+				else if (CMAP_NAME.equals(literal.name)) 
+				{
+					Object next = parseNextToken(cmapStream);
+					if (next instanceof LiteralName)
+					{
+						result.setName(((LiteralName)next).name);
+					}
+				}
+				else if (CMAP_VERSION.equals(literal.name)) 
+				{
+					Object next = parseNextToken(cmapStream);
+					if (next instanceof Number)
+					{
+						result.setVersion(((Number)next).toString());
+					}
+					else if (next instanceof String)
+					{
+						result.setVersion((String)next);
+					}
+				}
+				else if (CMAP_TYPE.equals(literal.name)) 
+				{
+					Object next = parseNextToken(cmapStream);
+					if (next instanceof Integer)
+					{
+						result.setType((Integer)next);
+					}
+				}
+				else if (REGISTRY.equals(literal.name)) 
+				{
+					Object next = parseNextToken(cmapStream);
+					if (next instanceof String)
+					{
+						result.setRegistry((String)next);
+					}
+				}
+				else if (ORDERING.equals(literal.name)) 
+				{
+					Object next = parseNextToken(cmapStream);
+					if (next instanceof String)
+					{
+						result.setOrdering((String)next);
+					}
+				}
+				else if (SUPPLEMENT.equals(literal.name)) 
+				{
+					Object next = parseNextToken(cmapStream);
+					if (next instanceof Integer)
+					{
+						result.setSupplement((Integer)next);
+					}
+				}
+			}
+			previousToken = token;
+		}
+		return result;
+	}
+
+	private Object parseNextToken( PushbackInputStream is ) throws IOException
+	{
+		Object retval = null;
+		int nextByte = is.read();
+		//skip whitespace
+		while( nextByte == 0x09 || nextByte == 0x20 || nextByte == 0x0D || nextByte == 0x0A )
+		{
+			nextByte = is.read();
+		}
+		switch( nextByte )
+		{
+		case '%':
+		{
+			//header operations, for now return the entire line 
+			//may need to smarter in the future
+			StringBuffer buffer = new StringBuffer();
+			buffer.append( (char)nextByte );
+			readUntilEndOfLine( is, buffer );
+			retval = buffer.toString();
+			break;
+		}
+		case '(':
+		{
+			StringBuffer buffer = new StringBuffer();
+			int stringByte = is.read();
+
+			while( stringByte != -1 && stringByte != ')' )
+			{
+				buffer.append( (char)stringByte );
+				stringByte = is.read();
+			}
+			retval = buffer.toString();
+			break;
+		}
+		case '>':
+		{
+			int secondCloseBrace = is.read();
+			if( secondCloseBrace == '>' )
+			{
+				retval = MARK_END_OF_DICTIONARY;
+			}
+			else
+			{
+				throw new IOException( "Error: expected the end of a dictionary.");
+			}
+			break;
+		}
+		case ']':
+		{
+			retval = MARK_END_OF_ARRAY;
+			break;
+		}
+		case '[':
+		{
+			List<Object> list = new ArrayList<Object>();
+
+			Object nextToken = parseNextToken( is ); 
+			while( nextToken != null && nextToken != MARK_END_OF_ARRAY )
+			{
+				list.add( nextToken );
+				nextToken = parseNextToken( is );
+			}
+			retval = list;
+			break;
+		}
+		case '<':
+		{
+			int theNextByte = is.read();
+			if( theNextByte == '<' )
+			{
+				Map<String,Object> result = new HashMap<String,Object>();
+				//we are reading a dictionary
+				Object key = parseNextToken( is ); 
+				while( key instanceof LiteralName && key != MARK_END_OF_DICTIONARY )
+				{
+					Object value = parseNextToken( is );
+					result.put( ((LiteralName)key).name, value );
+					key = parseNextToken( is );
+				}
+				retval = result;
+			}
+			else
+			{
+				//won't read more than 512 bytes
+
+				int multiplyer = 16;
+				int bufferIndex = -1;
+				while( theNextByte != -1 && theNextByte != '>' )
+				{
+					int intValue = 0;
+					if( theNextByte >= '0' && theNextByte <= '9' )
+					{
+						intValue = theNextByte - '0';
+					}
+					else if( theNextByte >= 'A' && theNextByte <= 'F' )
+					{
+						intValue = 10 + theNextByte - 'A';
+					}
+					else if( theNextByte >= 'a' && theNextByte <= 'f' )
+					{
+						intValue = 10 + theNextByte - 'a';
+					}
+					else if( theNextByte == 0x20 )
+					{
+						// skipping whitespaces
+						theNextByte = is.read();
+						continue;
+					}
+					else
+					{
+						throw new IOException( "Error: expected hex character and not " + 
+								(char)theNextByte + ":" + theNextByte );
+					}
+					intValue *= multiplyer;
+					if( multiplyer == 16 )
+					{
+						bufferIndex++;
+						tokenParserByteBuffer[bufferIndex] = 0;
+						multiplyer = 1;
+					}
+					else
+					{
+						multiplyer = 16;
+					}
+					tokenParserByteBuffer[bufferIndex]+= intValue;
+					theNextByte = is.read();
+				}
+				byte[] finalResult = new byte[bufferIndex+1];
+				System.arraycopy(tokenParserByteBuffer,0,finalResult, 0, bufferIndex+1);
+				retval = finalResult;
+			}
+			break;
+		}
+		case '/':
+		{
+			StringBuffer buffer = new StringBuffer();
+			int stringByte = is.read();
+
+			while( !isWhitespaceOrEOF( stringByte ) )
+			{
+				buffer.append( (char)stringByte );
+				stringByte = is.read();
+			}
+			retval = new LiteralName( buffer.toString() );
+			break;
+		}
+		case -1:
+		{
+			//EOF return null;
+			break;
+		}
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+		{
+			StringBuffer buffer = new StringBuffer();
+			buffer.append( (char)nextByte );
+			nextByte = is.read();
+
+			while( !isWhitespaceOrEOF( nextByte ) &&
+					(Character.isDigit( (char)nextByte )||
+							nextByte == '.' ) )
+			{
+				buffer.append( (char)nextByte );
+				nextByte = is.read();
+			}
+			is.unread( nextByte );
+			String value = buffer.toString();
+			if( value.indexOf( '.' ) >=0 )
+			{
+				retval = new Double( value );
+			}
+			else
+			{
+				retval = new Integer( value );
+			}
+			break;
+		}
+		default:
+		{
+			StringBuffer buffer = new StringBuffer();
+			buffer.append( (char)nextByte );
+			nextByte = is.read();
+
+			while( !isWhitespaceOrEOF( nextByte ) )
+			{
+				buffer.append( (char)nextByte );
+				nextByte = is.read();
+			}
+			retval = new Operator( buffer.toString() );                        
+
+			break;
+		}
+		}
+		return retval;
+	}
+
+	/**
+	 * Appends the bytes read from the stream to the buffer until a carriage
+	 * return, a line feed or the end of the stream is found. The terminating
+	 * byte is consumed but not appended.
+	 *
+	 * @param is The stream to read from.
+	 * @param buf The buffer receiving each byte as a char.
+	 *
+	 * @throws IOException If the stream can't be read.
+	 */
+	private void readUntilEndOfLine( InputStream is, StringBuffer buf ) throws IOException
+	{
+		for( int current = is.read(); current != -1; current = is.read() )
+		{
+			if( current == 0x0D || current == 0x0A )
+			{
+				return;
+			}
+			buf.append( (char)current );
+		}
+	}
+
+	/**
+	 * Tells whether a byte read from the stream terminates a token.
+	 *
+	 * The accepted whitespace bytes are tab, space, carriage return and line
+	 * feed, i.e. the same set that is skipped in front of a token.
+	 *
+	 * @param aByte The value returned by the read of a stream.
+	 *
+	 * @return true if the value is -1 (end of stream) or one of the whitespace bytes.
+	 */
+	private boolean isWhitespaceOrEOF( int aByte )
+	{
+		return aByte == -1 || aByte == 0x09 || aByte == 0x20 || aByte == 0x0D || aByte == 0x0A;
+	}
+
+
+	private void increment( byte[] data )
+	{
+		increment( data, data.length-1 );
+	}
+
+	/**
+	 * Adds one to the byte at the given position and carries into the bytes
+	 * in front of it: every byte holding 0xFF is reset to 0 and the carry moves
+	 * one position to the left. The carry stops at position 0, so that byte
+	 * simply wraps around from 0xFF to 0.
+	 *
+	 * @param data The bytes to increment in place.
+	 * @param position The index of the byte to start with.
+	 */
+	private void increment( byte[] data, int position )
+	{
+		int index = position;
+		// walk to the left over all bytes that overflow
+		while( index > 0 && (data[index] & 0xFF) == 0xFF )
+		{
+			data[index] = 0;
+			index--;
+		}
+		data[index]++;
+	}
+
+	/**
+	 * Converts a character code into an int, reading the bytes as one unsigned
+	 * big-endian number.
+	 *
+	 * Codes of any length are supported; previously only the first byte of a
+	 * code longer than two bytes was taken into account. An empty array yields 0
+	 * and only the lowest 32 bits are kept if more than four bytes are given.
+	 *
+	 * @param bytes The bytes of the code, most significant byte first.
+	 *
+	 * @return The value of the code.
+	 */
+	private int createIntFromBytes(byte[] bytes)
+	{
+		int intValue = 0;
+		for (int i = 0; i < bytes.length; i++)
+		{
+			// mask the byte to use its unsigned value
+			intValue = (intValue << 8) | (bytes[i] & 0xFF);
+		}
+		return intValue;
+	}
+
+	private String createStringFromBytes( byte[] bytes ) throws IOException
+	{
+		String retval = null;
+		if( bytes.length == 1 )
+		{
+			retval = new String( bytes, "ISO-8859-1" );
+		}
+		else
+		{
+			retval = new String( bytes, "UTF-16BE" );
+		}
+		return retval;
+	}
+
+	/**
+	 * Compares two byte arrays byte by byte, using the unsigned value of each
+	 * byte. Only the first <code>first.length</code> positions are examined.
+	 *
+	 * Note that 1 is returned as well if no difference is found, the method
+	 * never returns 0.
+	 *
+	 * @param first The first array.
+	 * @param second The second array, expected to be at least as long as the first one.
+	 *
+	 * @return -1 if the first differing byte is smaller in the first array, 1 otherwise.
+	 */
+	private int compare( byte[] first, byte[] second )
+	{
+		for( int index=0; index<first.length; index++ )
+		{
+			int left = first[index] & 0xFF;
+			int right = second[index] & 0xFF;
+			if( left != right )
+			{
+				return left < right ? -1 : 1;
+			}
+		}
+		return 1;
+	}
+
+	/**
+	 * Internal class, the token created by the parser for a name that is
+	 * introduced by a '/'. It is declared static as it doesn't need any access
+	 * to the enclosing parser instance.
+	 */
+	private static final class LiteralName
+	{
+		/** The text of the name without the leading '/'. */
+		private final String name;
+
+		private LiteralName( String theName )
+		{
+			name = theName;
+		}
+	}
+
+	/**
+	 * Internal class, the token created by the parser for any text that isn't
+	 * recognized as another kind of token, e.g. an operator keyword. It is
+	 * declared static as it doesn't need any access to the enclosing parser
+	 * instance.
+	 */
+	private static final class Operator
+	{
+		/** The text of the operator as read from the stream. */
+		private final String op;
+
+		private Operator( String theOp )
+		{
+			op = theOp;
+		}
+	}
+
+	/**
+	 * A simple command line entry point to test the parsing of a cmap file,
+	 * the parsed result is printed to standard out.
+	 * 
+	 * @param args The command line arguments, exactly one is expected: the cmap file to parse.
+	 * 
+	 * @throws Exception If there is an error parsing the file.
+	 */
+	public static void main( String[] args ) throws Exception
+	{
+		if( args.length != 1 )
+		{
+			// the usage has to name the package this class actually lives in
+			System.err.println( "usage: java org.apache.fontbox.cmap.CMapParser <CMAP File>" );
+			System.exit( -1 );
+		}
+		CMapParser parser = new CMapParser(  );
+		File cmapFile = new File( args[0] );
+		CMap result = parser.parse( cmapFile );
+		System.out.println( "Result:" + result );
+	}
 }
\ No newline at end of file