You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2010/09/15 19:27:17 UTC

svn commit: r997405 - in /pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf: AbstractTTFParser.java CIDFontType2Parser.java CMAPEncodingEntry.java GlyphTable.java TTFDataStream.java TTFParser.java

Author: lehmi
Date: Wed Sep 15 17:27:17 2010
New Revision: 997405

URL: http://svn.apache.org/viewvc?rev=997405&view=rev
Log:
PDFBOX-704: added support for additional CMap formats as suggested by Eric Leleu

Added:
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/AbstractTTFParser.java   (with props)
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CIDFontType2Parser.java   (with props)
Modified:
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPEncodingEntry.java
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphTable.java
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java
    pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java

Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/AbstractTTFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/AbstractTTFParser.java?rev=997405&view=auto
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/AbstractTTFParser.java (added)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/AbstractTTFParser.java Wed Sep 15 17:27:17 2010
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.fontbox.ttf;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+abstract class AbstractTTFParser {
+	
+    protected boolean isEmbedded = false;
+
+	public AbstractTTFParser(boolean isEmbedded) {
+		this.isEmbedded = isEmbedded;
+	}
+	
+    /**
+     * Parse a file and get a true type font.
+     * @param ttfFile The TTF file.
+     * @return A true type font.
+     * @throws IOException If there is an error parsing the true type font.
+     */
+    public TrueTypeFont parseTTF( String ttfFile ) throws IOException
+    {
+        RAFDataStream raf = new RAFDataStream( ttfFile, "r" );
+        return parseTTF( raf );
+    }
+    
+    /**
+     * Parse a file and get a true type font.
+     * @param ttfFile The TTF file.
+     * @return A true type font.
+     * @throws IOException If there is an error parsing the true type font.
+     */
+    public TrueTypeFont parseTTF( File ttfFile ) throws IOException
+    {
+        RAFDataStream raf = new RAFDataStream( ttfFile, "r" );
+        return parseTTF( raf );
+    }
+    
+    /**
+     * Parse a file and get a true type font.
+     * @param ttfData The TTF data to parse.
+     * @return A true type font.
+     * @throws IOException If there is an error parsing the true type font.
+     */
+    public TrueTypeFont parseTTF( InputStream ttfData ) throws IOException
+    {
+        return parseTTF( new MemoryTTFDataStream( ttfData ));
+    }
+    
+    /**
+     * Parse a file and get a true type font.
+     * @param raf The TTF file.
+     * @return A true type font.
+     * @throws IOException If there is an error parsing the true type font.
+     */
+    public TrueTypeFont parseTTF( TTFDataStream raf ) throws IOException
+    {
+        TrueTypeFont font = new TrueTypeFont( raf );
+        font.setVersion( raf.read32Fixed() );
+        int numberOfTables = raf.readUnsignedShort();
+        int searchRange = raf.readUnsignedShort();
+        int entrySelector = raf.readUnsignedShort();
+        int rangeShift = raf.readUnsignedShort();
+        for( int i=0; i<numberOfTables; i++ )
+        {
+            TTFTable table = readTableDirectory( raf );   
+            font.addTable( table );
+        }
+
+        //need to initialize a couple tables in a certain order
+        parseTables(font, raf);
+
+        return font;
+    }
+
+    /**
+     * Parse all tables and check if all needed tables are present.
+     * @param font the TrueTypeFont instance holding the parsed data.
+     * @param raf the data stream of the to be parsed ttf font
+     * @throws IOException If there is an error parsing the true type font.
+     */
+    protected void parseTables(TrueTypeFont font, TTFDataStream raf)
+    throws IOException {
+        List<TTFTable> initialized = new ArrayList<TTFTable>();
+        HeaderTable head = font.getHeader();
+        if (head == null) 
+        {
+            throw new IOException("head is mandatory");
+        }
+        raf.seek( head.getOffset() );
+        head.initData( font, raf );
+        initialized.add( head );
+
+        HorizontalHeaderTable hh = font.getHorizontalHeader();
+        if (hh == null) 
+        {
+            throw new IOException("hhead is mandatory");
+        }
+        raf.seek( hh.getOffset() );
+        hh.initData( font, raf );
+        initialized.add( hh );
+
+        MaximumProfileTable maxp = font.getMaximumProfile();
+        if (maxp != null) 
+        {
+            raf.seek( maxp.getOffset() );
+            maxp.initData( font, raf );
+            initialized.add( maxp );
+        } 
+        else 
+        {
+            throw new IOException("maxp is mandatory");
+        }
+
+        PostScriptTable post = font.getPostScript();
+        if (post != null) {
+            raf.seek( post.getOffset() );
+            post.initData( font, raf );
+            initialized.add( post );
+        } 
+        else if ( !isEmbedded ) 
+        {
+            // in an embedded font this table is optional
+            throw new IOException("post is mandatory");
+        }
+
+        IndexToLocationTable loc = font.getIndexToLocation();
+        if (loc == null) 
+        {
+            throw new IOException("loca is mandatory");
+        }
+        raf.seek( loc.getOffset() );
+        loc.initData( font, raf );
+        initialized.add( loc );
+
+        boolean cvt = false, prep = false, fpgm = false;
+        Iterator<TTFTable> iter = font.getTables().iterator();
+        while( iter.hasNext() )
+        {
+            TTFTable table = iter.next();
+            if( !initialized.contains( table ) )
+            {
+                raf.seek( table.getOffset() );
+                table.initData( font, raf );
+            }
+            if (table.getTag().startsWith("cvt")) 
+            {
+                cvt = true;
+            } 
+            else if ("prep".equals(table.getTag())) 
+            {
+                prep = true;
+            } 
+            else if ("fpgm".equals(table.getTag())) 
+            {
+                fpgm = true;
+            }
+        }   
+
+        // check others mandatory tables
+        if ( font.getGlyph() == null )
+        {
+            throw new IOException("glyf is mandatory");
+        }
+        if ( font.getNaming() == null && !isEmbedded )
+        {
+            throw new IOException("name is mandatory");
+        }
+        if ( font.getHorizontalMetrics() == null )
+        {
+            throw new IOException("hmtx is mandatory");
+        }
+
+        if (isEmbedded) {
+            // in a embedded truetype font prep, cvt_ and fpgm tables 
+            // are mandatory
+            if (!fpgm) 
+            {
+                throw new IOException("fpgm is mandatory");
+            }
+            if (!prep)
+            {
+                throw new IOException("prep is mandatory");
+            }
+            if (!cvt) 
+            {
+                throw new IOException("cvt_ is mandatory");
+            }
+        }
+    }
+
+    private TTFTable readTableDirectory( TTFDataStream raf ) throws IOException
+    {
+        TTFTable retval = null;
+        String tag = raf.readString( 4 );
+        if( tag.equals( CMAPTable.TAG ) )
+        {
+            retval = new CMAPTable();
+        }
+        else if( tag.equals( GlyphTable.TAG ) )
+        {
+            retval = new GlyphTable();
+        }
+        else if( tag.equals( HeaderTable.TAG ) )
+        {
+            retval = new HeaderTable();
+        }
+        else if( tag.equals( HorizontalHeaderTable.TAG ) )
+        {
+            retval = new HorizontalHeaderTable();
+        }
+        else if( tag.equals( HorizontalMetricsTable.TAG ) )
+        {
+            retval = new HorizontalMetricsTable();
+        }
+        else if( tag.equals( IndexToLocationTable.TAG ) )
+        {
+            retval = new IndexToLocationTable();
+        }
+        else if( tag.equals( MaximumProfileTable.TAG ) )
+        {
+            retval = new MaximumProfileTable();
+        }
+        else if( tag.equals( NamingTable.TAG ) )
+        {
+            retval = new NamingTable();
+        }
+        else if( tag.equals( OS2WindowsMetricsTable.TAG ) )
+        {
+            retval = new OS2WindowsMetricsTable();
+        }
+        else if( tag.equals( PostScriptTable.TAG ) )
+        {
+            retval = new PostScriptTable();
+        }
+        else if( tag.equals( DigitalSignatureTable.TAG ) )
+        {
+            retval = new DigitalSignatureTable();
+        }
+        else
+        {
+            //unknown table type but read it anyway.
+            retval = new TTFTable();
+        }
+        retval.setTag( tag );
+        retval.setCheckSum( raf.readUnsignedInt() );
+        retval.setOffset( raf.readUnsignedInt() );
+        retval.setLength( raf.readUnsignedInt() );
+        return retval;
+    }
+}

Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/AbstractTTFParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CIDFontType2Parser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CIDFontType2Parser.java?rev=997405&view=auto
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CIDFontType2Parser.java (added)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CIDFontType2Parser.java Wed Sep 15 17:27:17 2010
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.fontbox.ttf;
+
+
+public class CIDFontType2Parser extends AbstractTTFParser
+{   
+	public CIDFontType2Parser() {
+		super(false);
+	}
+
+	public CIDFontType2Parser(boolean isEmbedded) {
+		super(isEmbedded);
+	}
+
+}

Propchange: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CIDFontType2Parser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPEncodingEntry.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPEncodingEntry.java?rev=997405&r1=997404&r2=997405&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPEncodingEntry.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/CMAPEncodingEntry.java Wed Sep 15 17:27:17 2010
@@ -59,143 +59,397 @@ public class CMAPEncodingEntry
     {
         data.seek( ttf.getCMAP().getOffset() + subTableOffset );
         int subtableFormat = data.readUnsignedShort();
-        int length = data.readUnsignedShort();
-        int version = data.readUnsignedShort();
-        int numGlyphs = ttf.getMaximumProfile().getNumGlyphs();
-        if( subtableFormat == 0 )
-        {
-            byte[] glyphMapping = data.read( 256 );
-            glyphIdToCharacterCode = new int[256];
-            for( int i=0;i < glyphMapping.length; i++ )
-            {
-                int glyphIndex = (glyphMapping[i]+256)%256;
-                glyphIdToCharacterCode[glyphIndex]=i;
-            }
+        long length;
+        long version;
+        int numGlyphs;
+        if (subtableFormat < 8) {
+            length = data.readUnsignedShort();
+            version = data.readUnsignedShort();
+            numGlyphs = ttf.getMaximumProfile().getNumGlyphs();
+        } else {
+            // read an other UnsignedShort to read a Fixed32
+            data.readUnsignedShort();
+            length = data.readUnsignedInt();
+            version = data.readUnsignedInt();
+            numGlyphs = ttf.getMaximumProfile().getNumGlyphs();         
+        }
+            
+        switch (subtableFormat) {
+        case 0:
+            processSubtype0(ttf, data);
+            break;
+        case 2:
+            processSubtype2(ttf, data, numGlyphs);
+            break;
+        case 4:
+            processSubtype4(ttf, data, numGlyphs);
+            break;
+        case 6:
+            processSubtype6(ttf, data, numGlyphs);
+            break;
+        case 8:
+            processSubtype8(ttf, data, numGlyphs);
+            break;
+        case 10:
+            processSubtype10(ttf, data, numGlyphs);
+            break;
+        case 12:
+            processSubtype12(ttf, data, numGlyphs);
+            break;
+        case 13:
+            processSubtype13(ttf, data, numGlyphs); 
+            break;
+        case 14:
+            processSubtype14(ttf, data, numGlyphs);
+            break;
+        default:
+            throw new IOException( "Unknown cmap format:" + subtableFormat );   
+        }
+    }
+          
+    /**
+     * Reads a format 8 subtable and fills both mapping directions
+     * (glyph id to character code and character code to glyph id).
+     * @param ttf the TrueTypeFont instance holding the parsed data.
+     * @param data the data stream of the to be parsed ttf font
+     * @param numGlyphs number of glyphs of the font; every glyph index of
+     * the subtable has to be smaller than this value
+     * @throws IOException If there is an error parsing the true type font,
+     * e.g. an invalid group count, range, character code or glyph index.
+     */
+    protected void processSubtype8( TrueTypeFont ttf, TTFDataStream data, int numGlyphs ) 
+    throws IOException {
+        // --- is32 is a 65536 BITS array ( = 8192 BYTES) 
+        int[] is32 = data.readUnsignedByteArray(8192);
+        long nbGroups = data.readUnsignedInt();
+            
+        // --- nbGroups shouldn't be greater than 65536
+        if (nbGroups > 65536) {
+            throw new IOException("CMap ( Subtype8 ) is invalid");
         }
-        else if( subtableFormat == 2 )
+            
+        glyphIdToCharacterCode = new int[numGlyphs];
+        // -- Read all groups; the subtable holds exactly nbGroups of them
+        for (long i = 0; i < nbGroups ; ++i ) 
         {
-            int[] subHeaderKeys = new int[256];
-            // ---- keep the Max Index of the SubHeader array to know its length
-            int maxSubHeaderIndex = 0;
-            for( int i=0; i<256; i++)
-            {
-                subHeaderKeys[i] = data.readUnsignedShort();
-                maxSubHeaderIndex = Math.max(maxSubHeaderIndex, (int)(subHeaderKeys[i]/8));
-            }
+            long firstCode = data.readUnsignedInt();
+            long endCode = data.readUnsignedInt();
+            long startGlyph = data.readUnsignedInt();
                 
-            // ---- Read all SubHeaders to avoid useless seek on DataSource
-            SubHeader[] subHeaders = new SubHeader[maxSubHeaderIndex + 1]; 
-            for (int i = 0; i <= maxSubHeaderIndex ; ++i ) 
-            {
-                int firstCode = data.readUnsignedShort();
-                int entryCount = data.readUnsignedShort();
-                short idDelta = data.readSignedShort();
-                int idRangeOffset = data.readUnsignedShort();
-                subHeaders[i] = new SubHeader(firstCode, entryCount, idDelta, idRangeOffset);
+            // -- process simple validation
+            if (firstCode > endCode || 0 > firstCode) {
+                throw new IOException("Range invalid");
             }
                 
-            long startGlyphIndexOffset = data.getCurrentPosition();
-            glyphIdToCharacterCode = new int[numGlyphs];
-            for ( int i = 0; i <= maxSubHeaderIndex ; ++i )
-            {
-                SubHeader sh = subHeaders[i];
-                int firstCode = sh.getFirstCode();
-                for ( int j = 0 ; j < sh.getEntryCount() ; ++j)
-                {
-                    // ---- compute the Character Code
-                    int charCode = ( i * 8 );
-                    charCode = (charCode << 8 ) + (firstCode + j);
-                    
-                    // ---- Go to the CharacterCOde position in the Sub Array 
-                    //      of the glyphIndexArray 
-                    //      glyphIndexArray contains Unsigned Short so add (j * 2) bytes 
-                    //      at the index position
-                    data.seek(startGlyphIndexOffset + sh.getIdRangeOffset() + (j*2));
-                    int p = data.readUnsignedShort();
-                    // ---- compute the glyphIndex 
-                    p = p + sh.getIdDelta() % 65536;
+            for (long j = firstCode; j <= endCode; ++j) {
+                // -- Convert the Character code in decimal
+                if (j > Integer.MAX_VALUE) {
+                    throw new IOException("[Sub Format 8] Invalid Character code");
+                }
+                // is32 only holds 65536 bits; a larger code can't be looked up, so report
+                // it as a parse error instead of an ArrayIndexOutOfBoundsException
+                if (j / 8 >= is32.length) {
+                    throw new IOException("[Sub Format 8] Invalid Character code");
+                }
+                
+                int currentCharCode;
+                if ( (is32[ (int)j / 8 ] & (1 << ((int)j % 8 ))) == 0) {
+                    currentCharCode = (int)j;
+                } else {
+                    // the character code uses a 32bits format 
+                    // convert it in decimal : see http://www.unicode.org/faq//utf_bom.html#utf16-4
+                    long LEAD_OFFSET = 0xD800 - (0x10000 >> 10);
+                    long SURROGATE_OFFSET = 0x10000 - (0xD800 << 10) - 0xDC00;
+                    long lead = LEAD_OFFSET + (j >> 10);
+                    long trail = 0xDC00 + (j & 0x3FF);
                     
-                    glyphIdToCharacterCode[p] = charCode;
-                    characterCodeToGlyphId.put(charCode, p);
+                    long codepoint = (lead << 10) + trail + SURROGATE_OFFSET;
+                    if (codepoint > Integer.MAX_VALUE) {
+                        throw new IOException("[Sub Format 8] Invalid Character code");
+                    }
+                    currentCharCode = (int)codepoint;
+                }
+                
+                long glyphIndex = startGlyph + (j-firstCode);
+                // valid indices are 0 .. numGlyphs-1, the array has numGlyphs entries
+                if (glyphIndex >= numGlyphs || glyphIndex > Integer.MAX_VALUE) {
+                    throw new IOException("CMap contains an invalid glyph index");
+                }
+                
+                glyphIdToCharacterCode[(int)glyphIndex] = currentCharCode;
+                characterCodeToGlyphId.put(currentCharCode, (int)glyphIndex);
+            }
+        }
+    }
+    
+    /**
+     * Reads the header of a format 10 subtable and validates its character range.
+     * Only the start code and the number of characters are read and checked;
+     * no mapping between character codes and glyph ids is stored.
+     * @param ttf the TrueTypeFont instance holding the parsed data.
+     * @param data the data stream of the to be parsed ttf font
+     * @param numGlyphs number of glyphs of the font (not used by this format)
+     * @throws IOException If the number of characters or the character range is invalid.
+     */
+    protected void processSubtype10( TrueTypeFont ttf, TTFDataStream data, int numGlyphs ) 
+    throws IOException {
+        long firstCharCode = data.readUnsignedInt();
+        long charCount = data.readUnsignedInt();
+        if (charCount > Integer.MAX_VALUE) {
+            throw new IOException("Invalid number of Characters");
+        }
+
+        // the range has to start inside the Unicode code space and must
+        // neither end beyond it nor inside the surrogate block
+        long rangeEnd = firstCharCode + charCount;
+        boolean startOutOfRange = firstCharCode < 0 || firstCharCode > 0x0010FFFF;
+        boolean endOutOfRange = rangeEnd > 0x0010FFFF;
+        boolean endInSurrogates = rangeEnd >= 0x0000D800 && rangeEnd <= 0x0000DFFF;
+        if (startOutOfRange || endOutOfRange || endInSurrogates) {
+            throw new IOException("Invalid Characters codes");
+        }
+    }
+    
+    /**
+     * Reads a format 12 subtable and fills both mapping directions
+     * (glyph id to character code and character code to glyph id).
+     * @param ttf the TrueTypeFont instance holding the parsed data.
+     * @param data the data stream of the to be parsed ttf font
+     * @param numGlyphs number of glyphs of the font; every glyph index of
+     * the subtable has to be smaller than this value
+     * @throws IOException If there is an error parsing the true type font,
+     * e.g. an invalid character code or glyph index.
+     */
+    protected void processSubtype12( TrueTypeFont ttf, TTFDataStream data, int numGlyphs ) 
+    throws IOException {
+        long nbGroups = data.readUnsignedInt();
+        glyphIdToCharacterCode = new int[numGlyphs];
+        // the subtable holds exactly nbGroups groups
+        for (long i = 0; i < nbGroups ; ++i ) 
+        {
+            long firstCode = data.readUnsignedInt();
+            long endCode = data.readUnsignedInt();
+            long startGlyph = data.readUnsignedInt();
+                
+            if ( firstCode < 0 || firstCode > 0x0010FFFF 
+                    || ( firstCode >= 0x0000D800 && firstCode <= 0x0000DFFF ) ) {
+                throw new IOException("Invalid Characters codes");
+            }
+            
+            if ( endCode > 0 && (endCode < firstCode || endCode > 0x0010FFFF 
+                    || ( endCode >= 0x0000D800 && endCode <= 0x0000DFFF ) ) ) {
+                throw new IOException("Invalid Characters codes");
+            }
+                       
+            for (long j = 0; j <= (endCode - firstCode); ++j) {
+                
+                if ( (firstCode + j) > Integer.MAX_VALUE ) {
+                    throw new IOException("Character Code greater than Integer.MAX_VALUE");                 
                 }
-            }        
+                           
+                long glyphIndex = (startGlyph + j);
+                // valid indices are 0 .. numGlyphs-1, the array has numGlyphs entries
+                if (glyphIndex >= numGlyphs || glyphIndex > Integer.MAX_VALUE) {
+                    throw new IOException("CMap contains an invalid glyph index");
+                }
+                glyphIdToCharacterCode[(int)glyphIndex] = (int)(firstCode + j);
+                characterCodeToGlyphId.put((int)(firstCode + j), (int)glyphIndex);
+            }
         }
-        else if( subtableFormat == 4 )
+    }
+            
+    /**
+     * Reads a format 13 subtable, in which all character codes of a group
+     * are mapped to the same glyph id, and fills both mapping directions.
+     * @param ttf the TrueTypeFont instance holding the parsed data.
+     * @param data the data stream of the to be parsed ttf font
+     * @param numGlyphs number of glyphs of the font; every glyph id of
+     * the subtable has to be smaller than this value
+     * @throws IOException If there is an error parsing the true type font,
+     * e.g. an invalid character code or glyph index.
+     */
+    protected void processSubtype13( TrueTypeFont ttf, TTFDataStream data, int numGlyphs ) 
+    throws IOException {
+        long nbGroups = data.readUnsignedInt();
+        // like the other formats, allocate the reverse lookup before writing to it
+        glyphIdToCharacterCode = new int[numGlyphs];
+        // the subtable holds exactly nbGroups groups
+        for (long i = 0; i < nbGroups ; ++i ) 
         {
-            int segCountX2 = data.readUnsignedShort();
-            int segCount = segCountX2/2;
-            int searchRange = data.readUnsignedShort();
-            int entrySelector = data.readUnsignedShort();
-            int rangeShift = data.readUnsignedShort();
-            int[] endCount = data.readUnsignedShortArray( segCount );
-            int reservedPad = data.readUnsignedShort();
-            int[] startCount = data.readUnsignedShortArray( segCount );
-            int[] idDelta = data.readUnsignedShortArray( segCount );
-            int[] idRangeOffset = data.readUnsignedShortArray( segCount );
-            
-            //this is the final result
-            //key=glyphId, value is character codes
-            glyphIdToCharacterCode = new int[numGlyphs];
+            long firstCode = data.readUnsignedInt();
+            long endCode = data.readUnsignedInt();
+            long glyphId = data.readUnsignedInt();
+                
+            // valid ids are 0 .. numGlyphs-1, the array has numGlyphs entries
+            if (glyphId >= numGlyphs) {
+                throw new IOException("CMap contains an invalid glyph index");  
+            }
+            
+            if ( firstCode < 0 || firstCode > 0x0010FFFF 
+                    || ( firstCode >= 0x0000D800 && firstCode <= 0x0000DFFF ) ) {
+                throw new IOException("Invalid Characters codes");
+            }
+            
+            if ( endCode > 0 && (endCode < firstCode || endCode > 0x0010FFFF 
+                    || ( endCode >= 0x0000D800 && endCode <= 0x0000DFFF )) ) {
+                throw new IOException("Invalid Characters codes");
+            }
             
-            long currentPosition = data.getCurrentPosition();
+            for (long j = 0; j <= (endCode - firstCode); ++j) {
+                
+                if ( (firstCode + j) > Integer.MAX_VALUE ) {
+                    throw new IOException("Character Code greater than Integer.MAX_VALUE");                 
+                }
+                glyphIdToCharacterCode[(int)glyphId] = (int)(firstCode + j);
+                characterCodeToGlyphId.put((int)(firstCode + j), (int)glyphId);
+            }
+        }
+    }
+            
+    /**
+     * Placeholder for format 14 subtables, which are not supported yet.
+     * Nothing is read from the stream; the method always fails.
+     * @param ttf the TrueTypeFont instance holding the parsed data (unused).
+     * @param data the data stream of the to be parsed ttf font (unused).
+     * @param numGlyphs number of glyphs of the font (unused).
+     * @throws IOException always, because this format is not implemented.
+     */
+    protected void processSubtype14( TrueTypeFont ttf, TTFDataStream data, int numGlyphs )
+        throws IOException
+    {
+        throw new IOException("CMap subtype 14 not yet implemented");
+    }
+                
+    /**
+     * Reads a format 6 subtable, a dense array of glyph ids for the character
+     * codes firstCode .. firstCode+entryCount-1, and fills both mapping directions.
+     * @param ttf the TrueTypeFont instance holding the parsed data.
+     * @param data the data stream of the to be parsed ttf font
+     * @param numGlyphs number of glyphs of the font; every glyph id of
+     * the subtable has to be smaller than this value
+     * @throws IOException If there is an error parsing the true type font
+     * or the subtable contains a glyph id outside the font.
+     */
+    protected void processSubtype6( TrueTypeFont ttf, TTFDataStream data, int numGlyphs ) 
+    throws IOException {
+        int firstCode = data.readUnsignedShort();
+        int entryCount = data.readUnsignedShort();
+        glyphIdToCharacterCode = new int[numGlyphs];
+        int[] glyphIdArray = data.readUnsignedShortArray( entryCount );
+        for( int i=0; i<entryCount; i++)
+        {
+            int glyphId = glyphIdArray[i];
+            // the ids come straight from the file; reject the ones that would
+            // not fit into the lookup array, as the other formats do
+            if( glyphId >= numGlyphs )
+            {
+                throw new IOException("CMap contains an invalid glyph index");
+            }
+            glyphIdToCharacterCode[glyphId] = firstCode+i;
+            characterCodeToGlyphId.put((firstCode+i), glyphId);
+        }
+    }
+    
+    /**
+     * Reads a format 4 subtable (segment mapping) and fills both mapping
+     * directions (glyph id to character code and character code to glyph id).
+     * For a segment with a range offset of 0 the glyph id is
+     * (character code + delta) modulo 65536; otherwise the glyph id is read
+     * from the glyph id array that follows the idRangeOffset array, and a
+     * value of 0 there means that no mapping is stored for the character code.
+     * Segments whose start or end code is 65535 are skipped.
+     * @param ttf the TrueTypeFont instance holding the parsed data.
+     * @param data the data stream of the to be parsed ttf font
+     * @param numGlyphs number of glyphs of the font, used as size of the
+     * glyph id to character code array
+     * @throws IOException If there is an error parsing the true type font.
+     */
+    protected void processSubtype4( TrueTypeFont ttf, TTFDataStream data, int numGlyphs ) 
+    throws IOException {
+        int segCountX2 = data.readUnsignedShort();
+        int segCount = segCountX2/2;
+        // searchRange, entrySelector, rangeShift and reservedPad are not used
+        // below, they are only read to advance the stream
+        int searchRange = data.readUnsignedShort();
+        int entrySelector = data.readUnsignedShort();
+        int rangeShift = data.readUnsignedShort();
+        int[] endCount = data.readUnsignedShortArray( segCount );
+        int reservedPad = data.readUnsignedShort();
+        int[] startCount = data.readUnsignedShortArray( segCount );
+        int[] idDelta = data.readUnsignedShortArray( segCount );
+        int[] idRangeOffset = data.readUnsignedShortArray( segCount );
+        
+        //this is the final result
+        //key=glyphId, value is character codes
+        // NOTE(review): the glyph ids computed below are not checked against
+        // numGlyphs before they are used as index into this array
+        glyphIdToCharacterCode = new int[numGlyphs];
+        
+        // position right behind the idRangeOffset array; the glyph offsets
+        // below are computed relative to it
+        long currentPosition = data.getCurrentPosition();
             
-            for( int i=0; i<segCount; i++ )
+        for( int i=0; i<segCount; i++ )
+        {
+            int start = startCount[i];
+            int end = endCount[i];
+            int delta = idDelta[i];
+            int rangeOffset = idRangeOffset[i];
+            if( start != 65535 && end != 65535 )
             {
-                int start = startCount[i];
-                int end = endCount[i];
-                int delta = idDelta[i];
-                int rangeOffset = idRangeOffset[i];
-                if( start != 65535 && end != 65535 )
+                for( int j=start; j<=end; j++ )
                 {
-                    for( int j=start; j<=end; j++ )
+                    if( rangeOffset == 0 )
                     {
-                        if( rangeOffset == 0 )
-                        {
-                            glyphIdToCharacterCode[ ((j+delta)%65536) ]=j;
-                            characterCodeToGlyphId.put(j, ((j+delta)%65536));
-                        }
-                        else
+                        glyphIdToCharacterCode[ ((j+delta)%65536) ]=j;
+                        characterCodeToGlyphId.put(j, ((j+delta)%65536));
+                    }
+                    else
+                    {
+                        // (i-segCount) steps back from the end of the idRangeOffset
+                        // array to entry i, the rest is the offset from that entry;
+                        // everything is counted in 2 byte units, hence the *2
+                        long glyphOffset = currentPosition +
+                            ((rangeOffset/2) + //idRangeOffset[i]/2 
+                            (j-start) + //(c - startCount[i])                                   
+                            (i-segCount))*2; //&idRangeOffset[i]); 
+                        data.seek( glyphOffset );
+                        int glyphIndex = data.readUnsignedShort();
+                        if( glyphIndex != 0 )
                         {
-                            long glyphOffset = currentPosition +
-                                ((rangeOffset/2) + //idRangeOffset[i]/2 
-                                (j-start) + //(c - startCount[i])                                   
-                                (i-segCount))*2; //&idRangeOffset[i]); 
-                            data.seek( glyphOffset );
-                            int glyphIndex = data.readUnsignedShort();
-                            if( glyphIndex != 0 )
+                            glyphIndex += delta;
+                            glyphIndex = glyphIndex % 65536;
+                            // the first character code found for a glyph wins
+                            if( glyphIdToCharacterCode[glyphIndex] == 0 )
                             {
-                                glyphIndex += delta;
-                                glyphIndex = glyphIndex % 65536;
-                                if( glyphIdToCharacterCode[glyphIndex] == 0 )
-                                {
-                                    glyphIdToCharacterCode[glyphIndex] = j;
-                                    characterCodeToGlyphId.put(j, glyphIndex);
-                                }
+                                glyphIdToCharacterCode[glyphIndex] = j;
+                                characterCodeToGlyphId.put(j, glyphIndex);
                             }
-                            
                         }
                     }
                 }
             }
         }
-        else if( subtableFormat == 6 )
+    }
+       
+    /**
+     * Read a format 2 subtable.
+     * @param ttf the TrueTypeFont instance holding the parsed data.
+     * @param data the data stream of the TTF font to be parsed
+     * @param numGlyphs number of glyphs to be read
+     * @throws IOException If there is an error parsing the true type font.
+     */
+    protected void processSubtype2( TrueTypeFont ttf, TTFDataStream data, int numGlyphs ) 
+    throws IOException {
+        int[] subHeaderKeys = new int[256];
+        // ---- keep the Max Index of the SubHeader array to know its length
+        int maxSubHeaderIndex = 0;
+        for( int i=0; i<256; i++)
+        {
+            subHeaderKeys[i] = data.readUnsignedShort();
+            maxSubHeaderIndex = Math.max(maxSubHeaderIndex, (int)(subHeaderKeys[i]/8));
+        }
+        
+        // ---- Read all SubHeaders to avoid useless seek on DataSource
+        SubHeader[] subHeaders = new SubHeader[maxSubHeaderIndex + 1]; 
+        for (int i = 0; i <= maxSubHeaderIndex ; ++i ) 
         {
             int firstCode = data.readUnsignedShort();
             int entryCount = data.readUnsignedShort();
-            glyphIdToCharacterCode = new int[numGlyphs];
-            int[] glyphIdArray = data.readUnsignedShortArray( entryCount );
-            for( int i=0; i<entryCount; i++)
+            short idDelta = data.readSignedShort();
+            int idRangeOffset = data.readUnsignedShort();
+            subHeaders[i] = new SubHeader(firstCode, entryCount, idDelta, idRangeOffset);
+        }
+        long startGlyphIndexOffset = data.getCurrentPosition();
+        glyphIdToCharacterCode = new int[numGlyphs];
+        for ( int i = 0; i <= maxSubHeaderIndex ; ++i )
+        {
+            SubHeader sh = subHeaders[i];
+            int firstCode = sh.getFirstCode();
+            for ( int j = 0 ; j < sh.getEntryCount() ; ++j)
             {
-                glyphIdToCharacterCode[glyphIdArray[i]] = firstCode+i;
-                characterCodeToGlyphId.put((firstCode+i), glyphIdArray[i]);
+                // ---- compute the Character Code
+                int charCode = ( i * 8 );
+                charCode = (charCode << 8 ) + (firstCode + j);
+                
+                // ---- Go to the character code position in the sub-array
+                //      of the glyphIndexArray.
+                //      glyphIndexArray contains unsigned shorts, so add (j * 2) bytes
+                //      at the index position
+                data.seek(startGlyphIndexOffset + sh.getIdRangeOffset() + (j*2));
+                int p = data.readUnsignedShort();
+                // ---- compute the glyphIndex 
+                p = p + sh.getIdDelta() % 65536;
+                
+                glyphIdToCharacterCode[p] = charCode;
+                characterCodeToGlyphId.put(charCode, p);
             }
         }
-        else
+    }
+       
+    /**
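+     * Initialize the CMapEntry when it is a subtype 0.
+     * 
+     * @param ttf the TrueTypeFont instance (not read by this method)
+     * @param data the data stream from which the 256-byte glyph mapping is read
+     * @throws IOException If there is an error reading the data.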
+     */
+    protected void processSubtype0( TrueTypeFont ttf, TTFDataStream data ) 
+    throws IOException {
+        byte[] glyphMapping = data.read( 256 );
+        glyphIdToCharacterCode = new int[256];
+        for( int i=0;i < glyphMapping.length; i++ )
         {
-            throw new IOException( "Unknown cmap format:" + subtableFormat );
+            int glyphIndex = (glyphMapping[i]+256)%256;
+            glyphIdToCharacterCode[glyphIndex]=i;
+            characterCodeToGlyphId.put(i, glyphIndex);
         }
     }
-    
 
     /**
      * @return Returns the glyphIdToCharacterCode.

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphTable.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphTable.java?rev=997405&r1=997404&r2=997405&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphTable.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/GlyphTable.java Wed Sep 15 17:27:17 2010
@@ -44,11 +44,9 @@ public class GlyphTable extends TTFTable
     {
         MaximumProfileTable maxp = ttf.getMaximumProfile();
         IndexToLocationTable loc = ttf.getIndexToLocation();
-        PostScriptTable post = ttf.getPostScript();
         long[] offsets = loc.getOffsets();
         int numGlyphs = maxp.getNumGlyphs();
         glyphs = new GlyphData[numGlyphs];
-        String[] glyphNames = post.getGlyphNames(); 
         for( int i=0; i<numGlyphs-1; i++ )
         {
             GlyphData glyph = new GlyphData();

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java?rev=997405&r1=997404&r2=997405&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFDataStream.java Wed Sep 15 17:27:17 2010
@@ -123,9 +123,26 @@ public abstract class TTFDataStream 
     public abstract int readUnsignedShort() throws IOException;
     
     /**
+     * Read an unsigned byte array.
+     * 
+     * @return An unsigned byte array.
+     * @throws IOException If there is an error reading the data.
+     */
+    public int[] readUnsignedByteArray( int length ) throws IOException
+    {
+        int[] array = new int[ length ];
+        for( int i=0; i<length; i++ )
+        {
+            array[i] = read();
+        }
+        return array;
+    }
+    
+    /**
      * Read an unsigned short array.
      * 
      * @param length The length of the array to read.
+     * @param length The length of the array to read.
      * @return An unsigned short array.
      * @throws IOException If there is an error reading the data.
      */

Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java?rev=997405&r1=997404&r2=997405&view=diff
==============================================================================
--- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java (original)
+++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/ttf/TTFParser.java Wed Sep 15 17:27:17 2010
@@ -16,22 +16,23 @@
  */
 package org.apache.fontbox.ttf;
 
-import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
 /**
  * A true type font file parser.
  * 
  * @author Ben Litchfield (ben@benlitchfield.com)
  * @version $Revision: 1.2 $
  */
-public class TTFParser
+public class TTFParser extends AbstractTTFParser
 {   
+    public TTFParser() {
+        super(false);
+    }
+       
+    public TTFParser(boolean isEmbedded) {
+        super(isEmbedded);
+    }
+        
     /**
      * A simple command line program to test parsing of a TTF file. <br/>
      * usage: java org.pdfbox.ttf.TTFParser &lt;ttf-file&gt;
@@ -53,171 +54,16 @@ public class TTFParser
     }
     
     /**
-     * Parse a file and get a true type font.
-     * @param ttfFile The TTF file.
-     * @return A true type font.
-     * @throws IOException If there is an error parsing the true type font.
-     */
-    public TrueTypeFont parseTTF( String ttfFile ) throws IOException
-    {
-        RAFDataStream raf = new RAFDataStream( ttfFile, "r" );
-        return parseTTF( raf );
-    }
-    
-    /**
-     * Parse a file and get a true type font.
-     * @param ttfFile The TTF file.
-     * @return A true type font.
-     * @throws IOException If there is an error parsing the true type font.
-     */
-    public TrueTypeFont parseTTF( File ttfFile ) throws IOException
-    {
-        RAFDataStream raf = new RAFDataStream( ttfFile, "r" );
-        return parseTTF( raf );
-    }
-    
-    /**
-     * Parse a file and get a true type font.
-     * @param ttfData The TTF data to parse.
-     * @return A true type font.
-     * @throws IOException If there is an error parsing the true type font.
+     * {@inheritDoc}
      */
-    public TrueTypeFont parseTTF( InputStream ttfData ) throws IOException
+    protected void parseTables(TrueTypeFont font, TTFDataStream raf) throws IOException 
     {
-        return parseTTF( new MemoryTTFDataStream( ttfData ));
-    }
-    
-    /**
-     * Parse a file and get a true type font.
-     * @param raf The TTF file.
-     * @return A true type font.
-     * @throws IOException If there is an error parsing the true type font.
-     */
-    public TrueTypeFont parseTTF( TTFDataStream raf ) throws IOException
-    {
-        TrueTypeFont font = new TrueTypeFont( raf );
-        font.setVersion( raf.read32Fixed() );
-        int numberOfTables = raf.readUnsignedShort();
-        int searchRange = raf.readUnsignedShort();
-        int entrySelector = raf.readUnsignedShort();
-        int rangeShift = raf.readUnsignedShort();
-        for( int i=0; i<numberOfTables; i++ )
-        {
-            TTFTable table = readTableDirectory( raf );   
-            font.addTable( table );
-        }
-        List<TTFTable> initialized = new ArrayList<TTFTable>();
-        //need to initialize a couple tables in a certain order
-        HeaderTable head = font.getHeader();
-        if (head == null) {
-        	throw new IOException("head is mandatory");
-        }
-        raf.seek( head.getOffset() );
-        head.initData( font, raf );
-        initialized.add( head );
-        
-        HorizontalHeaderTable hh = font.getHorizontalHeader();
-        if (hh == null) {
-        	throw new IOException("hhead is mandatory");
-        }
-        raf.seek( hh.getOffset() );
-        hh.initData( font, raf );
-        initialized.add( hh );
-        
-        MaximumProfileTable maxp = font.getMaximumProfile();
-        if (maxp == null) {
-        	throw new IOException("maxp is mandatory");
-        }
-        raf.seek( maxp.getOffset() );
-        maxp.initData( font, raf );
-        initialized.add( maxp );
-        
-        PostScriptTable post = font.getPostScript();
-        if (post == null) {
-        	throw new IOException("post is mandatory");
-        }
-        raf.seek( post.getOffset() );
-        post.initData( font, raf );
-        initialized.add( post );
-        
-        IndexToLocationTable loc = font.getIndexToLocation();
-        if (loc == null) {
-        	throw new IOException("loca is mandatory");
-        }
-        raf.seek( loc.getOffset() );
-        loc.initData( font, raf );
-        initialized.add( loc );
+        super.parseTables(font, raf);
         
-        Iterator<TTFTable> iter = font.getTables().iterator();
-        while( iter.hasNext() )
-        {
-            TTFTable table = iter.next();
-            if( !initialized.contains( table ) )
-            {
-                raf.seek( table.getOffset() );
-                table.initData( font, raf );
-            }
-        }
-        return font;
-    }
-    
-    private TTFTable readTableDirectory( TTFDataStream raf ) throws IOException
-    {
-        TTFTable retval = null;
-        String tag = raf.readString( 4 );
-        if( tag.equals( CMAPTable.TAG ) )
-        {
-            retval = new CMAPTable();
-        }
-        else if( tag.equals( GlyphTable.TAG ) )
-        {
-            retval = new GlyphTable();
-        }
-        else if( tag.equals( HeaderTable.TAG ) )
-        {
-            retval = new HeaderTable();
-        }
-        else if( tag.equals( HorizontalHeaderTable.TAG ) )
-        {
-            retval = new HorizontalHeaderTable();
-        }
-        else if( tag.equals( HorizontalMetricsTable.TAG ) )
-        {
-            retval = new HorizontalMetricsTable();
+        // check other mandatory tables
+        if ( font.getCMAP() == null ){
+            throw new IOException("cmap is mandatory");
         }
-        else if( tag.equals( IndexToLocationTable.TAG ) )
-        {
-            retval = new IndexToLocationTable();
-        }
-        else if( tag.equals( MaximumProfileTable.TAG ) )
-        {
-            retval = new MaximumProfileTable();
-        }
-        else if( tag.equals( NamingTable.TAG ) )
-        {
-            retval = new NamingTable();
-        }
-        else if( tag.equals( OS2WindowsMetricsTable.TAG ) )
-        {
-            retval = new OS2WindowsMetricsTable();
-        }
-        else if( tag.equals( PostScriptTable.TAG ) )
-        {
-            retval = new PostScriptTable();
-        }
-        else if( tag.equals( DigitalSignatureTable.TAG ) )
-        {
-            retval = new DigitalSignatureTable();
-        }
-        else
-        {
-            //unknown table type but read it anyway.
-            retval = new TTFTable();
-        }
-        retval.setTag( tag );
-        retval.setCheckSum( raf.readUnsignedInt() );
-        retval.setOffset( raf.readUnsignedInt() );
-        retval.setLength( raf.readUnsignedInt() );
-        return retval;
     }
+
 }
\ No newline at end of file