You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/06/06 17:24:03 UTC

svn commit: r1600917 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java main/java/org/apache/tika/parser/font/TrueTypeParser.java test/java/org/apache/tika/parser/font/FontParsersTest.java

Author: nick
Date: Fri Jun  6 15:24:03 2014
New Revision: 1600917

URL: http://svn.apache.org/r1600917
Log:
TIKA-1325 Have the TTF parser pull out a little more metadata, in a similar way to the AFM parser, plus add some TTF tests

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java?rev=1600917&r1=1600916&r2=1600917&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java Fri Jun  6 15:24:03 2014
@@ -50,9 +50,11 @@ public class AdobeFontMetricParser exten
     // TIKA-1325 Replace these with properties, from a well known standard
     static final String MET_AVG_CHAR_WIDTH = "AvgCharacterWidth";
     static final String MET_DOC_VERSION = "DocVersion";
+    static final String MET_PS_NAME = "PSName";
     static final String MET_FONT_NAME = "FontName";
     static final String MET_FONT_FULL_NAME = "FontFullName";
     static final String MET_FONT_FAMILY_NAME = "FontFamilyName";
+    static final String MET_FONT_SUB_FAMILY_NAME = "FontSubFamilyName";
     static final String MET_FONT_VERSION = "FontVersion";
     static final String MET_FONT_WEIGHT = "FontWeight";
     static final String MET_FONT_NOTICE = "FontNotice";

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java?rev=1600917&r1=1600916&r2=1600917&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java Fri Jun  6 15:24:03 2014
@@ -21,6 +21,8 @@ import java.io.InputStream;
 import java.util.Collections;
 import java.util.Set;
 
+import org.apache.fontbox.ttf.NameRecord;
+import org.apache.fontbox.ttf.NamingTable;
 import org.apache.fontbox.ttf.TTFParser;
 import org.apache.fontbox.ttf.TrueTypeFont;
 import org.apache.tika.exception.TikaException;
@@ -69,10 +71,36 @@ public class TrueTypeParser extends Abst
 
         // Report the details of the font
         metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
-        metadata.set(TikaCoreProperties.CREATED, font.getHeader().getCreated().getTime());
-        metadata.set(
-                TikaCoreProperties.MODIFIED,
+        metadata.set(TikaCoreProperties.CREATED, 
+                font.getHeader().getCreated().getTime());
+        metadata.set(TikaCoreProperties.MODIFIED,
                 font.getHeader().getModified().getTime());
+        metadata.set(AdobeFontMetricParser.MET_DOC_VERSION,
+                Float.toString(font.getHeader().getVersion()));
+        
+        // Pull out the naming info
+        NamingTable fontNaming = font.getNaming();
+        for (NameRecord nr : fontNaming.getNameRecords()) {
+            if (nr.getNameId() == NameRecord.NAME_FONT_FAMILY_NAME) {
+                metadata.set(AdobeFontMetricParser.MET_FONT_FAMILY_NAME, nr.getString());
+            }
+            if (nr.getNameId() == NameRecord.NAME_FONT_SUB_FAMILY_NAME) {
+                metadata.set(AdobeFontMetricParser.MET_FONT_SUB_FAMILY_NAME, nr.getString());
+            }
+            if (nr.getNameId() == NameRecord.NAME_FULL_FONT_NAME) {
+                metadata.set(AdobeFontMetricParser.MET_FONT_NAME, nr.getString());
+                metadata.set(TikaCoreProperties.TITLE, nr.getString());
+            }
+            if (nr.getNameId() == NameRecord.NAME_POSTSCRIPT_NAME) {
+                metadata.set(AdobeFontMetricParser.MET_PS_NAME, nr.getString());
+            }
+            if (nr.getNameId() == NameRecord.NAME_COPYRIGHT) {
+                metadata.set("Copyright", nr.getString());
+            }
+            if (nr.getNameId() == NameRecord.NAME_TRADEMARK) {
+                metadata.set("Trademark", nr.getString());
+            }
+        }
         
         // For now, we only output metadata, no textual contents
         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java?rev=1600917&r1=1600916&r2=1600917&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java Fri Jun  6 15:24:03 2014
@@ -69,4 +69,42 @@ public class FontParsersTest {
         assertTrue(content.contains("This is a comment in a sample file"));
         assertTrue(content.contains("UniqueID 12345"));
     }
+    
+    @Test
+    public void testTTFParsing() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        TikaInputStream stream = TikaInputStream.get(
+                FontParsersTest.class.getResource(
+                        "/test-documents/testTrueType.ttf"));
+
+        try {
+            parser.parse(stream, handler, metadata, context);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals("application/x-font-ttf", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("NewBaskervilleEF-Roman", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("1904-01-01T00:00:00Z",   metadata.get(Metadata.CREATION_DATE));
+        
+        assertEquals("NewBaskervilleEF-Roman", metadata.get(MET_FONT_NAME));
+        assertEquals("NewBaskerville",         metadata.get(MET_FONT_FAMILY_NAME));
+        assertEquals("Regular",                metadata.get(MET_FONT_SUB_FAMILY_NAME));
+        assertEquals("NewBaskervilleEF-Roman", metadata.get(MET_PS_NAME));
+        
+        assertEquals("Copyright",           metadata.get("Copyright").substring(0, 9));
+        assertEquals("ITC New Baskerville", metadata.get("Trademark").substring(0, 19));
+        
+        // Not extracted
+        assertEquals(null, metadata.get(MET_FONT_FULL_NAME));
+        assertEquals(null, metadata.get(MET_FONT_WEIGHT));
+        assertEquals(null, metadata.get(MET_FONT_VERSION));
+
+        // Currently, the parser doesn't extract any contents
+        String content = handler.toString();
+        assertEquals("", content);
+    }
 }