You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2014/06/06 17:24:03 UTC
svn commit: r1600917 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
main/java/org/apache/tika/parser/font/TrueTypeParser.java
test/java/org/apache/tika/parser/font/FontParsersTest.java
Author: nick
Date: Fri Jun 6 15:24:03 2014
New Revision: 1600917
URL: http://svn.apache.org/r1600917
Log:
TIKA-1325 Have the TTF parser pull out a little more metadata, in the same way the AFM parser does, and add some TTF tests
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java?rev=1600917&r1=1600916&r2=1600917&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java Fri Jun 6 15:24:03 2014
@@ -50,9 +50,11 @@ public class AdobeFontMetricParser exten
// TIKA-1325 Replace these with properties, from a well known standard
static final String MET_AVG_CHAR_WIDTH = "AvgCharacterWidth";
static final String MET_DOC_VERSION = "DocVersion";
+ static final String MET_PS_NAME = "PSName";
static final String MET_FONT_NAME = "FontName";
static final String MET_FONT_FULL_NAME = "FontFullName";
static final String MET_FONT_FAMILY_NAME = "FontFamilyName";
+ static final String MET_FONT_SUB_FAMILY_NAME = "FontSubFamilyName";
static final String MET_FONT_VERSION = "FontVersion";
static final String MET_FONT_WEIGHT = "FontWeight";
static final String MET_FONT_NOTICE = "FontNotice";
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java?rev=1600917&r1=1600916&r2=1600917&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java Fri Jun 6 15:24:03 2014
@@ -21,6 +21,8 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
+import org.apache.fontbox.ttf.NameRecord;
+import org.apache.fontbox.ttf.NamingTable;
import org.apache.fontbox.ttf.TTFParser;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.tika.exception.TikaException;
@@ -69,10 +71,36 @@ public class TrueTypeParser extends Abst
// Report the details of the font
metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
- metadata.set(TikaCoreProperties.CREATED, font.getHeader().getCreated().getTime());
- metadata.set(
- TikaCoreProperties.MODIFIED,
+ metadata.set(TikaCoreProperties.CREATED,
+ font.getHeader().getCreated().getTime());
+ metadata.set(TikaCoreProperties.MODIFIED,
font.getHeader().getModified().getTime());
+ metadata.set(AdobeFontMetricParser.MET_DOC_VERSION,
+ Float.toString(font.getHeader().getVersion()));
+
+ // Pull out the naming info
+ NamingTable fontNaming = font.getNaming();
+ for (NameRecord nr : fontNaming.getNameRecords()) {
+ if (nr.getNameId() == NameRecord.NAME_FONT_FAMILY_NAME) {
+ metadata.set(AdobeFontMetricParser.MET_FONT_FAMILY_NAME, nr.getString());
+ }
+ if (nr.getNameId() == NameRecord.NAME_FONT_SUB_FAMILY_NAME) {
+ metadata.set(AdobeFontMetricParser.MET_FONT_SUB_FAMILY_NAME, nr.getString());
+ }
+ if (nr.getNameId() == NameRecord.NAME_FULL_FONT_NAME) {
+ metadata.set(AdobeFontMetricParser.MET_FONT_NAME, nr.getString());
+ metadata.set(TikaCoreProperties.TITLE, nr.getString());
+ }
+ if (nr.getNameId() == NameRecord.NAME_POSTSCRIPT_NAME) {
+ metadata.set(AdobeFontMetricParser.MET_PS_NAME, nr.getString());
+ }
+ if (nr.getNameId() == NameRecord.NAME_COPYRIGHT) {
+ metadata.set("Copyright", nr.getString());
+ }
+ if (nr.getNameId() == NameRecord.NAME_TRADEMARK) {
+ metadata.set("Trademark", nr.getString());
+ }
+ }
// For now, we only output metadata, no textual contents
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java?rev=1600917&r1=1600916&r2=1600917&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java Fri Jun 6 15:24:03 2014
@@ -69,4 +69,42 @@ public class FontParsersTest {
assertTrue(content.contains("This is a comment in a sample file"));
assertTrue(content.contains("UniqueID 12345"));
}
+
+ @Test
+ public void testTTFParsing() throws Exception {
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+ ParseContext context = new ParseContext();
+ TikaInputStream stream = TikaInputStream.get(
+ FontParsersTest.class.getResource(
+ "/test-documents/testTrueType.ttf"));
+
+ try {
+ parser.parse(stream, handler, metadata, context);
+ } finally {
+ stream.close();
+ }
+
+ assertEquals("application/x-font-ttf", metadata.get(Metadata.CONTENT_TYPE));
+ assertEquals("NewBaskervilleEF-Roman", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("1904-01-01T00:00:00Z", metadata.get(Metadata.CREATION_DATE));
+
+ assertEquals("NewBaskervilleEF-Roman", metadata.get(MET_FONT_NAME));
+ assertEquals("NewBaskerville", metadata.get(MET_FONT_FAMILY_NAME));
+ assertEquals("Regular", metadata.get(MET_FONT_SUB_FAMILY_NAME));
+ assertEquals("NewBaskervilleEF-Roman", metadata.get(MET_PS_NAME));
+
+ assertEquals("Copyright", metadata.get("Copyright").substring(0, 9));
+ assertEquals("ITC New Baskerville", metadata.get("Trademark").substring(0, 19));
+
+ // Not extracted
+ assertEquals(null, metadata.get(MET_FONT_FULL_NAME));
+ assertEquals(null, metadata.get(MET_FONT_WEIGHT));
+ assertEquals(null, metadata.get(MET_FONT_VERSION));
+
+ // Currently, the parser doesn't extract any contents
+ String content = handler.toString();
+ assertEquals("", content);
+ }
}