You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/08 23:27:37 UTC

svn commit: r995259 - in /tika/trunk: tika-core/src/main/resources/org/apache/tika/mime/ tika-parsers/src/main/java/org/apache/tika/parser/font/ tika-parsers/src/main/resources/META-INF/services/

Author: jukka
Date: Wed Sep  8 21:27:37 2010
New Revision: 995259

URL: http://svn.apache.org/viewvc?rev=995259&view=rev
Log:
TIKA-507: Parser for font files

Add a bare-bones TrueTypeParser class and improve related media type information

Added:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
Modified:
    tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser

Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=995259&r1=995258&r2=995259&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Wed Sep  8 21:27:37 2010
@@ -2309,9 +2309,13 @@
   <mime-type type="application/x-font-linux-psf">
     <glob pattern="*.psf"/>
   </mime-type>
+
   <mime-type type="application/x-font-otf">
+    <acronym>OTF</acronym>
+    <comment>OpenType Font</comment>
     <glob pattern="*.otf"/>
   </mime-type>
+
   <mime-type type="application/x-font-pcf">
     <glob pattern="*.pcf"/>
   </mime-type>
@@ -2320,10 +2324,17 @@
   </mime-type>
   <mime-type type="application/x-font-speedo"/>
   <mime-type type="application/x-font-sunos-news"/>
+
   <mime-type type="application/x-font-ttf">
+    <acronym>TTF</acronym>
+    <comment>TrueType Font</comment>
     <glob pattern="*.ttf"/>
     <glob pattern="*.ttc"/>
+    <magic priority="40">
+      <match value="0x00010000" type="string" offset="0"/>
+    </magic>
   </mime-type>
+
   <mime-type type="application/x-font-type1">
     <glob pattern="*.pfa"/>
     <glob pattern="*.pfb"/>

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java?rev=995259&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java Wed Sep  8 21:27:37 2010
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.font;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.fontbox.ttf.TTFParser;
+import org.apache.fontbox.ttf.TrueTypeFont;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for TrueType font files (TTF).
+ * <p>
+ * The font is loaded with FontBox; only metadata is extracted (content type
+ * plus the creation and modification dates from the font header), and an
+ * empty XHTML document is emitted.
+ */
+public class TrueTypeParser implements Parser {
+
+    /** Media type handled by this parser; also reported as the content type. */
+    private static final MediaType TYPE =
+        MediaType.application("x-font-ttf");
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.singleton(TYPE);
+
+    /**
+     * Returns the media types supported by this parser.
+     *
+     * @param context parse context (not used)
+     * @return singleton set containing the TrueType media type
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Parses the given font and records its metadata.
+     *
+     * @param stream font data
+     * @param handler receives the (empty) XHTML document
+     * @param metadata receives the content type and header dates
+     * @param context parse context (not used)
+     * @throws IOException if reading or closing the font fails
+     * @throws SAXException if the content handler fails
+     * @throws TikaException declared by the {@link Parser} interface
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        TrueTypeFont font;
+        TikaInputStream tis = TikaInputStream.get(stream);
+        TTFParser parser = new TTFParser();
+        if (tis.hasFile()) {
+            font = parser.parseTTF(tis.getFile());
+        } else {
+            font = parser.parseTTF(stream);
+        }
+
+        try {
+            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
+            metadata.set(DublinCore.DATE, font.getHeader().getCreated().getTime());
+            metadata.set(
+                    Property.internalDate(DublinCore.MODIFIED),
+                    font.getHeader().getModified().getTime());
+        } finally {
+            // Release the font's underlying data source even if a lookup fails
+            font.close();
+        }
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+    /**
+     * @deprecated This method will be removed in Apache Tika 1.0.
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        parse(stream, handler, metadata, new ParseContext());
+    }
+
+}

Modified: tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=995259&r1=995258&r2=995259&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (original)
+++ tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser Wed Sep  8 21:27:37 2010
@@ -19,6 +19,7 @@ org.apache.tika.parser.audio.MidiParser
 org.apache.tika.parser.dwg.DWGParser
 org.apache.tika.parser.epub.EpubParser
 org.apache.tika.parser.feed.FeedParser
+org.apache.tika.parser.font.TrueTypeParser
 org.apache.tika.parser.html.HtmlParser
 org.apache.tika.parser.image.ImageParser
 org.apache.tika.parser.image.TiffParser