You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/08 23:27:37 UTC
svn commit: r995259 - in /tika/trunk:
tika-core/src/main/resources/org/apache/tika/mime/
tika-parsers/src/main/java/org/apache/tika/parser/font/
tika-parsers/src/main/resources/META-INF/services/
Author: jukka
Date: Wed Sep 8 21:27:37 2010
New Revision: 995259
URL: http://svn.apache.org/viewvc?rev=995259&view=rev
Log:
TIKA-507: Parser for font files
Add a bare-bones TrueTypeParser class and improve related media type information
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
Modified: tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=995259&r1=995258&r2=995259&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Wed Sep 8 21:27:37 2010
@@ -2309,9 +2309,13 @@
<mime-type type="application/x-font-linux-psf">
<glob pattern="*.psf"/>
</mime-type>
+
<mime-type type="application/x-font-otf">
+ <acronym>OTF</acronym>
+ <comment>OpenType Font</comment>
<glob pattern="*.otf"/>
</mime-type>
+
<mime-type type="application/x-font-pcf">
<glob pattern="*.pcf"/>
</mime-type>
@@ -2320,10 +2324,17 @@
</mime-type>
<mime-type type="application/x-font-speedo"/>
<mime-type type="application/x-font-sunos-news"/>
+
<mime-type type="application/x-font-ttf">
+ <acronym>TTF</acronym>
+ <comment>TrueType Font</comment>
<glob pattern="*.ttf"/>
<glob pattern="*.ttc"/>
+ <magic priority="40">
+ <match value="0x00010000" type="string" offset="0"/>
+ </magic>
</mime-type>
+
<mime-type type="application/x-font-type1">
<glob pattern="*.pfa"/>
<glob pattern="*.pfb"/>
Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java?rev=995259&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java Wed Sep 8 21:27:37 2010
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.font;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.fontbox.ttf.TTFParser;
+import org.apache.fontbox.ttf.TrueTypeFont;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for TrueType font files (TTF).
+ * <p>
+ * No text is extracted from the font. The parser records the content type
+ * and the creation and modification dates found in the font header as
+ * metadata, and emits an empty XHTML document.
+ */
+public class TrueTypeParser implements Parser {
+
+    /** The only media type handled by this parser. */
+    private static final MediaType TYPE =
+        MediaType.application("x-font-ttf");
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.singleton(TYPE);
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Parses the font with FontBox and copies the header dates to the
+     * given metadata.
+     *
+     * @param stream the font data; parsed directly from the backing file
+     *               when the stream already has one
+     * @param handler receives the (empty) XHTML document
+     * @param metadata receives the content type and header dates
+     * @param context parse context (not used by this parser)
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        TrueTypeFont font;
+        TikaInputStream tis = TikaInputStream.get(stream);
+        TTFParser parser = new TTFParser();
+        if (tis.hasFile()) {
+            font = parser.parseTTF(tis.getFile());
+        } else {
+            font = parser.parseTTF(stream);
+        }
+
+        try {
+            metadata.set(Metadata.CONTENT_TYPE, TYPE.toString());
+            metadata.set(
+                    DublinCore.DATE, font.getHeader().getCreated().getTime());
+            metadata.set(
+                    Property.internalDate(DublinCore.MODIFIED),
+                    font.getHeader().getModified().getTime());
+        } finally {
+            // The parsed font holds on to its underlying data source;
+            // release it even if reading the header fails.
+            font.close();
+        }
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+    /**
+     * @deprecated This method will be removed in Apache Tika 1.0.
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        parse(stream, handler, metadata, new ParseContext());
+    }
+
+}
Modified: tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=995259&r1=995258&r2=995259&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser (original)
+++ tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser Wed Sep 8 21:27:37 2010
@@ -19,6 +19,7 @@ org.apache.tika.parser.audio.MidiParser
org.apache.tika.parser.dwg.DWGParser
org.apache.tika.parser.epub.EpubParser
org.apache.tika.parser.feed.FeedParser
+org.apache.tika.parser.font.TrueTypeParser
org.apache.tika.parser.html.HtmlParser
org.apache.tika.parser.image.ImageParser
org.apache.tika.parser.image.TiffParser