You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2008/09/04 18:46:44 UTC

svn commit: r692152 - /incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java

Author: jukka
Date: Thu Sep  4 09:46:44 2008
New Revision: 692152

URL: http://svn.apache.org/viewvc?rev=692152&view=rev
Log:
TIKA-149: Parser for zip files 

Moved zip entry handling to a separate private method.

Added the entry name to the metadata passed to the delegate parser.

Modified:
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java?rev=692152&r1=692151&r2=692152&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java Thu Sep  4 09:46:44 2008
@@ -41,28 +41,35 @@
 
     public void parse(InputStream stream, ContentHandler handler, Metadata metadata)
             throws IOException, TikaException, SAXException {
-
         XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
         xhtml.startDocument();
 
         ZipInputStream zis = new ZipInputStream(stream);
         ZipEntry ze;
         while ((ze = zis.getNextEntry()) != null) {
-            xhtml.startElement("div", "class", "file");
-            xhtml.element("h1", ze.getName());
-
-            ContentHandler content = new BodyContentHandler();
-            getParser().parse(new CloseShieldInputStream(zis), content, new Metadata());
-
-            xhtml.element("content", content.toString());
-            xhtml.endElement("div");
-
+            parseEntry(xhtml, ze, zis);
             zis.closeEntry();
         }
         zis.close();
+
         xhtml.endDocument();
     }
 
+    private void parseEntry(
+            XHTMLContentHandler xhtml, ZipEntry entry, InputStream stream)
+            throws IOException, TikaException, SAXException {
+        xhtml.startElement("div", "class", "file");
+        xhtml.element("h1", entry.getName());
+
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, entry.getName());
+        ContentHandler content = new BodyContentHandler();
+        getParser().parse(new CloseShieldInputStream(stream), content, metadata);
+        xhtml.element("content", content.toString());
+
+        xhtml.endElement("div");
+    }
+
     public Parser getParser() {
         if (parser == null)
         {