You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2008/09/04 18:46:44 UTC
svn commit: r692152 - /incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java
Author: jukka
Date: Thu Sep 4 09:46:44 2008
New Revision: 692152
URL: http://svn.apache.org/viewvc?rev=692152&view=rev
Log:
TIKA-149: Parser for zip files
Moved zip entry handling to a separate private method.
Added the entry name to the metadata passed to the delegate parser.
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java?rev=692152&r1=692151&r2=692152&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/zip/ZipParser.java Thu Sep 4 09:46:44 2008
@@ -41,28 +41,35 @@
public void parse(InputStream stream, ContentHandler handler, Metadata metadata)
throws IOException, TikaException, SAXException {
-
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
ZipInputStream zis = new ZipInputStream(stream);
ZipEntry ze;
while ((ze = zis.getNextEntry()) != null) {
- xhtml.startElement("div", "class", "file");
- xhtml.element("h1", ze.getName());
-
- ContentHandler content = new BodyContentHandler();
- getParser().parse(new CloseShieldInputStream(zis), content, new Metadata());
-
- xhtml.element("content", content.toString());
- xhtml.endElement("div");
-
+ parseEntry(xhtml, ze, zis);
zis.closeEntry();
}
zis.close();
+
xhtml.endDocument();
}
+ private void parseEntry(
+ XHTMLContentHandler xhtml, ZipEntry entry, InputStream stream)
+ throws IOException, TikaException, SAXException {
+ xhtml.startElement("div", "class", "file");
+ xhtml.element("h1", entry.getName());
+
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.RESOURCE_NAME_KEY, entry.getName());
+ ContentHandler content = new BodyContentHandler();
+ getParser().parse(new CloseShieldInputStream(stream), content, metadata);
+ xhtml.element("content", content.toString());
+
+ xhtml.endElement("div");
+ }
+
public Parser getParser() {
if (parser == null)
{