You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2008/09/05 15:06:01 UTC
svn commit: r692435 - in /incubator/tika/trunk: ./ src/main/assembly/
src/main/java/org/apache/tika/parser/asm/
src/main/java/org/apache/tika/sax/ src/main/java/org/apache/tika/sax/xpath/
src/main/resources/ src/main/resources/mime/ src/test/java/org/apache/tika/parser/asm/ src/test/resources/test-documents/
Author: jukka
Date: Fri Sep 5 06:06:00 2008
New Revision: 692435
URL: http://svn.apache.org/viewvc?rev=692435&view=rev
Log:
TIKA-155: Java class file parser
Added a ClassParser implementation based on the asm library and work by Dave Brosius in JCR-1522.
Added a /descendant:node() feature to the simple XPath parser and modified BodyContentHandler to use that instead of /*//node().
Added:
incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/
incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java (with props)
incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java (with props)
incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/
incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java (with props)
incubator/tika/trunk/src/test/resources/test-documents/AutoDetectParser.class (with props)
Modified:
incubator/tika/trunk/CHANGES.txt
incubator/tika/trunk/pom.xml
incubator/tika/trunk/src/main/assembly/LICENSE.txt
incubator/tika/trunk/src/main/assembly/NOTICE.txt
incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java
incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
incubator/tika/trunk/src/main/resources/tika-config.xml
Modified: incubator/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Fri Sep 5 06:06:00 2008
@@ -68,6 +68,11 @@
29. TIKA-151 - Stream compression support (Jukka Zitting)
+30. TIKA-156 - Some MIME magic patterns are ignored by MimeTypes
+ (Jukka Zitting)
+
+31. TIKA-155 - Java class file parser (Dave Brosius & Jukka Zitting)
+
Release 0.1-incubating - 12/27/2007
1. TIKA-5 - Port Metadata Framework from Nutch (mattmann)
Modified: incubator/tika/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/pom.xml?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/pom.xml (original)
+++ incubator/tika/trunk/pom.xml Fri Sep 5 06:06:00 2008
@@ -204,6 +204,11 @@
<version>3.4.4</version>
</dependency>
<dependency>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ <version>3.1</version>
+ </dependency>
+ <dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.14</version>
Modified: incubator/tika/trunk/src/main/assembly/LICENSE.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/assembly/LICENSE.txt?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/assembly/LICENSE.txt (original)
+++ incubator/tika/trunk/src/main/assembly/LICENSE.txt Fri Sep 5 06:06:00 2008
@@ -322,3 +322,35 @@
not be used in advertising or otherwise to promote the sale, use or other
dealings in this Software without prior written authorization of the
copyright holder.
+
+ASM library (asm)
+
+ Copyright (c) 2000-2005 INRIA, France Telecom
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of the copyright holders nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ THE POSSIBILITY OF SUCH DAMAGE.
Modified: incubator/tika/trunk/src/main/assembly/NOTICE.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/assembly/NOTICE.txt?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/assembly/NOTICE.txt (original)
+++ incubator/tika/trunk/src/main/assembly/NOTICE.txt Fri Sep 5 06:06:00 2008
@@ -14,3 +14,5 @@
Copyright (c) 2003-2005, www.fontbox.org
Copyright (c) 1995-2005 International Business Machines Corporation and others
+
+Copyright (c) 2000-2005 INRIA, France Telecom
Added: incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java?rev=692435&view=auto
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java (added)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java Fri Sep 5 06:06:00 2008
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.asm;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AbstractParser;
+import org.objectweb.asm.ClassReader;
+import org.objectweb.asm.ClassVisitor;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for Java .class files.
+ */
+public class ClassParser extends AbstractParser {
+
+ public void parse(
+ InputStream stream, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ try {
+ ClassVisitor visitor = new XHTMLClassVisitor(handler, metadata);
+ ClassReader reader = new ClassReader(stream);
+ reader.accept(visitor, ClassReader.SKIP_FRAMES | ClassReader.SKIP_CODE);
+ } catch (RuntimeException e) {
+ if (e.getCause() instanceof SAXException) {
+ throw (SAXException) e.getCause();
+ } else {
+ throw new TikaException("Failed to parse a Java class", e);
+ }
+ }
+ }
+
+}
Propchange: incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java?rev=692435&view=auto
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java (added)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java Fri Sep 5 06:06:00 2008
@@ -0,0 +1,300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.asm;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.objectweb.asm.AnnotationVisitor;
+import org.objectweb.asm.Attribute;
+import org.objectweb.asm.ClassVisitor;
+import org.objectweb.asm.FieldVisitor;
+import org.objectweb.asm.MethodVisitor;
+import org.objectweb.asm.Opcodes;
+import org.objectweb.asm.Type;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Class visitor that generates XHTML SAX events to describe the
+ * contents of the visited class.
+ * <p>
+ * The output is a Java-like outline of the class: the package statement,
+ * the class header and one line per non-synthetic field and method.
+ * The visitor callbacks can not declare checked exceptions, so any
+ * {@link SAXException} raised by the underlying handler is rethrown
+ * wrapped in a {@link RuntimeException}.
+ */
+class XHTMLClassVisitor implements ClassVisitor {
+
+    /**
+     * Access flags that correspond to source-level class modifiers.
+     * Bit 0x0020 is deliberately excluded: on a class it means ACC_SUPER,
+     * not "synchronized".
+     */
+    private static final int CLASS_MODIFIERS =
+        Opcodes.ACC_PRIVATE | Opcodes.ACC_PROTECTED | Opcodes.ACC_PUBLIC
+        | Opcodes.ACC_STATIC | Opcodes.ACC_FINAL | Opcodes.ACC_ABSTRACT;
+
+    /**
+     * Access flags that correspond to source-level field modifiers.
+     */
+    private static final int FIELD_MODIFIERS =
+        Opcodes.ACC_PRIVATE | Opcodes.ACC_PROTECTED | Opcodes.ACC_PUBLIC
+        | Opcodes.ACC_STATIC | Opcodes.ACC_FINAL
+        | Opcodes.ACC_TRANSIENT | Opcodes.ACC_VOLATILE;
+
+    /**
+     * Access flags that correspond to source-level method modifiers.
+     * Bits 0x0040 and 0x0080 are deliberately excluded: on a method they
+     * mean ACC_BRIDGE and ACC_VARARGS, not "volatile" and "transient".
+     */
+    private static final int METHOD_MODIFIERS =
+        Opcodes.ACC_PRIVATE | Opcodes.ACC_PROTECTED | Opcodes.ACC_PUBLIC
+        | Opcodes.ACC_STATIC | Opcodes.ACC_FINAL | Opcodes.ACC_ABSTRACT
+        | Opcodes.ACC_SYNCHRONIZED | Opcodes.ACC_NATIVE;
+
+    private final XHTMLContentHandler xhtml;
+
+    private final Metadata metadata;
+
+    /** Type of the visited class, set by {@link #visit}. */
+    private Type type;
+
+    /** Package of the visited class, or null for the default package. */
+    private String packageName;
+
+    public XHTMLClassVisitor(ContentHandler handler, Metadata metadata) {
+        this.xhtml = new XHTMLContentHandler(handler, metadata);
+        this.metadata = metadata;
+    }
+
+    /**
+     * Visits the class header. Records the title and resource name in the
+     * metadata, starts the XHTML document and writes the package
+     * statement and the class, interface or enum declaration line.
+     */
+    public void visit(
+            int version, int access, String name, String signature,
+            String superName, String[] interfaces) {
+        type = Type.getObjectType(name);
+
+        String className = type.getClassName();
+        int dot = className.lastIndexOf('.');
+        if (dot != -1) {
+            packageName = className.substring(0, dot);
+            className = className.substring(dot + 1);
+        } else {
+            packageName = null;
+        }
+
+        metadata.set(Metadata.TITLE, className);
+        metadata.set(Metadata.RESOURCE_NAME_KEY, className + ".class");
+
+        try {
+            xhtml.startDocument();
+
+            if (packageName != null) {
+                writeKeyword("package");
+                xhtml.characters(" " + packageName + ";\n");
+            }
+
+            int modifiers = access & CLASS_MODIFIERS;
+            if (isSet(access, Opcodes.ACC_INTERFACE)) {
+                // Interfaces always carry ACC_ABSTRACT; writing it
+                // would only add a redundant "abstract" keyword
+                modifiers &= ~Opcodes.ACC_ABSTRACT;
+            }
+            writeAccess(modifiers);
+
+            if (isSet(access, Opcodes.ACC_INTERFACE)) {
+                writeKeyword("interface");
+                writeSpace();
+                writeType(type);
+                writeSpace();
+                writeInterfaces("extends", interfaces);
+            } else if (isSet(access, Opcodes.ACC_ENUM)) {
+                writeKeyword("enum");
+                writeSpace();
+                writeType(type);
+                writeSpace();
+            } else {
+                writeKeyword("class");
+                writeSpace();
+                writeType(type);
+                writeSpace();
+                if (superName != null) {
+                    Type superType = Type.getObjectType(superName);
+                    if (!superType.getClassName().equals("java.lang.Object")) {
+                        writeKeyword("extends");
+                        writeSpace();
+                        writeType(superType);
+                        writeSpace();
+                    }
+                }
+                writeInterfaces("implements", interfaces);
+            }
+            xhtml.characters("{\n");
+        } catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Writes the given keyword followed by a comma separated list of the
+     * given interface types. Nothing is written if there are no interfaces.
+     */
+    private void writeInterfaces(String keyword, String[] interfaces)
+            throws SAXException {
+        if (interfaces != null && interfaces.length > 0) {
+            writeKeyword(keyword);
+            String separator = " ";
+            for (String iface : interfaces) {
+                xhtml.characters(separator);
+                writeType(Type.getObjectType(iface));
+                separator = ", ";
+            }
+            writeSpace();
+        }
+    }
+
+    /**
+     * Closes the class body and ends the XHTML document.
+     */
+    public void visitEnd() {
+        try {
+            xhtml.characters("}\n");
+            xhtml.endDocument();
+        } catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitOuterClass(String owner, String name, String desc) {
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitSource(String source, String debug) {
+    }
+
+
+    /**
+     * Ignored.
+     */
+    public AnnotationVisitor visitAnnotation(String desc, boolean visible) {
+        return null;
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitAttribute(Attribute attr) {
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitInnerClass(
+            String name, String outerName, String innerName, int access) {
+    }
+
+    /**
+     * Visits a field. Writes the field modifiers, type and name, followed
+     * by the constant value of a static field when one is available.
+     * Synthetic fields are skipped.
+     *
+     * @return always null, field contents are not visited
+     */
+    public FieldVisitor visitField(
+            int access, String name, String desc, String signature,
+            Object value) {
+        if (!isSet(access, Opcodes.ACC_SYNTHETIC)) {
+            try {
+                xhtml.characters(" ");
+                writeAccess(access & FIELD_MODIFIERS);
+                writeType(Type.getType(desc));
+                writeSpace();
+                writeIdentifier(name);
+
+                if (isSet(access, Opcodes.ACC_STATIC) && value != null) {
+                    xhtml.characters(" = ");
+                    xhtml.characters(value.toString());
+                }
+
+                writeSemicolon();
+                writeNewline();
+            } catch (SAXException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Visits a method. Writes the method modifiers, return type, name,
+     * argument types and declared exceptions. A constructor is written
+     * with the name of the visited class and without a return type.
+     * Synthetic methods are skipped.
+     *
+     * @return always null, method contents are not visited
+     */
+    public MethodVisitor visitMethod(
+            int access, String name, String desc, String signature,
+            String[] exceptions) {
+        if (!isSet(access, Opcodes.ACC_SYNTHETIC)) {
+            try {
+                xhtml.characters(" ");
+                writeAccess(access & METHOD_MODIFIERS);
+                if ("<init>".equals(name)) {
+                    // A constructor has no return type in Java source
+                    writeType(type);
+                } else {
+                    writeType(Type.getReturnType(desc));
+                    writeSpace();
+                    writeIdentifier(name);
+                }
+
+                xhtml.characters("(");
+                String separator = "";
+                for (Type arg : Type.getArgumentTypes(desc)) {
+                    xhtml.characters(separator);
+                    writeType(arg);
+                    separator = ", ";
+                }
+                xhtml.characters(")");
+
+                if (exceptions != null && exceptions.length > 0) {
+                    writeSpace();
+                    writeKeyword("throws");
+                    separator = " ";
+                    for (String exception : exceptions) {
+                        xhtml.characters(separator);
+                        writeType(Type.getObjectType(exception));
+                        separator = ", ";
+                    }
+                }
+
+                writeSemicolon();
+                writeNewline();
+            } catch (SAXException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        return null;
+    }
+
+    private void writeIdentifier(String identifier) throws SAXException {
+        xhtml.startElement("span", "class", "java-identifier");
+        xhtml.characters(identifier);
+        xhtml.endElement("span");
+    }
+
+    private void writeKeyword(String keyword) throws SAXException {
+        xhtml.startElement("span", "class", "java-keyword");
+        xhtml.characters(keyword);
+        xhtml.endElement("span");
+    }
+
+    private void writeSemicolon() throws SAXException {
+        xhtml.characters(";");
+    }
+
+    private void writeSpace() throws SAXException {
+        xhtml.characters(" ");
+    }
+
+    private void writeNewline() throws SAXException {
+        xhtml.characters("\n");
+    }
+
+    /**
+     * Writes a keyword for each modifier flag set in the given value.
+     * Several flag bits have different meanings for classes, fields and
+     * methods, so callers must first mask the access flags with the
+     * modifier set that matches the kind of member being written.
+     */
+    private void writeAccess(int access) throws SAXException {
+        writeAccess(access, Opcodes.ACC_PRIVATE, "private");
+        writeAccess(access, Opcodes.ACC_PROTECTED, "protected");
+        writeAccess(access, Opcodes.ACC_PUBLIC, "public");
+        writeAccess(access, Opcodes.ACC_STATIC, "static");
+        writeAccess(access, Opcodes.ACC_FINAL, "final");
+        writeAccess(access, Opcodes.ACC_ABSTRACT, "abstract");
+        writeAccess(access, Opcodes.ACC_SYNCHRONIZED, "synchronized");
+        writeAccess(access, Opcodes.ACC_TRANSIENT, "transient");
+        writeAccess(access, Opcodes.ACC_VOLATILE, "volatile");
+        writeAccess(access, Opcodes.ACC_NATIVE, "native");
+    }
+
+    private void writeAccess(int access, int code, String keyword)
+            throws SAXException {
+        if (isSet(access, code)) {
+            writeKeyword(keyword);
+            xhtml.characters(" ");
+        }
+    }
+
+    /**
+     * Writes the name of the given type. The package prefix is dropped
+     * for types that are direct members of the package of the visited
+     * class or of java.lang; all other types are written fully qualified.
+     */
+    private void writeType(Type type) throws SAXException {
+        String name = type.getClassName();
+        if (packageName != null && isDirectMember(name, packageName)) {
+            xhtml.characters(name.substring(packageName.length() + 1));
+        } else if (isDirectMember(name, "java.lang")) {
+            xhtml.characters(name.substring("java.lang.".length()));
+        } else {
+            xhtml.characters(name);
+        }
+    }
+
+    /**
+     * Checks whether the named type lives directly in the given package,
+     * as opposed to one of its sub-packages (for example
+     * java.lang.reflect.Method is not a direct member of java.lang).
+     */
+    private static boolean isDirectMember(String name, String pkg) {
+        return name.startsWith(pkg + ".")
+            && name.indexOf('.', pkg.length() + 1) == -1;
+    }
+
+    private static boolean isSet(int value, int flag) {
+        return (value & flag) != 0;
+    }
+
+}
Propchange: incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java Fri Sep 5 06:06:00 2008
@@ -41,7 +41,7 @@
* The XPath matcher used to select the XHTML body contents.
*/
private static final Matcher MATCHER =
- PARSER.parse("/xhtml:html/xhtml:body/*//node()");
+ PARSER.parse("/xhtml:html/xhtml:body/descendant:node()");
/**
* Creates a content handler that passes all XHTML body events to the
Modified: incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java Fri Sep 5 06:06:00 2008
@@ -60,6 +60,10 @@
return TextMatcher.INSTANCE;
} else if (xpath.equals("/node()")) {
return NodeMatcher.INSTANCE;
+ } else if (xpath.equals("/descendant:node()")) {
+ return new CompositeMatcher(
+ NodeMatcher.INSTANCE,
+ new ChildMatcher(new SubtreeMatcher(NodeMatcher.INSTANCE)));
} else if (xpath.equals("/@*")) {
return AttributeMatcher.INSTANCE;
} else if (xpath.length() == 0) {
Modified: incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Fri Sep 5 06:06:00 2008
@@ -425,6 +425,13 @@
<glob pattern="*.tbz2" />
</mime-type>
+ <mime-type type="application/x-tika-java-class">
+ <magic priority="40">
+ <match value="0xcafebabe" type="string" offset="0" />
+ </magic>
+ <glob pattern="*.class" />
+ </mime-type>
+
<mime-type type="application/msword">
<glob pattern="*.doc" />
<alias type="application/vnd.ms-word" />
@@ -522,10 +529,6 @@
<glob pattern="*.spl" />
</mime-type>
- <mime-type type="application/x-java">
- <alias type="application/java" />
- </mime-type>
-
<mime-type type="application/x-koan">
<_comment>SSEYO Koan File</_comment>
<glob pattern="*.skp" />
Modified: incubator/tika/trunk/src/main/resources/tika-config.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/tika-config.xml?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/tika-config.xml (original)
+++ incubator/tika/trunk/src/main/resources/tika-config.xml Fri Sep 5 06:06:00 2008
@@ -121,6 +121,10 @@
<mime>application/x-bzip</mime>
</parser>
+ <parser name="parse-class" class="org.apache.tika.parser.asm.ClassParser">
+ <mime>application/x-tika-java-class</mime>
+ </parser>
+
</parsers>
</properties>
\ No newline at end of file
Added: incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java?rev=692435&view=auto
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java (added)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java Fri Sep 5 06:06:00 2008
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.asm;
+
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+
+import junit.framework.TestCase;
+
+/**
+ * Test case for parsing Java class files.
+ */
+public class ClassParserTest extends TestCase {
+
+    /**
+     * Parses the AutoDetectParser.class test document through the
+     * auto-detecting parser and checks the extracted metadata and the
+     * generated class outline.
+     */
+    public void testClassParsing() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        InputStream stream = ClassParserTest.class.getResourceAsStream(
+                "/test-documents/AutoDetectParser.class");
+        // Fail with a clear message instead of a NullPointerException
+        // if the test document is missing from the class path
+        assertNotNull(
+                "Test document AutoDetectParser.class not found", stream);
+        try {
+            parser.parse(stream, handler, metadata);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals("AutoDetectParser", metadata.get(Metadata.TITLE));
+        assertEquals(
+                "AutoDetectParser.class",
+                metadata.get(Metadata.RESOURCE_NAME_KEY));
+
+        String content = handler.toString();
+        assertContains(content, "package org.apache.tika.parser;");
+        assertContains(content,
+                "class AutoDetectParser extends CompositeParser");
+        assertContains(content,
+                "private org.apache.tika.mime.MimeTypes types");
+        assertContains(content,
+                "public void parse("
+                + "java.io.InputStream, org.xml.sax.ContentHandler,"
+                + " org.apache.tika.metadata.Metadata) throws"
+                + " java.io.IOException, org.xml.sax.SAXException,"
+                + " org.apache.tika.exception.TikaException;");
+        assertContains(content,
+                "private byte[] getPrefix(java.io.InputStream, int)"
+                + " throws java.io.IOException;");
+    }
+
+    /**
+     * Asserts that the given content contains the expected text, and
+     * reports both in the failure message when it does not.
+     */
+    private static void assertContains(String content, String expected) {
+        assertTrue(
+                "Expected \"" + expected + "\" in: " + content,
+                content.contains(expected));
+    }
+
+}
Propchange: incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: incubator/tika/trunk/src/test/resources/test-documents/AutoDetectParser.class
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/AutoDetectParser.class?rev=692435&view=auto
==============================================================================
Binary file - no diff available.
Propchange: incubator/tika/trunk/src/test/resources/test-documents/AutoDetectParser.class
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream