You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2008/09/05 15:06:01 UTC

svn commit: r692435 - in /incubator/tika/trunk: ./ src/main/assembly/ src/main/java/org/apache/tika/parser/asm/ src/main/java/org/apache/tika/sax/ src/main/java/org/apache/tika/sax/xpath/ src/main/resources/ src/main/resources/mime/ src/test/java/org/a...

Author: jukka
Date: Fri Sep  5 06:06:00 2008
New Revision: 692435

URL: http://svn.apache.org/viewvc?rev=692435&view=rev
Log:
TIKA-155: Java class file parser

Added a ClassParser implementation based on the asm library and work by Dave Brosius in JCR-1522.

Added a /descendant:node() feature to the simple XPath parser and modified BodyContentHandler to use that instead of /*//node().

Added:
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java   (with props)
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java   (with props)
    incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/
    incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/AutoDetectParser.class   (with props)
Modified:
    incubator/tika/trunk/CHANGES.txt
    incubator/tika/trunk/pom.xml
    incubator/tika/trunk/src/main/assembly/LICENSE.txt
    incubator/tika/trunk/src/main/assembly/NOTICE.txt
    incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java
    incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
    incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
    incubator/tika/trunk/src/main/resources/tika-config.xml

Modified: incubator/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Fri Sep  5 06:06:00 2008
@@ -68,6 +68,11 @@
 
 29. TIKA-151 - Stream compression support (Jukka Zitting)
 
+30. TIKA-156 - Some MIME magic patterns are ignored by MimeTypes
+               (Jukka Zitting)
+
+31. TIKA-155 - Java class file parser (Dave Brosius & Jukka Zitting)
+
 Release 0.1-incubating - 12/27/2007
 
 1. TIKA-5 - Port Metadata Framework from Nutch (mattmann)

Modified: incubator/tika/trunk/pom.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/pom.xml?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/pom.xml (original)
+++ incubator/tika/trunk/pom.xml Fri Sep  5 06:06:00 2008
@@ -204,6 +204,11 @@
       <version>3.4.4</version>
     </dependency>
     <dependency>
+      <groupId>asm</groupId>
+      <artifactId>asm</artifactId>
+      <version>3.1</version>
+    </dependency>
+    <dependency>
       <groupId>log4j</groupId>
       <artifactId>log4j</artifactId>
       <version>1.2.14</version>

Modified: incubator/tika/trunk/src/main/assembly/LICENSE.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/assembly/LICENSE.txt?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/assembly/LICENSE.txt (original)
+++ incubator/tika/trunk/src/main/assembly/LICENSE.txt Fri Sep  5 06:06:00 2008
@@ -322,3 +322,35 @@
     not be used in advertising or otherwise to promote the sale, use or other
     dealings in this Software without prior written authorization of the
     copyright holder.
+
+ASM library (asm)
+
+    Copyright (c) 2000-2005 INRIA, France Telecom
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    1. Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    3. Neither the name of the copyright holders nor the names of its
+       contributors may be used to endorse or promote products derived from
+       this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+    THE POSSIBILITY OF SUCH DAMAGE.

Modified: incubator/tika/trunk/src/main/assembly/NOTICE.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/assembly/NOTICE.txt?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/assembly/NOTICE.txt (original)
+++ incubator/tika/trunk/src/main/assembly/NOTICE.txt Fri Sep  5 06:06:00 2008
@@ -14,3 +14,5 @@
 Copyright (c) 2003-2005, www.fontbox.org
 
 Copyright (c) 1995-2005 International Business Machines Corporation and others
+
+Copyright (c) 2000-2005 INRIA, France Telecom

Added: incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java?rev=692435&view=auto
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java (added)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java Fri Sep  5 06:06:00 2008
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.asm;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AbstractParser;
+import org.objectweb.asm.ClassReader;
+import org.objectweb.asm.ClassVisitor;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for Java .class files. The class structure is read with the
+ * ASM library and reported as XHTML SAX events by an
+ * {@link XHTMLClassVisitor}.
+ */
+public class ClassParser extends AbstractParser {
+
+    /**
+     * Reads a compiled class from the given stream and describes it to
+     * the given content handler. Method bodies and stack map frames are
+     * skipped.
+     *
+     * @param stream   the class file to read
+     * @param handler  receiver of the generated XHTML SAX events
+     * @param metadata metadata object handed to the class visitor
+     * @throws IOException   if the stream can not be read
+     * @throws SAXException  if the visitor failed because of a SAX error
+     * @throws TikaException if reading or visiting the class failed with
+     *                       any other runtime exception
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        final int flags = ClassReader.SKIP_FRAMES | ClassReader.SKIP_CODE;
+        try {
+            ClassVisitor xhtmlVisitor =
+                new XHTMLClassVisitor(handler, metadata);
+            new ClassReader(stream).accept(xhtmlVisitor, flags);
+        } catch (RuntimeException failure) {
+            // A runtime exception caused by a SAXException is unwrapped
+            // so that callers see the SAX failure itself
+            Throwable cause = failure.getCause();
+            if (!(cause instanceof SAXException)) {
+                throw new TikaException(
+                        "Failed to parse a Java class", failure);
+            }
+            throw (SAXException) cause;
+        }
+    }
+
+}

Propchange: incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/ClassParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java?rev=692435&view=auto
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java (added)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java Fri Sep  5 06:06:00 2008
@@ -0,0 +1,300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.asm;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.objectweb.asm.AnnotationVisitor;
+import org.objectweb.asm.Attribute;
+import org.objectweb.asm.ClassVisitor;
+import org.objectweb.asm.FieldVisitor;
+import org.objectweb.asm.MethodVisitor;
+import org.objectweb.asm.Opcodes;
+import org.objectweb.asm.Type;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Class visitor that generates XHTML SAX events to describe the
+ * contents of the visited class as a Java-like outline: the package
+ * statement, the class header, and one line per field and method.
+ * <p>
+ * The ASM visitor methods cannot declare checked exceptions, so any
+ * {@link SAXException} raised while writing is rethrown wrapped in a
+ * {@link RuntimeException} with the SAX exception as its cause.
+ */
+class XHTMLClassVisitor implements ClassVisitor {
+
+    /** Package prefix of the types that need no qualification in Java. */
+    private static final String JAVA_LANG_PREFIX = "java.lang.";
+
+    /** Access flags that are modifiers on classes, fields and methods. */
+    private static final int COMMON_MODIFIERS =
+        Opcodes.ACC_PRIVATE | Opcodes.ACC_PROTECTED | Opcodes.ACC_PUBLIC
+        | Opcodes.ACC_STATIC | Opcodes.ACC_FINAL;
+
+    /**
+     * Access flags written for a class. This leaves out 0x0020, which is
+     * ACC_SUPER on a class and must not be printed as "synchronized".
+     */
+    private static final int CLASS_MODIFIERS =
+        COMMON_MODIFIERS | Opcodes.ACC_ABSTRACT;
+
+    /** Access flags written for a field. */
+    private static final int FIELD_MODIFIERS =
+        COMMON_MODIFIERS | Opcodes.ACC_TRANSIENT | Opcodes.ACC_VOLATILE;
+
+    /**
+     * Access flags written for a method. This leaves out 0x0040 and
+     * 0x0080, which are ACC_BRIDGE and ACC_VARARGS on a method and must
+     * not be printed as "volatile" and "transient".
+     */
+    private static final int METHOD_MODIFIERS =
+        COMMON_MODIFIERS | Opcodes.ACC_ABSTRACT
+        | Opcodes.ACC_SYNCHRONIZED | Opcodes.ACC_NATIVE;
+
+    /** Receiver of the generated XHTML events. */
+    private final XHTMLContentHandler xhtml;
+
+    /** Metadata that receives the title and resource name of the class. */
+    private final Metadata metadata;
+
+    /** Type of the visited class, set by {@link #visit}. */
+    private Type type;
+
+    /** Package of the visited class, or null for the default package. */
+    private String packageName;
+
+    /**
+     * Creates a visitor that writes to the given handler.
+     *
+     * @param handler  receiver of the XHTML SAX events
+     * @param metadata metadata to update with details of the class
+     */
+    public XHTMLClassVisitor(ContentHandler handler, Metadata metadata) {
+        this.xhtml = new XHTMLContentHandler(handler, metadata);
+        this.metadata = metadata;
+    }
+
+    /**
+     * Visits the class header. Records the class name as the title and
+     * resource name metadata, starts the XHTML document and writes the
+     * package statement and the class, interface or enum declaration up
+     * to the opening brace.
+     */
+    public void visit(
+            int version, int access, String name, String signature,
+            String superName, String[] interfaces) {
+        type = Type.getObjectType(name);
+
+        String className = type.getClassName();
+        int dot = className.lastIndexOf('.');
+        if (dot != -1) {
+            packageName = className.substring(0, dot);
+            className = className.substring(dot + 1);
+        } else {
+            packageName = null;
+        }
+
+        metadata.set(Metadata.TITLE, className);
+        metadata.set(Metadata.RESOURCE_NAME_KEY, className + ".class");
+
+        try {
+            xhtml.startDocument();
+
+            if (packageName != null) {
+                writeKeyword("package");
+                xhtml.characters(" " + packageName + ";\n");
+            }
+
+            writeAccess(access & CLASS_MODIFIERS);
+            if (isSet(access, Opcodes.ACC_INTERFACE)) {
+                writeDeclaration("interface");
+                writeInterfaces("extends", interfaces);
+            } else if (isSet(access, Opcodes.ACC_ENUM)) {
+                writeDeclaration("enum");
+                writeInterfaces("implements", interfaces);
+            } else {
+                writeDeclaration("class");
+                if (superName != null) {
+                    Type superType = Type.getObjectType(superName);
+                    if (!superType.getClassName().equals("java.lang.Object")) {
+                        writeKeyword("extends");
+                        writeSpace();
+                        writeType(superType);
+                        writeSpace();
+                    }
+                }
+                writeInterfaces("implements", interfaces);
+            }
+            xhtml.characters("{\n");
+        } catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Writes the given declaration keyword followed by the name of the
+     * visited class and a trailing space.
+     */
+    private void writeDeclaration(String keyword) throws SAXException {
+        writeKeyword(keyword);
+        writeSpace();
+        writeType(type);
+        writeSpace();
+    }
+
+    /**
+     * Writes the given keyword followed by a comma separated list of the
+     * given interfaces and a trailing space. Writes nothing if there are
+     * no interfaces.
+     *
+     * @param keyword    "extends" or "implements"
+     * @param interfaces internal names of the interfaces, may be null
+     */
+    private void writeInterfaces(String keyword, String[] interfaces)
+            throws SAXException {
+        if (interfaces != null && interfaces.length > 0) {
+            writeKeyword(keyword);
+            String separator = " ";
+            for (String iface : interfaces) {
+                xhtml.characters(separator);
+                writeType(Type.getObjectType(iface));
+                separator = ", ";
+            }
+            writeSpace();
+        }
+    }
+
+    /**
+     * Writes the closing brace of the class and ends the XHTML document.
+     */
+    public void visitEnd() {
+        try {
+            xhtml.characters("}\n");
+            xhtml.endDocument();
+        } catch (SAXException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitOuterClass(String owner, String name, String desc) {
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitSource(String source, String debug) {
+    }
+
+    /**
+     * Ignored.
+     *
+     * @return always null, so the annotation is not visited
+     */
+    public AnnotationVisitor visitAnnotation(String desc, boolean visible) {
+        return null;
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitAttribute(Attribute attr) {
+    }
+
+    /**
+     * Ignored.
+     */
+    public void visitInnerClass(
+            String name, String outerName, String innerName, int access) {
+    }
+
+    /**
+     * Visits a field. Writes one line with the modifiers, type and name
+     * of the field, plus the constant value of a static field when one is
+     * given. Synthetic fields are skipped.
+     *
+     * @return always null, so the field is not visited further
+     */
+    public FieldVisitor visitField(
+            int access, String name, String desc, String signature,
+            Object value) {
+        if (!isSet(access, Opcodes.ACC_SYNTHETIC)) {
+            try {
+                xhtml.characters("    ");
+                writeAccess(access & FIELD_MODIFIERS);
+                writeType(Type.getType(desc));
+                writeSpace();
+                writeIdentifier(name);
+
+                if (isSet(access, Opcodes.ACC_STATIC) && value != null) {
+                    xhtml.characters(" = ");
+                    xhtml.characters(value.toString());
+                }
+
+                writeSemicolon();
+                writeNewline();
+            } catch (SAXException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Visits a method. Writes one line with the modifiers, return type,
+     * name, argument types and declared exceptions of the method. A
+     * constructor is written with the class name and no return type.
+     * Synthetic methods are skipped.
+     *
+     * @return always null, so the method is not visited further
+     */
+    public MethodVisitor visitMethod(
+            int access, String name, String desc, String signature,
+            String[] exceptions) {
+        if (!isSet(access, Opcodes.ACC_SYNTHETIC)) {
+            try {
+                xhtml.characters("    ");
+                writeAccess(access & METHOD_MODIFIERS);
+                if ("<init>".equals(name)) {
+                    // A constructor has no return type in Java source
+                    writeType(type);
+                } else {
+                    writeType(Type.getReturnType(desc));
+                    writeSpace();
+                    writeIdentifier(name);
+                }
+
+                xhtml.characters("(");
+                String separator = "";
+                for (Type arg : Type.getArgumentTypes(desc)) {
+                    xhtml.characters(separator);
+                    writeType(arg);
+                    separator = ", ";
+                }
+                xhtml.characters(")");
+
+                if (exceptions != null && exceptions.length > 0) {
+                    writeSpace();
+                    writeKeyword("throws");
+                    separator = " ";
+                    for (String exception : exceptions) {
+                        xhtml.characters(separator);
+                        writeType(Type.getObjectType(exception));
+                        separator = ", ";
+                    }
+                }
+
+                writeSemicolon();
+                writeNewline();
+            } catch (SAXException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        return null;
+    }
+
+    /** Writes the given identifier inside a "java-identifier" span. */
+    private void writeIdentifier(String identifier) throws SAXException {
+        xhtml.startElement("span", "class", "java-identifier");
+        xhtml.characters(identifier);
+        xhtml.endElement("span");
+    }
+
+    /** Writes the given keyword inside a "java-keyword" span. */
+    private void writeKeyword(String keyword) throws SAXException {
+        xhtml.startElement("span", "class", "java-keyword");
+        xhtml.characters(keyword);
+        xhtml.endElement("span");
+    }
+
+    /** Writes a semicolon. */
+    private void writeSemicolon() throws SAXException {
+        xhtml.characters(";");
+    }
+
+    /** Writes a single space. */
+    private void writeSpace() throws SAXException {
+        xhtml.characters(" ");
+    }
+
+    /** Writes a line break. */
+    private void writeNewline() throws SAXException {
+        xhtml.characters("\n");
+    }
+
+    /**
+     * Writes a keyword for each modifier flag that is set in the given
+     * value. Callers must first mask the access flags with the modifier
+     * set of the kind of member being written, because the class file
+     * format reuses the same bits for different purposes on classes,
+     * fields and methods.
+     */
+    private void writeAccess(int access) throws SAXException {
+        writeAccess(access, Opcodes.ACC_PRIVATE, "private");
+        writeAccess(access, Opcodes.ACC_PROTECTED, "protected");
+        writeAccess(access, Opcodes.ACC_PUBLIC, "public");
+        writeAccess(access, Opcodes.ACC_STATIC, "static");
+        writeAccess(access, Opcodes.ACC_FINAL, "final");
+        writeAccess(access, Opcodes.ACC_ABSTRACT, "abstract");
+        writeAccess(access, Opcodes.ACC_SYNCHRONIZED, "synchronized");
+        writeAccess(access, Opcodes.ACC_TRANSIENT, "transient");
+        writeAccess(access, Opcodes.ACC_VOLATILE, "volatile");
+        writeAccess(access, Opcodes.ACC_NATIVE, "native");
+    }
+
+    /**
+     * Writes the given keyword and a trailing space if the given flag is
+     * set in the access value.
+     */
+    private void writeAccess(int access, int code, String keyword)
+            throws SAXException {
+        if (isSet(access, code)) {
+            writeKeyword(keyword);
+            xhtml.characters(" ");
+        }
+    }
+
+    /**
+     * Writes the name of the given type. The package is left out for
+     * types that are directly in the package of the visited class or in
+     * java.lang; all other types are written fully qualified.
+     */
+    private void writeType(Type type) throws SAXException {
+        String name = type.getClassName();
+        if (packageName != null
+                && isDirectMember(name, packageName + ".")) {
+            name = name.substring(packageName.length() + 1);
+        } else if (isDirectMember(name, JAVA_LANG_PREFIX)) {
+            name = name.substring(JAVA_LANG_PREFIX.length());
+        }
+        xhtml.characters(name);
+    }
+
+    /**
+     * Checks whether the given class name starts with the given package
+     * prefix and has no further package separator after it. A type in a
+     * sub-package, such as java.lang.reflect.Method for the java.lang
+     * prefix, is not a direct member.
+     */
+    private static boolean isDirectMember(String name, String prefix) {
+        return name.startsWith(prefix)
+            && name.indexOf('.', prefix.length()) == -1;
+    }
+
+    /** Checks whether any bit of the given flag is set in the value. */
+    private static boolean isSet(int value, int flag) {
+        return (value & flag) != 0;
+    }
+
+}

Propchange: incubator/tika/trunk/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/sax/BodyContentHandler.java Fri Sep  5 06:06:00 2008
@@ -41,7 +41,7 @@
      * The XPath matcher used to select the XHTML body contents.
      */
     private static final Matcher MATCHER =
-        PARSER.parse("/xhtml:html/xhtml:body/*//node()");
+        PARSER.parse("/xhtml:html/xhtml:body/descendant:node()");
 
     /**
      * Creates a content handler that passes all XHTML body events to the

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/sax/xpath/XPathParser.java Fri Sep  5 06:06:00 2008
@@ -60,6 +60,10 @@
             return TextMatcher.INSTANCE;
         } else if (xpath.equals("/node()")) {
             return NodeMatcher.INSTANCE;
+        } else if (xpath.equals("/descendant:node()")) {
+            return new CompositeMatcher(
+                    NodeMatcher.INSTANCE,
+                    new ChildMatcher(new SubtreeMatcher(NodeMatcher.INSTANCE)));
         } else if (xpath.equals("/@*")) {
             return AttributeMatcher.INSTANCE;
         } else if (xpath.length() == 0) {

Modified: incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Fri Sep  5 06:06:00 2008
@@ -425,6 +425,13 @@
     <glob pattern="*.tbz2" />
   </mime-type>
 
+  <mime-type type="application/x-tika-java-class">
+    <magic priority="40">
+      <match value="0xcafebabe" type="string" offset="0" />
+    </magic>
+    <glob pattern="*.class" />
+  </mime-type>
+
   <mime-type type="application/msword">
     <glob pattern="*.doc" />
     <alias type="application/vnd.ms-word" />
@@ -522,10 +529,6 @@
     <glob pattern="*.spl" />
   </mime-type>
 
-  <mime-type type="application/x-java">
-    <alias type="application/java" />
-  </mime-type>
-
   <mime-type type="application/x-koan">
     <_comment>SSEYO Koan File</_comment>
     <glob pattern="*.skp" />

Modified: incubator/tika/trunk/src/main/resources/tika-config.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/tika-config.xml?rev=692435&r1=692434&r2=692435&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/tika-config.xml (original)
+++ incubator/tika/trunk/src/main/resources/tika-config.xml Fri Sep  5 06:06:00 2008
@@ -121,6 +121,10 @@
                 <mime>application/x-bzip</mime>
         </parser>
 
+        <parser name="parse-class" class="org.apache.tika.parser.asm.ClassParser">
+                <mime>application/x-tika-java-class</mime>
+        </parser>
+
     </parsers>
 
 </properties>
\ No newline at end of file

Added: incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java?rev=692435&view=auto
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java (added)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java Fri Sep  5 06:06:00 2008
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.asm;
+
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+
+import junit.framework.TestCase;
+
+/**
+ * Test case for parsing Java class files.
+ */
+public class ClassParserTest extends TestCase {
+
+    /**
+     * Parses the AutoDetectParser.class test document with the
+     * auto-detecting parser and checks the resulting metadata and the
+     * extracted text outline of the class.
+     */
+    public void testClassParsing() throws Exception {
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        InputStream stream = ClassParserTest.class.getResourceAsStream(
+                "/test-documents/AutoDetectParser.class");
+        // Fail with a clear message instead of a NullPointerException
+        // when the test document is missing from the classpath
+        assertNotNull(
+                "Test document AutoDetectParser.class not found", stream);
+        try {
+            parser.parse(stream, handler, metadata);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals("AutoDetectParser", metadata.get(Metadata.TITLE));
+        assertEquals(
+                "AutoDetectParser.class",
+                metadata.get(Metadata.RESOURCE_NAME_KEY));
+
+        String content = handler.toString();
+        assertTrue(content.contains("package org.apache.tika.parser;"));
+        assertTrue(content.contains(
+                "class AutoDetectParser extends CompositeParser"));
+        assertTrue(content.contains(
+                "private org.apache.tika.mime.MimeTypes types"));
+        assertTrue(content.contains(
+                "public void parse("
+                + "java.io.InputStream, org.xml.sax.ContentHandler,"
+                + " org.apache.tika.metadata.Metadata) throws"
+                + " java.io.IOException, org.xml.sax.SAXException,"
+                + " org.apache.tika.exception.TikaException;"));
+        assertTrue(content.contains(
+                "private byte[] getPrefix(java.io.InputStream, int)"
+                + " throws java.io.IOException;"));
+    }
+
+}

Propchange: incubator/tika/trunk/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: incubator/tika/trunk/src/test/resources/test-documents/AutoDetectParser.class
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/AutoDetectParser.class?rev=692435&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/AutoDetectParser.class
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream