You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:28 UTC

[22/39] tika git commit: Convert new lines from windows to unix

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
index 75b556c..a32d406 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
@@ -1,496 +1,496 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.odf;
-
-import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
-
-import javax.xml.namespace.QName;
-import javax.xml.parsers.SAXParser;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.BitSet;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-import java.util.Stack;
-
-import org.apache.commons.io.input.CloseShieldInputStream;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.ElementMappingContentHandler;
-import org.apache.tika.sax.ElementMappingContentHandler.TargetElement;
-import org.apache.tika.sax.OfflineContentHandler;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.Attributes;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.AttributesImpl;
-import org.xml.sax.helpers.DefaultHandler;
-
-/**
- * Parser for ODF <code>content.xml</code> files.
- */
-public class OpenDocumentContentParser extends AbstractParser {
-    private interface Style {
-    }
-
-    private static class TextStyle implements Style {
-        public boolean italic;
-        public boolean bold;
-        public boolean underlined;
-    }
-
-    private static class ListStyle implements Style {
-        public boolean ordered;
-
-        public String getTag() {
-            return ordered ? "ol" : "ul";
-        }
-    }
-
-    private static final class OpenDocumentElementMappingContentHandler extends
-            ElementMappingContentHandler {
-        private final ContentHandler handler;
-        private final BitSet textNodeStack = new BitSet();
-        private int nodeDepth = 0;
-        private int completelyFiltered = 0;
-        private Stack<String> headingStack = new Stack<String>();
-        private Map<String, TextStyle> textStyleMap = new HashMap<String, TextStyle>();
-        private Map<String, ListStyle> listStyleMap = new HashMap<String, ListStyle>();
-        private TextStyle textStyle;
-        private TextStyle lastTextStyle;
-        private Stack<ListStyle> listStyleStack = new Stack<ListStyle>();
-        private ListStyle listStyle;
-
-        private OpenDocumentElementMappingContentHandler(ContentHandler handler,
-                                                         Map<QName, TargetElement> mappings) {
-            super(handler, mappings);
-            this.handler = handler;
-        }
-
-        @Override
-        public void characters(char[] ch, int start, int length)
-                throws SAXException {
-            // only forward content of tags from text:-namespace
-            if (completelyFiltered == 0 && nodeDepth > 0
-                    && textNodeStack.get(nodeDepth - 1)) {
-                lazyEndSpan();
-                super.characters(ch, start, length);
-            }
-        }
-
-        // helper for checking tags which need complete filtering
-        // (with sub-tags)
-        private boolean needsCompleteFiltering(
-                String namespaceURI, String localName) {
-            if (TEXT_NS.equals(namespaceURI)) {
-                return localName.endsWith("-template")
-                        || localName.endsWith("-style");
-            }
-            return TABLE_NS.equals(namespaceURI) && "covered-table-cell".equals(localName);
-        }
-
-        // map the heading level to <hX> HTML tags
-        private String getXHTMLHeaderTagName(Attributes atts) {
-            String depthStr = atts.getValue(TEXT_NS, "outline-level");
-            if (depthStr == null) {
-                return "h1";
-            }
-
-            int depth = Integer.parseInt(depthStr);
-            if (depth >= 6) {
-                return "h6";
-            } else if (depth <= 1) {
-                return "h1";
-            } else {
-                return "h" + depth;
-            }
-        }
-
-        /**
-         * Check if a node is a text node
-         */
-        private boolean isTextNode(String namespaceURI, String localName) {
-            if (TEXT_NS.equals(namespaceURI) && !localName.equals("page-number") && !localName.equals("page-count")) {
-                return true;
-            }
-            if (SVG_NS.equals(namespaceURI)) {
-                return "title".equals(localName) ||
-                        "desc".equals(localName);
-            }
-            return false;
-        }
-
-        private void startList(String name) throws SAXException {
-            String elementName = "ul";
-            if (name != null) {
-                ListStyle style = listStyleMap.get(name);
-                elementName = style != null ? style.getTag() : "ul";
-                listStyleStack.push(style);
-            }
-            handler.startElement(XHTML, elementName, elementName, EMPTY_ATTRIBUTES);
-        }
-
-        private void endList() throws SAXException {
-            String elementName = "ul";
-            if (!listStyleStack.isEmpty()) {
-                ListStyle style = listStyleStack.pop();
-                elementName = style != null ? style.getTag() : "ul";
-            }
-            handler.endElement(XHTML, elementName, elementName);
-        }
-
-        private void startSpan(String name) throws SAXException {
-            if (name == null) {
-                return;
-            }
-
-            TextStyle style = textStyleMap.get(name);
-            if (style == null) {
-                return;
-            }
-
-            // End tags that refer to no longer valid styles
-            if (!style.underlined && lastTextStyle != null && lastTextStyle.underlined) {
-                handler.endElement(XHTML, "u", "u");
-            }
-            if (!style.italic && lastTextStyle != null && lastTextStyle.italic) {
-                handler.endElement(XHTML, "i", "i");
-            }
-            if (!style.bold && lastTextStyle != null && lastTextStyle.bold) {
-                handler.endElement(XHTML, "b", "b");
-            }
-
-            // Start tags for new styles
-            if (style.bold && (lastTextStyle == null || !lastTextStyle.bold)) {
-                handler.startElement(XHTML, "b", "b", EMPTY_ATTRIBUTES);
-            }
-            if (style.italic && (lastTextStyle == null || !lastTextStyle.italic)) {
-                handler.startElement(XHTML, "i", "i", EMPTY_ATTRIBUTES);
-            }
-            if (style.underlined && (lastTextStyle == null || !lastTextStyle.underlined)) {
-                handler.startElement(XHTML, "u", "u", EMPTY_ATTRIBUTES);
-            }
-
-            textStyle = style;
-            lastTextStyle = null;
-        }
-
-        private void endSpan() throws SAXException {
-            lastTextStyle = textStyle;
-            textStyle = null;
-        }
-
-        private void lazyEndSpan() throws SAXException {
-            if (lastTextStyle == null) {
-                return;
-            }
-
-            if (lastTextStyle.underlined) {
-                handler.endElement(XHTML, "u", "u");
-            }
-            if (lastTextStyle.italic) {
-                handler.endElement(XHTML, "i", "i");
-            }
-            if (lastTextStyle.bold) {
-                handler.endElement(XHTML, "b", "b");
-            }
-
-            lastTextStyle = null;
-        }
-
-        @Override
-        public void startElement(
-                String namespaceURI, String localName, String qName,
-                Attributes attrs) throws SAXException {
-            // keep track of current node type. If it is a text node,
-            // a bit at the current depth its set in textNodeStack.
-            // characters() checks the top bit to determine, if the
-            // actual node is a text node to print out nodeDepth contains
-            // the depth of the current node and also marks top of stack.
-            assert nodeDepth >= 0;
-
-            // Set styles
-            if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
-                String family = attrs.getValue(STYLE_NS, "family");
-                if ("text".equals(family)) {
-                    textStyle = new TextStyle();
-                    String name = attrs.getValue(STYLE_NS, "name");
-                    textStyleMap.put(name, textStyle);
-                }
-            } else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
-                listStyle = new ListStyle();
-                String name = attrs.getValue(STYLE_NS, "name");
-                listStyleMap.put(name, listStyle);
-            } else if (textStyle != null && STYLE_NS.equals(namespaceURI)
-                    && "text-properties".equals(localName)) {
-                String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, "font-style");
-                if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) {
-                    textStyle.italic = true;
-                }
-                String fontWeight = attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight");
-                if ("bold".equals(fontWeight) || "bolder".equals(fontWeight)
-                        || (fontWeight != null && Character.isDigit(fontWeight.charAt(0))
-                        && Integer.valueOf(fontWeight) > 500)) {
-                    textStyle.bold = true;
-                }
-                String underlineStyle = attrs.getValue(STYLE_NS, "text-underline-style");
-                if (underlineStyle != null) {
-                    textStyle.underlined = true;
-                }
-            } else if (listStyle != null && TEXT_NS.equals(namespaceURI)) {
-                if ("list-level-style-bullet".equals(localName)) {
-                    listStyle.ordered = false;
-                } else if ("list-level-style-number".equals(localName)) {
-                    listStyle.ordered = true;
-                }
-            }
-
-            textNodeStack.set(nodeDepth++,
-                    isTextNode(namespaceURI, localName));
-            // filter *all* content of some tags
-            assert completelyFiltered >= 0;
-
-            if (needsCompleteFiltering(namespaceURI, localName)) {
-                completelyFiltered++;
-            }
-            // call next handler if no filtering
-            if (completelyFiltered == 0) {
-                // special handling of text:h, that are directly passed
-                // to incoming handler
-                if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
-                    final String el = headingStack.push(getXHTMLHeaderTagName(attrs));
-                    handler.startElement(XHTMLContentHandler.XHTML, el, el, EMPTY_ATTRIBUTES);
-                } else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
-                    startList(attrs.getValue(TEXT_NS, "style-name"));
-                } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
-                    startSpan(attrs.getValue(TEXT_NS, "style-name"));
-                } else {
-                    super.startElement(namespaceURI, localName, qName, attrs);
-                }
-            }
-        }
-
-        @Override
-        public void endElement(
-                String namespaceURI, String localName, String qName)
-                throws SAXException {
-            if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
-                textStyle = null;
-            } else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
-                listStyle = null;
-            }
-
-            // call next handler if no filtering
-            if (completelyFiltered == 0) {
-                // special handling of text:h, that are directly passed
-                // to incoming handler
-                if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
-                    final String el = headingStack.pop();
-                    handler.endElement(XHTMLContentHandler.XHTML, el, el);
-                } else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
-                    endList();
-                } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
-                    endSpan();
-                } else {
-                    if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
-                        lazyEndSpan();
-                    }
-                    super.endElement(namespaceURI, localName, qName);
-                }
-
-                // special handling of tabulators
-                if (TEXT_NS.equals(namespaceURI)
-                        && ("tab-stop".equals(localName)
-                        || "tab".equals(localName))) {
-                    this.characters(TAB, 0, TAB.length);
-                }
-            }
-
-            // revert filter for *all* content of some tags
-            if (needsCompleteFiltering(namespaceURI, localName)) {
-                completelyFiltered--;
-            }
-            assert completelyFiltered >= 0;
-
-            // reduce current node depth
-            nodeDepth--;
-            assert nodeDepth >= 0;
-        }
-
-        @Override
-        public void startPrefixMapping(String prefix, String uri) {
-            // remove prefix mappings as they should not occur in XHTML
-        }
-
-        @Override
-        public void endPrefixMapping(String prefix) {
-            // remove prefix mappings as they should not occur in XHTML
-        }
-    }
-
-    public static final String TEXT_NS =
-            "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
-
-    public static final String TABLE_NS =
-            "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
-
-    public static final String STYLE_NS =
-            "urn:oasis:names:tc:opendocument:xmlns:style:1.0";
-
-    public static final String FORMATTING_OBJECTS_NS =
-            "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";
-
-    public static final String OFFICE_NS =
-            "urn:oasis:names:tc:opendocument:xmlns:office:1.0";
-
-    public static final String SVG_NS =
-            "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
-
-    public static final String PRESENTATION_NS =
-            "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";
-
-    public static final String DRAW_NS =
-            "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";
-
-    public static final String XLINK_NS = "http://www.w3.org/1999/xlink";
-
-    protected static final char[] TAB = new char[]{'\t'};
-
-    private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
-
-    /**
-     * Mappings between ODF tag names and XHTML tag names
-     * (including attributes). All other tag names/attributes are ignored
-     * and left out from event stream.
-     */
-    private static final HashMap<QName, TargetElement> MAPPINGS =
-            new HashMap<QName, TargetElement>();
-
-    static {
-        // general mappings of text:-tags
-        MAPPINGS.put(
-                new QName(TEXT_NS, "p"),
-                new TargetElement(XHTML, "p"));
-        // text:h-tags are mapped specifically in startElement/endElement
-        MAPPINGS.put(
-                new QName(TEXT_NS, "line-break"),
-                new TargetElement(XHTML, "br"));
-        MAPPINGS.put(
-                new QName(TEXT_NS, "list-item"),
-                new TargetElement(XHTML, "li"));
-        MAPPINGS.put(
-                new QName(TEXT_NS, "note"),
-                new TargetElement(XHTML, "div"));
-        MAPPINGS.put(
-                new QName(OFFICE_NS, "annotation"),
-                new TargetElement(XHTML, "div"));
-        MAPPINGS.put(
-                new QName(PRESENTATION_NS, "notes"),
-                new TargetElement(XHTML, "div"));
-        MAPPINGS.put(
-                new QName(DRAW_NS, "object"),
-                new TargetElement(XHTML, "object"));
-        MAPPINGS.put(
-                new QName(DRAW_NS, "text-box"),
-                new TargetElement(XHTML, "div"));
-        MAPPINGS.put(
-                new QName(SVG_NS, "title"),
-                new TargetElement(XHTML, "span"));
-        MAPPINGS.put(
-                new QName(SVG_NS, "desc"),
-                new TargetElement(XHTML, "span"));
-        MAPPINGS.put(
-                new QName(TEXT_NS, "span"),
-                new TargetElement(XHTML, "span"));
-
-        final HashMap<QName, QName> aAttsMapping =
-                new HashMap<QName, QName>();
-        aAttsMapping.put(
-                new QName(XLINK_NS, "href"),
-                new QName("href"));
-        aAttsMapping.put(
-                new QName(XLINK_NS, "title"),
-                new QName("title"));
-        MAPPINGS.put(
-                new QName(TEXT_NS, "a"),
-                new TargetElement(XHTML, "a", aAttsMapping));
-
-        // create HTML tables from table:-tags
-        MAPPINGS.put(
-                new QName(TABLE_NS, "table"),
-                new TargetElement(XHTML, "table"));
-        // repeating of rows is ignored; for columns, see below!
-        MAPPINGS.put(
-                new QName(TABLE_NS, "table-row"),
-                new TargetElement(XHTML, "tr"));
-        // special mapping for rowspan/colspan attributes
-        final HashMap<QName, QName> tableCellAttsMapping =
-                new HashMap<QName, QName>();
-        tableCellAttsMapping.put(
-                new QName(TABLE_NS, "number-columns-spanned"),
-                new QName("colspan"));
-        tableCellAttsMapping.put(
-                new QName(TABLE_NS, "number-rows-spanned"),
-                new QName("rowspan"));
-        /* TODO: The following is not correct, the cell should be repeated not spanned!
-         * Code generates a HTML cell, spanning all repeated columns, to make the cell look correct.
-         * Problems may occur when both spanning and repeating is given, which is not allowed by spec.
-         * Cell spanning instead of repeating  is not a problem, because OpenOffice uses it
-         * only for empty cells.
-         */
-        tableCellAttsMapping.put(
-                new QName(TABLE_NS, "number-columns-repeated"),
-                new QName("colspan"));
-        MAPPINGS.put(
-                new QName(TABLE_NS, "table-cell"),
-                new TargetElement(XHTML, "td", tableCellAttsMapping));
-    }
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return Collections.emptySet(); // not a top-level parser
-    }
-
-    public void parse(
-            InputStream stream, ContentHandler handler,
-            Metadata metadata, ParseContext context)
-            throws IOException, SAXException, TikaException {
-        parseInternal(stream,
-                new XHTMLContentHandler(handler, metadata),
-                metadata, context);
-    }
-
-    void parseInternal(
-            InputStream stream, final ContentHandler handler,
-            Metadata metadata, ParseContext context)
-            throws IOException, SAXException, TikaException {
-
-        DefaultHandler dh = new OpenDocumentElementMappingContentHandler(handler, MAPPINGS);
-
-
-        SAXParser parser = context.getSAXParser();
-        parser.parse(
-            new CloseShieldInputStream(stream),
-            new OfflineContentHandler(
-                    new NSNormalizerContentHandler(dh)));
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.odf;
+
+import static org.apache.tika.sax.XHTMLContentHandler.XHTML;
+
+import javax.xml.namespace.QName;
+import javax.xml.parsers.SAXParser;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+import java.util.Stack;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.ElementMappingContentHandler;
+import org.apache.tika.sax.ElementMappingContentHandler.TargetElement;
+import org.apache.tika.sax.OfflineContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Parser for ODF <code>content.xml</code> files.
+ */
+public class OpenDocumentContentParser extends AbstractParser {
+    /**
+     * Marker interface shared by the style records kept by this parser
+     * (<code>TextStyle</code> and <code>ListStyle</code>); it declares no members.
+     */
+    private interface Style {
+    }
+
+    /**
+     * Formatting flags recorded for one named text style. The content handler
+     * sets them while reading <code>style:text-properties</code> and reads them
+     * when deciding which of the XHTML b, i and u elements to open or close
+     * around span content.
+     */
+    private static class TextStyle implements Style {
+        // true when the style asks for italic (or oblique) text
+        public boolean italic;
+        // true when the style asks for bold text
+        public boolean bold;
+        // true when the style asks for underlined text
+        public boolean underlined;
+    }
+
+    /**
+     * Record for one named list style; it only remembers whether the list
+     * is numbered so that the matching XHTML list element can be selected.
+     */
+    private static class ListStyle implements Style {
+        // true for a numbered list, false for a bulleted one
+        public boolean ordered;
+
+        /**
+         * @return <code>"ol"</code> when the list is ordered, <code>"ul"</code> otherwise
+         */
+        public String getTag() {
+            if (ordered) {
+                return "ol";
+            }
+            return "ul";
+        }
+    }
+
+    private static final class OpenDocumentElementMappingContentHandler extends
+            ElementMappingContentHandler {
+        // downstream handler; receives the XHTML events this class emits directly
+        private final ContentHandler handler;
+        // one bit per open element, indexed by depth; set when that element is a text node
+        private final BitSet textNodeStack = new BitSet();
+        // number of currently open elements, i.e. the index of the next free bit
+        private int nodeDepth = 0;
+        // number of currently open elements whose whole subtree is suppressed
+        private int completelyFiltered = 0;
+        // XHTML heading tag names (h1..h6) of the text:h elements that are still open
+        private Stack<String> headingStack = new Stack<String>();
+        // text styles seen so far, keyed by their style:name
+        private Map<String, TextStyle> textStyleMap = new HashMap<String, TextStyle>();
+        // list styles seen so far, keyed by their style:name
+        private Map<String, ListStyle> listStyleMap = new HashMap<String, ListStyle>();
+        // the text style being defined (inside style:style) or the style of the span in progress
+        private TextStyle textStyle;
+        // style of the span that ended last and whose closing tags have not been emitted yet
+        private TextStyle lastTextStyle;
+        // list styles pushed by startList and popped again by endList
+        private Stack<ListStyle> listStyleStack = new Stack<ListStyle>();
+        // the list style being defined (inside text:list-style), null otherwise
+        private ListStyle listStyle;
+
+        /**
+         * Creates the handler.
+         *
+         * @param handler  receiver of the generated events; passed to the superclass
+         *                 and also kept so that elements can be written to it directly
+         * @param mappings element mappings handed to the superclass
+         */
+        private OpenDocumentElementMappingContentHandler(ContentHandler handler,
+                                                         Map<QName, TargetElement> mappings) {
+            super(handler, mappings);
+            this.handler = handler;
+        }
+
+        /**
+         * Passes character data on only when no filtering is active and the
+         * innermost open element was classified as a text node.
+         */
+        @Override
+        public void characters(char[] ch, int start, int length)
+                throws SAXException {
+            // nothing is forwarded while filtering is active or outside any element
+            if (completelyFiltered != 0 || nodeDepth <= 0) {
+                return;
+            }
+            final boolean insideTextNode = textNodeStack.get(nodeDepth - 1);
+            if (!insideTextNode) {
+                return;
+            }
+            // emit closing tags still pending from a finished span before the text
+            lazyEndSpan();
+            super.characters(ch, start, length);
+        }
+
+        // helper for checking tags which need complete filtering
+        // (with sub-tags)
+        private boolean needsCompleteFiltering(
+                String namespaceURI, String localName) {
+            if (TEXT_NS.equals(namespaceURI)) {
+                return localName.endsWith("-template")
+                        || localName.endsWith("-style");
+            }
+            return TABLE_NS.equals(namespaceURI) && "covered-table-cell".equals(localName);
+        }
+
+        /**
+         * Maps the <code>text:outline-level</code> of a heading to an XHTML
+         * heading tag name.
+         *
+         * @param atts attributes of the <code>text:h</code> element
+         * @return <code>"h1"</code> to <code>"h6"</code>; levels outside that range
+         *         are clamped, and a missing or non-numeric level yields <code>"h1"</code>
+         */
+        private String getXHTMLHeaderTagName(Attributes atts) {
+            String depthStr = atts.getValue(TEXT_NS, "outline-level");
+            if (depthStr == null) {
+                return "h1";
+            }
+
+            int depth;
+            try {
+                depth = Integer.parseInt(depthStr.trim());
+            } catch (NumberFormatException e) {
+                // A malformed level must not abort the whole parse with an
+                // unchecked exception; treat it like a missing attribute.
+                return "h1";
+            }
+            // XHTML only knows h1..h6, so clamp the level into that range
+            return "h" + Math.max(1, Math.min(6, depth));
+        }
+
+        /**
+         * Check if a node is a text node
+         */
+        private boolean isTextNode(String namespaceURI, String localName) {
+            if (TEXT_NS.equals(namespaceURI) && !localName.equals("page-number") && !localName.equals("page-count")) {
+                return true;
+            }
+            if (SVG_NS.equals(namespaceURI)) {
+                return "title".equals(localName) ||
+                        "desc".equals(localName);
+            }
+            return false;
+        }
+
+        /**
+         * Opens the XHTML list element for a <code>text:list</code>.
+         *
+         * @param name value of <code>text:style-name</code>, may be <code>null</code>
+         * @throws SAXException if the downstream handler rejects the event
+         */
+        private void startList(String name) throws SAXException {
+            ListStyle style = name != null ? listStyleMap.get(name) : null;
+            // Push an entry for every list, styled or not. endList() pops whenever
+            // the stack is non-empty, so skipping the push for an unnamed nested
+            // list made it pop the enclosing list's entry and close with the wrong
+            // tag (and left the enclosing list to close as "ul").
+            listStyleStack.push(style);
+            String elementName = style != null ? style.getTag() : "ul";
+            handler.startElement(XHTML, elementName, elementName, EMPTY_ATTRIBUTES);
+        }
+
+        private void endList() throws SAXException {
+            String elementName = "ul";
+            if (!listStyleStack.isEmpty()) {
+                ListStyle style = listStyleStack.pop();
+                elementName = style != null ? style.getTag() : "ul";
+            }
+            handler.endElement(XHTML, elementName, elementName);
+        }
+
+        private void startSpan(String name) throws SAXException {
+            if (name == null) {
+                return;
+            }
+
+            TextStyle style = textStyleMap.get(name);
+            if (style == null) {
+                return;
+            }
+
+            // End tags that refer to no longer valid styles
+            if (!style.underlined && lastTextStyle != null && lastTextStyle.underlined) {
+                handler.endElement(XHTML, "u", "u");
+            }
+            if (!style.italic && lastTextStyle != null && lastTextStyle.italic) {
+                handler.endElement(XHTML, "i", "i");
+            }
+            if (!style.bold && lastTextStyle != null && lastTextStyle.bold) {
+                handler.endElement(XHTML, "b", "b");
+            }
+
+            // Start tags for new styles
+            if (style.bold && (lastTextStyle == null || !lastTextStyle.bold)) {
+                handler.startElement(XHTML, "b", "b", EMPTY_ATTRIBUTES);
+            }
+            if (style.italic && (lastTextStyle == null || !lastTextStyle.italic)) {
+                handler.startElement(XHTML, "i", "i", EMPTY_ATTRIBUTES);
+            }
+            if (style.underlined && (lastTextStyle == null || !lastTextStyle.underlined)) {
+                handler.startElement(XHTML, "u", "u", EMPTY_ATTRIBUTES);
+            }
+
+            textStyle = style;
+            lastTextStyle = null;
+        }
+
+        private void endSpan() throws SAXException {
+            lastTextStyle = textStyle;
+            textStyle = null;
+        }
+
+        private void lazyEndSpan() throws SAXException {
+            if (lastTextStyle == null) {
+                return;
+            }
+
+            if (lastTextStyle.underlined) {
+                handler.endElement(XHTML, "u", "u");
+            }
+            if (lastTextStyle.italic) {
+                handler.endElement(XHTML, "i", "i");
+            }
+            if (lastTextStyle.bold) {
+                handler.endElement(XHTML, "b", "b");
+            }
+
+            lastTextStyle = null;
+        }
+
+        /**
+         * Handles the start of an ODF element: records style definitions,
+         * tracks whether the element is a text node, updates the filter
+         * counter and, unless filtering is active, emits the XHTML start event
+         * (headings, lists and spans directly, everything else through the
+         * superclass mappings).
+         *
+         * @throws SAXException if a downstream handler rejects an event
+         */
+        @Override
+        public void startElement(
+                String namespaceURI, String localName, String qName,
+                Attributes attrs) throws SAXException {
+            // Keep track of the current node type. If it is a text node, the
+            // bit at the current depth is set in textNodeStack. characters()
+            // checks the top bit to determine whether the current node is a
+            // text node whose content is printed. nodeDepth holds the depth
+            // of the current node and also marks the top of the stack.
+            assert nodeDepth >= 0;
+
+            // Set styles
+            if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
+                String family = attrs.getValue(STYLE_NS, "family");
+                if ("text".equals(family)) {
+                    textStyle = new TextStyle();
+                    String name = attrs.getValue(STYLE_NS, "name");
+                    textStyleMap.put(name, textStyle);
+                }
+            } else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
+                listStyle = new ListStyle();
+                String name = attrs.getValue(STYLE_NS, "name");
+                listStyleMap.put(name, listStyle);
+            } else if (textStyle != null && STYLE_NS.equals(namespaceURI)
+                    && "text-properties".equals(localName)) {
+                String fontStyle = attrs.getValue(FORMATTING_OBJECTS_NS, "font-style");
+                if ("italic".equals(fontStyle) || "oblique".equals(fontStyle)) {
+                    textStyle.italic = true;
+                }
+                if (isBoldFontWeight(attrs.getValue(FORMATTING_OBJECTS_NS, "font-weight"))) {
+                    textStyle.bold = true;
+                }
+                String underlineStyle = attrs.getValue(STYLE_NS, "text-underline-style");
+                // "none" explicitly switches underlining off, so the mere
+                // presence of the attribute is not enough
+                if (underlineStyle != null && !"none".equals(underlineStyle)) {
+                    textStyle.underlined = true;
+                }
+            } else if (listStyle != null && TEXT_NS.equals(namespaceURI)) {
+                if ("list-level-style-bullet".equals(localName)) {
+                    listStyle.ordered = false;
+                } else if ("list-level-style-number".equals(localName)) {
+                    listStyle.ordered = true;
+                }
+            }
+
+            textNodeStack.set(nodeDepth++,
+                    isTextNode(namespaceURI, localName));
+            // filter *all* content of some tags
+            assert completelyFiltered >= 0;
+
+            if (needsCompleteFiltering(namespaceURI, localName)) {
+                completelyFiltered++;
+            }
+            // call next handler if no filtering
+            if (completelyFiltered == 0) {
+                // special handling of text:h, that are directly passed
+                // to incoming handler
+                if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
+                    final String el = headingStack.push(getXHTMLHeaderTagName(attrs));
+                    handler.startElement(XHTMLContentHandler.XHTML, el, el, EMPTY_ATTRIBUTES);
+                } else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
+                    startList(attrs.getValue(TEXT_NS, "style-name"));
+                } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
+                    startSpan(attrs.getValue(TEXT_NS, "style-name"));
+                } else {
+                    super.startElement(namespaceURI, localName, qName, attrs);
+                }
+            }
+        }
+
+        /**
+         * Tells whether an <code>fo:font-weight</code> value denotes bold text:
+         * the keywords <code>bold</code> and <code>bolder</code>, or a numeric
+         * weight above 500. Missing, empty or unparsable values are not bold.
+         */
+        private static boolean isBoldFontWeight(String fontWeight) {
+            if (fontWeight == null || fontWeight.isEmpty()) {
+                return false;
+            }
+            if ("bold".equals(fontWeight) || "bolder".equals(fontWeight)) {
+                return true;
+            }
+            if (!Character.isDigit(fontWeight.charAt(0))) {
+                return false;
+            }
+            try {
+                return Integer.parseInt(fontWeight) > 500;
+            } catch (NumberFormatException e) {
+                // starts with a digit but is not an integer (e.g. "700.5"):
+                // ignore the value instead of aborting the parse
+                return false;
+            }
+        }
+
+        @Override
+        public void endElement(
+                String namespaceURI, String localName, String qName)
+                throws SAXException {
+            if (STYLE_NS.equals(namespaceURI) && "style".equals(localName)) {
+                textStyle = null;
+            } else if (TEXT_NS.equals(namespaceURI) && "list-style".equals(localName)) {
+                listStyle = null;
+            }
+
+            // call next handler if no filtering
+            if (completelyFiltered == 0) {
+                // special handling of text:h, that are directly passed
+                // to incoming handler
+                if (TEXT_NS.equals(namespaceURI) && "h".equals(localName)) {
+                    final String el = headingStack.pop();
+                    handler.endElement(XHTMLContentHandler.XHTML, el, el);
+                } else if (TEXT_NS.equals(namespaceURI) && "list".equals(localName)) {
+                    endList();
+                } else if (TEXT_NS.equals(namespaceURI) && "span".equals(localName)) {
+                    endSpan();
+                } else {
+                    if (TEXT_NS.equals(namespaceURI) && "p".equals(localName)) {
+                        lazyEndSpan();
+                    }
+                    super.endElement(namespaceURI, localName, qName);
+                }
+
+                // special handling of tabulators
+                if (TEXT_NS.equals(namespaceURI)
+                        && ("tab-stop".equals(localName)
+                        || "tab".equals(localName))) {
+                    this.characters(TAB, 0, TAB.length);
+                }
+            }
+
+            // revert filter for *all* content of some tags
+            if (needsCompleteFiltering(namespaceURI, localName)) {
+                completelyFiltered--;
+            }
+            assert completelyFiltered >= 0;
+
+            // reduce current node depth
+            nodeDepth--;
+            assert nodeDepth >= 0;
+        }
+
+        @Override
+        public void startPrefixMapping(String prefix, String uri) {
+            // remove prefix mappings as they should not occur in XHTML
+        }
+
+        @Override
+        public void endPrefixMapping(String prefix) {
+            // remove prefix mappings as they should not occur in XHTML
+        }
+    }
+
+    public static final String TEXT_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:text:1.0";
+
+    public static final String TABLE_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:table:1.0";
+
+    public static final String STYLE_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:style:1.0";
+
+    public static final String FORMATTING_OBJECTS_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0";
+
+    public static final String OFFICE_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:office:1.0";
+
+    public static final String SVG_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0";
+
+    public static final String PRESENTATION_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0";
+
+    public static final String DRAW_NS =
+            "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0";
+
+    public static final String XLINK_NS = "http://www.w3.org/1999/xlink";
+
+    protected static final char[] TAB = new char[]{'\t'};
+
+    private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
+
+    /**
+     * Mappings between ODF tag names and XHTML tag names
+     * (including attributes). All other tag names/attributes are ignored
+     * and left out from event stream.
+     */
+    private static final HashMap<QName, TargetElement> MAPPINGS =
+            new HashMap<QName, TargetElement>();
+
+    static {
+        // general mappings of text:-tags
+        MAPPINGS.put(
+                new QName(TEXT_NS, "p"),
+                new TargetElement(XHTML, "p"));
+        // text:h-tags are mapped specifically in startElement/endElement
+        MAPPINGS.put(
+                new QName(TEXT_NS, "line-break"),
+                new TargetElement(XHTML, "br"));
+        MAPPINGS.put(
+                new QName(TEXT_NS, "list-item"),
+                new TargetElement(XHTML, "li"));
+        MAPPINGS.put(
+                new QName(TEXT_NS, "note"),
+                new TargetElement(XHTML, "div"));
+        MAPPINGS.put(
+                new QName(OFFICE_NS, "annotation"),
+                new TargetElement(XHTML, "div"));
+        MAPPINGS.put(
+                new QName(PRESENTATION_NS, "notes"),
+                new TargetElement(XHTML, "div"));
+        MAPPINGS.put(
+                new QName(DRAW_NS, "object"),
+                new TargetElement(XHTML, "object"));
+        MAPPINGS.put(
+                new QName(DRAW_NS, "text-box"),
+                new TargetElement(XHTML, "div"));
+        MAPPINGS.put(
+                new QName(SVG_NS, "title"),
+                new TargetElement(XHTML, "span"));
+        MAPPINGS.put(
+                new QName(SVG_NS, "desc"),
+                new TargetElement(XHTML, "span"));
+        MAPPINGS.put(
+                new QName(TEXT_NS, "span"),
+                new TargetElement(XHTML, "span"));
+
+        final HashMap<QName, QName> aAttsMapping =
+                new HashMap<QName, QName>();
+        aAttsMapping.put(
+                new QName(XLINK_NS, "href"),
+                new QName("href"));
+        aAttsMapping.put(
+                new QName(XLINK_NS, "title"),
+                new QName("title"));
+        MAPPINGS.put(
+                new QName(TEXT_NS, "a"),
+                new TargetElement(XHTML, "a", aAttsMapping));
+
+        // create HTML tables from table:-tags
+        MAPPINGS.put(
+                new QName(TABLE_NS, "table"),
+                new TargetElement(XHTML, "table"));
+        // repeating of rows is ignored; for columns, see below!
+        MAPPINGS.put(
+                new QName(TABLE_NS, "table-row"),
+                new TargetElement(XHTML, "tr"));
+        // special mapping for rowspan/colspan attributes
+        final HashMap<QName, QName> tableCellAttsMapping =
+                new HashMap<QName, QName>();
+        tableCellAttsMapping.put(
+                new QName(TABLE_NS, "number-columns-spanned"),
+                new QName("colspan"));
+        tableCellAttsMapping.put(
+                new QName(TABLE_NS, "number-rows-spanned"),
+                new QName("rowspan"));
+        /* TODO: The following is not correct, the cell should be repeated not spanned!
+         * Code generates a HTML cell, spanning all repeated columns, to make the cell look correct.
+         * Problems may occur when both spanning and repeating is given, which is not allowed by spec.
+         * Cell spanning instead of repeating  is not a problem, because OpenOffice uses it
+         * only for empty cells.
+         */
+        tableCellAttsMapping.put(
+                new QName(TABLE_NS, "number-columns-repeated"),
+                new QName("colspan"));
+        MAPPINGS.put(
+                new QName(TABLE_NS, "table-cell"),
+                new TargetElement(XHTML, "td", tableCellAttsMapping));
+    }
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return Collections.emptySet(); // not a top-level parser
+    }
+
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        parseInternal(stream,
+                new XHTMLContentHandler(handler, metadata),
+                metadata, context);
+    }
+
+    void parseInternal(
+            InputStream stream, final ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+
+        DefaultHandler dh = new OpenDocumentElementMappingContentHandler(handler, MAPPINGS);
+
+
+        SAXParser parser = context.getSAXParser();
+        parser.parse(
+            new CloseShieldInputStream(stream),
+            new OfflineContentHandler(
+                    new NSNormalizerContentHandler(dh)));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
index 4713022..14b9674 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
@@ -1,199 +1,199 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.odf;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.DublinCore;
-import org.apache.tika.metadata.MSOffice;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Office;
-import org.apache.tika.metadata.OfficeOpenXMLCore;
-import org.apache.tika.metadata.PagedText;
-import org.apache.tika.metadata.Property;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.xml.AttributeDependantMetadataHandler;
-import org.apache.tika.parser.xml.AttributeMetadataHandler;
-import org.apache.tika.parser.xml.ElementMetadataHandler;
-import org.apache.tika.parser.xml.MetadataHandler;
-import org.apache.tika.parser.xml.XMLParser;
-import org.apache.tika.sax.TeeContentHandler;
-import org.apache.tika.sax.xpath.CompositeMatcher;
-import org.apache.tika.sax.xpath.Matcher;
-import org.apache.tika.sax.xpath.MatchingContentHandler;
-import org.apache.tika.sax.xpath.XPathParser;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Parser for OpenDocument <code>meta.xml</code> files.
- */
-public class OpenDocumentMetaParser extends XMLParser {
-    /**
-     * Serial version UID
-     */
-    private static final long serialVersionUID = -8739250869531737584L;
-
-    private static final String META_NS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
-    private static final XPathParser META_XPATH = new XPathParser("meta", META_NS);
-
-    /**
-     * @see OfficeOpenXMLCore#SUBJECT
-     * @deprecated use OfficeOpenXMLCore#SUBJECT
-     */
-    @Deprecated
-    private static final Property TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR =
-            Property.composite(Office.INITIAL_AUTHOR,
-                    new Property[]{Property.externalText("initial-creator")});
-
-    private static ContentHandler getDublinCoreHandler(
-            Metadata metadata, Property property, String element) {
-        return new ElementMetadataHandler(
-                DublinCore.NAMESPACE_URI_DC, element,
-                metadata, property);
-    }
-
-    private static ContentHandler getMeta(
-            ContentHandler ch, Metadata md, Property property, String element) {
-        Matcher matcher = new CompositeMatcher(
-                META_XPATH.parse("//meta:" + element),
-                META_XPATH.parse("//meta:" + element + "//text()"));
-        ContentHandler branch =
-                new MatchingContentHandler(new MetadataHandler(md, property), matcher);
-        return new TeeContentHandler(ch, branch);
-    }
-
-    private static ContentHandler getUserDefined(
-            ContentHandler ch, Metadata md) {
-        Matcher matcher = new CompositeMatcher(
-                META_XPATH.parse("//meta:user-defined/@meta:name"),
-                META_XPATH.parse("//meta:user-defined//text()"));
-        // eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
-        ContentHandler branch = new MatchingContentHandler(
-                new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
-                matcher);
-        return new TeeContentHandler(ch, branch);
-    }
-
-    @Deprecated
-    private static ContentHandler getStatistic(
-            ContentHandler ch, Metadata md, String name, String attribute) {
-        Matcher matcher =
-                META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
-        ContentHandler branch = new MatchingContentHandler(
-                new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
-        return new TeeContentHandler(ch, branch);
-    }
-
-    private static ContentHandler getStatistic(
-            ContentHandler ch, Metadata md, Property property, String attribute) {
-        Matcher matcher =
-                META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
-        ContentHandler branch = new MatchingContentHandler(
-                new AttributeMetadataHandler(META_NS, attribute, md, property), matcher);
-        return new TeeContentHandler(ch, branch);
-    }
-
-    protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context) {
-        // We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date
-        // Process the Dublin Core Attributes 
-        ch = new TeeContentHandler(super.getContentHandler(ch, md, context),
-                getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"),
-                getDublinCoreHandler(md, TikaCoreProperties.CREATOR, "creator"),
-                getDublinCoreHandler(md, TikaCoreProperties.DESCRIPTION, "description"),
-                getDublinCoreHandler(md, TikaCoreProperties.PUBLISHER, "publisher"),
-                getDublinCoreHandler(md, TikaCoreProperties.CONTRIBUTOR, "contributor"),
-                getDublinCoreHandler(md, TikaCoreProperties.TYPE, "type"),
-                getDublinCoreHandler(md, TikaCoreProperties.FORMAT, "format"),
-                getDublinCoreHandler(md, TikaCoreProperties.IDENTIFIER, "identifier"),
-                getDublinCoreHandler(md, TikaCoreProperties.LANGUAGE, "language"),
-                getDublinCoreHandler(md, TikaCoreProperties.RIGHTS, "rights"));
-
-        // Process the OO Meta Attributes
-        ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date");
-        // ODF uses dc:date for modified
-        ch = new TeeContentHandler(ch, new ElementMetadataHandler(
-                DublinCore.NAMESPACE_URI_DC, "date",
-                md, TikaCoreProperties.MODIFIED));
-
-        // ODF uses dc:subject for description
-        ch = new TeeContentHandler(ch, new ElementMetadataHandler(
-                DublinCore.NAMESPACE_URI_DC, "subject",
-                md, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT));
-        ch = getMeta(ch, md, TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword");
-
-        ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), "editing-duration");
-        ch = getMeta(ch, md, Property.externalText("editing-cycles"), "editing-cycles");
-        ch = getMeta(ch, md, TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR, "initial-creator");
-        ch = getMeta(ch, md, Property.externalText("generator"), "generator");
-
-        // Process the user defined Meta Attributes
-        ch = getUserDefined(ch, md);
-
-        // Process the OO Statistics Attributes
-        ch = getStatistic(ch, md, Office.OBJECT_COUNT, "object-count");
-        ch = getStatistic(ch, md, Office.IMAGE_COUNT, "image-count");
-        ch = getStatistic(ch, md, Office.PAGE_COUNT, "page-count");
-        ch = getStatistic(ch, md, PagedText.N_PAGES, "page-count");
-        ch = getStatistic(ch, md, Office.TABLE_COUNT, "table-count");
-        ch = getStatistic(ch, md, Office.PARAGRAPH_COUNT, "paragraph-count");
-        ch = getStatistic(ch, md, Office.WORD_COUNT, "word-count");
-        ch = getStatistic(ch, md, Office.CHARACTER_COUNT, "character-count");
-
-        // Legacy, Tika-1.0 style attributes
-        // TODO Remove these in Tika 2.0
-        ch = getStatistic(ch, md, MSOffice.OBJECT_COUNT, "object-count");
-        ch = getStatistic(ch, md, MSOffice.IMAGE_COUNT, "image-count");
-        ch = getStatistic(ch, md, MSOffice.PAGE_COUNT, "page-count");
-        ch = getStatistic(ch, md, MSOffice.TABLE_COUNT, "table-count");
-        ch = getStatistic(ch, md, MSOffice.PARAGRAPH_COUNT, "paragraph-count");
-        ch = getStatistic(ch, md, MSOffice.WORD_COUNT, "word-count");
-        ch = getStatistic(ch, md, MSOffice.CHARACTER_COUNT, "character-count");
-
-        // Legacy Statistics Attributes, replaced with real keys above
-        // TODO Remove these shortly, eg after Tika 1.1 (TIKA-770)
-        ch = getStatistic(ch, md, "nbPage", "page-count");
-        ch = getStatistic(ch, md, "nbPara", "paragraph-count");
-        ch = getStatistic(ch, md, "nbWord", "word-count");
-        ch = getStatistic(ch, md, "nbCharacter", "character-count");
-        ch = getStatistic(ch, md, "nbTab", "table-count");
-        ch = getStatistic(ch, md, "nbObject", "object-count");
-        ch = getStatistic(ch, md, "nbImg", "image-count");
-
-        // Normalise the rest
-        ch = new NSNormalizerContentHandler(ch);
-        return ch;
-    }
-
-    @Override
-    public void parse(
-            InputStream stream, ContentHandler handler,
-            Metadata metadata, ParseContext context)
-            throws IOException, SAXException, TikaException {
-        super.parse(stream, handler, metadata, context);
-        // Copy subject to description for OO2
-        String odfSubject = metadata.get(OfficeOpenXMLCore.SUBJECT);
-        if (odfSubject != null && !odfSubject.equals("") &&
-                (metadata.get(TikaCoreProperties.DESCRIPTION) == null || metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) {
-            metadata.set(TikaCoreProperties.DESCRIPTION, odfSubject);
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.odf;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.MSOffice;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.OfficeOpenXMLCore;
+import org.apache.tika.metadata.PagedText;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.xml.AttributeDependantMetadataHandler;
+import org.apache.tika.parser.xml.AttributeMetadataHandler;
+import org.apache.tika.parser.xml.ElementMetadataHandler;
+import org.apache.tika.parser.xml.MetadataHandler;
+import org.apache.tika.parser.xml.XMLParser;
+import org.apache.tika.sax.TeeContentHandler;
+import org.apache.tika.sax.xpath.CompositeMatcher;
+import org.apache.tika.sax.xpath.Matcher;
+import org.apache.tika.sax.xpath.MatchingContentHandler;
+import org.apache.tika.sax.xpath.XPathParser;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for OpenDocument <code>meta.xml</code> files.
+ */
+public class OpenDocumentMetaParser extends XMLParser {
+    /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = -8739250869531737584L;
+
+    private static final String META_NS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
+    private static final XPathParser META_XPATH = new XPathParser("meta", META_NS);
+
+    /**
+     * @see OfficeOpenXMLCore#SUBJECT
+     * @deprecated use OfficeOpenXMLCore#SUBJECT
+     */
+    @Deprecated
+    private static final Property TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR =
+            Property.composite(Office.INITIAL_AUTHOR,
+                    new Property[]{Property.externalText("initial-creator")});
+
+    private static ContentHandler getDublinCoreHandler(
+            Metadata metadata, Property property, String element) {
+        return new ElementMetadataHandler(
+                DublinCore.NAMESPACE_URI_DC, element,
+                metadata, property);
+    }
+
+    private static ContentHandler getMeta(
+            ContentHandler ch, Metadata md, Property property, String element) {
+        Matcher matcher = new CompositeMatcher(
+                META_XPATH.parse("//meta:" + element),
+                META_XPATH.parse("//meta:" + element + "//text()"));
+        ContentHandler branch =
+                new MatchingContentHandler(new MetadataHandler(md, property), matcher);
+        return new TeeContentHandler(ch, branch);
+    }
+
+    private static ContentHandler getUserDefined(
+            ContentHandler ch, Metadata md) {
+        Matcher matcher = new CompositeMatcher(
+                META_XPATH.parse("//meta:user-defined/@meta:name"),
+                META_XPATH.parse("//meta:user-defined//text()"));
+        // eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
+        ContentHandler branch = new MatchingContentHandler(
+                new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
+                matcher);
+        return new TeeContentHandler(ch, branch);
+    }
+
+    @Deprecated
+    private static ContentHandler getStatistic(
+            ContentHandler ch, Metadata md, String name, String attribute) {
+        Matcher matcher =
+                META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
+        ContentHandler branch = new MatchingContentHandler(
+                new AttributeMetadataHandler(META_NS, attribute, md, name), matcher);
+        return new TeeContentHandler(ch, branch);
+    }
+
+    private static ContentHandler getStatistic(
+            ContentHandler ch, Metadata md, Property property, String attribute) {
+        Matcher matcher =
+                META_XPATH.parse("//meta:document-statistic/@meta:" + attribute);
+        ContentHandler branch = new MatchingContentHandler(
+                new AttributeMetadataHandler(META_NS, attribute, md, property), matcher);
+        return new TeeContentHandler(ch, branch);
+    }
+
+    protected ContentHandler getContentHandler(ContentHandler ch, Metadata md, ParseContext context) {
+        // We can no longer extend DcXMLParser due to the handling of dc:subject and dc:date
+        // Process the Dublin Core Attributes 
+        ch = new TeeContentHandler(super.getContentHandler(ch, md, context),
+                getDublinCoreHandler(md, TikaCoreProperties.TITLE, "title"),
+                getDublinCoreHandler(md, TikaCoreProperties.CREATOR, "creator"),
+                getDublinCoreHandler(md, TikaCoreProperties.DESCRIPTION, "description"),
+                getDublinCoreHandler(md, TikaCoreProperties.PUBLISHER, "publisher"),
+                getDublinCoreHandler(md, TikaCoreProperties.CONTRIBUTOR, "contributor"),
+                getDublinCoreHandler(md, TikaCoreProperties.TYPE, "type"),
+                getDublinCoreHandler(md, TikaCoreProperties.FORMAT, "format"),
+                getDublinCoreHandler(md, TikaCoreProperties.IDENTIFIER, "identifier"),
+                getDublinCoreHandler(md, TikaCoreProperties.LANGUAGE, "language"),
+                getDublinCoreHandler(md, TikaCoreProperties.RIGHTS, "rights"));
+
+        // Process the OO Meta Attributes
+        ch = getMeta(ch, md, TikaCoreProperties.CREATED, "creation-date");
+        // ODF uses dc:date for modified
+        ch = new TeeContentHandler(ch, new ElementMetadataHandler(
+                DublinCore.NAMESPACE_URI_DC, "date",
+                md, TikaCoreProperties.MODIFIED));
+
+        // ODF uses dc:subject for description
+        ch = new TeeContentHandler(ch, new ElementMetadataHandler(
+                DublinCore.NAMESPACE_URI_DC, "subject",
+                md, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT));
+        ch = getMeta(ch, md, TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword");
+
+        ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), "editing-duration");
+        ch = getMeta(ch, md, Property.externalText("editing-cycles"), "editing-cycles");
+        ch = getMeta(ch, md, TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR, "initial-creator");
+        ch = getMeta(ch, md, Property.externalText("generator"), "generator");
+
+        // Process the user defined Meta Attributes
+        ch = getUserDefined(ch, md);
+
+        // Process the OO Statistics Attributes
+        ch = getStatistic(ch, md, Office.OBJECT_COUNT, "object-count");
+        ch = getStatistic(ch, md, Office.IMAGE_COUNT, "image-count");
+        ch = getStatistic(ch, md, Office.PAGE_COUNT, "page-count");
+        ch = getStatistic(ch, md, PagedText.N_PAGES, "page-count");
+        ch = getStatistic(ch, md, Office.TABLE_COUNT, "table-count");
+        ch = getStatistic(ch, md, Office.PARAGRAPH_COUNT, "paragraph-count");
+        ch = getStatistic(ch, md, Office.WORD_COUNT, "word-count");
+        ch = getStatistic(ch, md, Office.CHARACTER_COUNT, "character-count");
+
+        // Legacy, Tika-1.0 style attributes
+        // TODO Remove these in Tika 2.0
+        ch = getStatistic(ch, md, MSOffice.OBJECT_COUNT, "object-count");
+        ch = getStatistic(ch, md, MSOffice.IMAGE_COUNT, "image-count");
+        ch = getStatistic(ch, md, MSOffice.PAGE_COUNT, "page-count");
+        ch = getStatistic(ch, md, MSOffice.TABLE_COUNT, "table-count");
+        ch = getStatistic(ch, md, MSOffice.PARAGRAPH_COUNT, "paragraph-count");
+        ch = getStatistic(ch, md, MSOffice.WORD_COUNT, "word-count");
+        ch = getStatistic(ch, md, MSOffice.CHARACTER_COUNT, "character-count");
+
+        // Legacy Statistics Attributes, replaced with real keys above
+        // TODO Remove these shortly, eg after Tika 1.1 (TIKA-770)
+        ch = getStatistic(ch, md, "nbPage", "page-count");
+        ch = getStatistic(ch, md, "nbPara", "paragraph-count");
+        ch = getStatistic(ch, md, "nbWord", "word-count");
+        ch = getStatistic(ch, md, "nbCharacter", "character-count");
+        ch = getStatistic(ch, md, "nbTab", "table-count");
+        ch = getStatistic(ch, md, "nbObject", "object-count");
+        ch = getStatistic(ch, md, "nbImg", "image-count");
+
+        // Normalise the rest
+        ch = new NSNormalizerContentHandler(ch);
+        return ch;
+    }
+
+    @Override
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        super.parse(stream, handler, metadata, context);
+        // Copy subject to description for OO2
+        String odfSubject = metadata.get(OfficeOpenXMLCore.SUBJECT);
+        if (odfSubject != null && !odfSubject.equals("") &&
+                (metadata.get(TikaCoreProperties.DESCRIPTION) == null || metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) {
+            metadata.set(TikaCoreProperties.DESCRIPTION, odfSubject);
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
index 2739340..00145d2 100644
--- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
+++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
@@ -1,225 +1,225 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.odf;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Enumeration;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipFile;
-import java.util.zip.ZipInputStream;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.EndDocumentShieldingContentHandler;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-/**
- * OpenOffice parser
- */
-public class OpenDocumentParser extends AbstractParser {
-
-    /**
-     * Serial version UID
-     */
-    private static final long serialVersionUID = -6410276875438618287L;
-
-    private static final Set<MediaType> SUPPORTED_TYPES =
-            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
-                    MediaType.application("vnd.sun.xml.writer"),
-                    MediaType.application("vnd.oasis.opendocument.text"),
-                    MediaType.application("vnd.oasis.opendocument.graphics"),
-                    MediaType.application("vnd.oasis.opendocument.presentation"),
-                    MediaType.application("vnd.oasis.opendocument.spreadsheet"),
-                    MediaType.application("vnd.oasis.opendocument.chart"),
-                    MediaType.application("vnd.oasis.opendocument.image"),
-                    MediaType.application("vnd.oasis.opendocument.formula"),
-                    MediaType.application("vnd.oasis.opendocument.text-master"),
-                    MediaType.application("vnd.oasis.opendocument.text-web"),
-                    MediaType.application("vnd.oasis.opendocument.text-template"),
-                    MediaType.application("vnd.oasis.opendocument.graphics-template"),
-                    MediaType.application("vnd.oasis.opendocument.presentation-template"),
-                    MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
-                    MediaType.application("vnd.oasis.opendocument.chart-template"),
-                    MediaType.application("vnd.oasis.opendocument.image-template"),
-                    MediaType.application("vnd.oasis.opendocument.formula-template"),
-                    MediaType.application("x-vnd.oasis.opendocument.text"),
-                    MediaType.application("x-vnd.oasis.opendocument.graphics"),
-                    MediaType.application("x-vnd.oasis.opendocument.presentation"),
-                    MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
-                    MediaType.application("x-vnd.oasis.opendocument.chart"),
-                    MediaType.application("x-vnd.oasis.opendocument.image"),
-                    MediaType.application("x-vnd.oasis.opendocument.formula"),
-                    MediaType.application("x-vnd.oasis.opendocument.text-master"),
-                    MediaType.application("x-vnd.oasis.opendocument.text-web"),
-                    MediaType.application("x-vnd.oasis.opendocument.text-template"),
-                    MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
-                    MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
-                    MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
-                    MediaType.application("x-vnd.oasis.opendocument.chart-template"),
-                    MediaType.application("x-vnd.oasis.opendocument.image-template"),
-                    MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
-
-    private static final String META_NAME = "meta.xml";
-
-    private Parser meta = new OpenDocumentMetaParser();
-
-    private Parser content = new OpenDocumentContentParser();
-
-    public Parser getMetaParser() {
-        return meta;
-    }
-
-    public void setMetaParser(Parser meta) {
-        this.meta = meta;
-    }
-
-    public Parser getContentParser() {
-        return content;
-    }
-
-    public void setContentParser(Parser content) {
-        this.content = content;
-    }
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return SUPPORTED_TYPES;
-    }
-
-    public void parse(
-            InputStream stream, ContentHandler baseHandler,
-            Metadata metadata, ParseContext context)
-            throws IOException, SAXException, TikaException {
-
-        // Open the Zip stream
-        // Use a File if we can, and an already open zip is even better
-        ZipFile zipFile = null;
-        ZipInputStream zipStream = null;
-        if (stream instanceof TikaInputStream) {
-            TikaInputStream tis = (TikaInputStream) stream;
-            Object container = ((TikaInputStream) stream).getOpenContainer();
-            if (container instanceof ZipFile) {
-                zipFile = (ZipFile) container;
-            } else if (tis.hasFile()) {
-                zipFile = new ZipFile(tis.getFile());
-            } else {
-                zipStream = new ZipInputStream(stream);
-            }
-        } else {
-            zipStream = new ZipInputStream(stream);
-        }
-
-        // Prepare to handle the content
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata);
-
-        // As we don't know which of the metadata or the content
-        //  we'll hit first, catch the endDocument call initially
-        EndDocumentShieldingContentHandler handler =
-                new EndDocumentShieldingContentHandler(xhtml);
-
-        if (zipFile != null) {
-            try {
-                handleZipFile(zipFile, metadata, context, handler);
-            } finally {
-                //Do we want to close silently == catch an exception here?
-                zipFile.close();
-            }
-        } else {
-            try {
-                handleZipStream(zipStream, metadata, context, handler);
-            } finally {
-                //Do we want to close silently == catch an exception here?
-                zipStream.close();
-            }
-        }
-
-        // Only now call the end document
-        if (handler.getEndDocumentWasCalled()) {
-            handler.reallyEndDocument();
-        }
-    }
-
-    private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context, EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException {
-        ZipEntry entry = zipStream.getNextEntry();
-        while (entry != null) {
-            handleZipEntry(entry, zipStream, metadata, context, handler);
-            entry = zipStream.getNextEntry();
-        }
-    }
-
-    private void handleZipFile(ZipFile zipFile, Metadata metadata,
-                               ParseContext context, EndDocumentShieldingContentHandler handler)
-            throws IOException, TikaException, SAXException {
-        // If we can, process the metadata first, then the
-        //  rest of the file afterwards (TIKA-1353)
-        // Only possible to guarantee that when opened from a file not a stream
-
-        ZipEntry entry = zipFile.getEntry(META_NAME);
-        if (entry != null) {
-            handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
-        }
-
-        Enumeration<? extends ZipEntry> entries = zipFile.entries();
-        while (entries.hasMoreElements()) {
-            entry = entries.nextElement();
-            if (!META_NAME.equals(entry.getName())) {
-                handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
-            }
-        }
-    }
-    private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata,
-                                ParseContext context, EndDocumentShieldingContentHandler handler)
-            throws IOException, SAXException, TikaException {
-        if (entry == null) return;
-
-        if (entry.getName().equals("mimetype")) {
-            String type = IOUtils.toString(zip, UTF_8);
-            metadata.set(Metadata.CONTENT_TYPE, type);
-        } else if (entry.getName().equals(META_NAME)) {
-            meta.parse(zip, new DefaultHandler(), metadata, context);
-        } else if (entry.getName().endsWith("content.xml")) {
-            if (content instanceof OpenDocumentContentParser) {
-                ((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
-            } else {
-                // Foreign content parser was set:
-                content.parse(zip, handler, metadata, context);
-            }
-        } else if (entry.getName().endsWith("styles.xml")) {
-            if (content instanceof OpenDocumentContentParser) {
-                ((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
-            } else {
-                // Foreign content parser was set:
-                content.parse(zip, handler, metadata, context);
-            }
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.odf;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipInputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.EndDocumentShieldingContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * OpenOffice parser
+ */
+public class OpenDocumentParser extends AbstractParser {
+
+    /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = -6410276875438618287L;
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                    MediaType.application("vnd.sun.xml.writer"),
+                    MediaType.application("vnd.oasis.opendocument.text"),
+                    MediaType.application("vnd.oasis.opendocument.graphics"),
+                    MediaType.application("vnd.oasis.opendocument.presentation"),
+                    MediaType.application("vnd.oasis.opendocument.spreadsheet"),
+                    MediaType.application("vnd.oasis.opendocument.chart"),
+                    MediaType.application("vnd.oasis.opendocument.image"),
+                    MediaType.application("vnd.oasis.opendocument.formula"),
+                    MediaType.application("vnd.oasis.opendocument.text-master"),
+                    MediaType.application("vnd.oasis.opendocument.text-web"),
+                    MediaType.application("vnd.oasis.opendocument.text-template"),
+                    MediaType.application("vnd.oasis.opendocument.graphics-template"),
+                    MediaType.application("vnd.oasis.opendocument.presentation-template"),
+                    MediaType.application("vnd.oasis.opendocument.spreadsheet-template"),
+                    MediaType.application("vnd.oasis.opendocument.chart-template"),
+                    MediaType.application("vnd.oasis.opendocument.image-template"),
+                    MediaType.application("vnd.oasis.opendocument.formula-template"),
+                    MediaType.application("x-vnd.oasis.opendocument.text"),
+                    MediaType.application("x-vnd.oasis.opendocument.graphics"),
+                    MediaType.application("x-vnd.oasis.opendocument.presentation"),
+                    MediaType.application("x-vnd.oasis.opendocument.spreadsheet"),
+                    MediaType.application("x-vnd.oasis.opendocument.chart"),
+                    MediaType.application("x-vnd.oasis.opendocument.image"),
+                    MediaType.application("x-vnd.oasis.opendocument.formula"),
+                    MediaType.application("x-vnd.oasis.opendocument.text-master"),
+                    MediaType.application("x-vnd.oasis.opendocument.text-web"),
+                    MediaType.application("x-vnd.oasis.opendocument.text-template"),
+                    MediaType.application("x-vnd.oasis.opendocument.graphics-template"),
+                    MediaType.application("x-vnd.oasis.opendocument.presentation-template"),
+                    MediaType.application("x-vnd.oasis.opendocument.spreadsheet-template"),
+                    MediaType.application("x-vnd.oasis.opendocument.chart-template"),
+                    MediaType.application("x-vnd.oasis.opendocument.image-template"),
+                    MediaType.application("x-vnd.oasis.opendocument.formula-template"))));
+
+    private static final String META_NAME = "meta.xml";
+
+    private Parser meta = new OpenDocumentMetaParser();
+
+    private Parser content = new OpenDocumentContentParser();
+
+    public Parser getMetaParser() {
+        return meta;
+    }
+
+    public void setMetaParser(Parser meta) {
+        this.meta = meta;
+    }
+
+    public Parser getContentParser() {
+        return content;
+    }
+
+    public void setContentParser(Parser content) {
+        this.content = content;
+    }
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    public void parse(
+            InputStream stream, ContentHandler baseHandler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+
+        // Open the Zip stream
+        // Use a File if we can, and an already open zip is even better
+        ZipFile zipFile = null;
+        ZipInputStream zipStream = null;
+        if (stream instanceof TikaInputStream) {
+            TikaInputStream tis = (TikaInputStream) stream;
+            Object container = ((TikaInputStream) stream).getOpenContainer();
+            if (container instanceof ZipFile) {
+                zipFile = (ZipFile) container;
+            } else if (tis.hasFile()) {
+                zipFile = new ZipFile(tis.getFile());
+            } else {
+                zipStream = new ZipInputStream(stream);
+            }
+        } else {
+            zipStream = new ZipInputStream(stream);
+        }
+
+        // Prepare to handle the content
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(baseHandler, metadata);
+
+        // As we don't know which of the metadata or the content
+        //  we'll hit first, catch the endDocument call initially
+        EndDocumentShieldingContentHandler handler =
+                new EndDocumentShieldingContentHandler(xhtml);
+
+        if (zipFile != null) {
+            try {
+                handleZipFile(zipFile, metadata, context, handler);
+            } finally {
+                //Do we want to close silently == catch an exception here?
+                zipFile.close();
+            }
+        } else {
+            try {
+                handleZipStream(zipStream, metadata, context, handler);
+            } finally {
+                //Do we want to close silently == catch an exception here?
+                zipStream.close();
+            }
+        }
+
+        // Only now call the end document
+        if (handler.getEndDocumentWasCalled()) {
+            handler.reallyEndDocument();
+        }
+    }
+
+    private void handleZipStream(ZipInputStream zipStream, Metadata metadata, ParseContext context, EndDocumentShieldingContentHandler handler) throws IOException, TikaException, SAXException {
+        ZipEntry entry = zipStream.getNextEntry();
+        while (entry != null) {
+            handleZipEntry(entry, zipStream, metadata, context, handler);
+            entry = zipStream.getNextEntry();
+        }
+    }
+
+    private void handleZipFile(ZipFile zipFile, Metadata metadata,
+                               ParseContext context, EndDocumentShieldingContentHandler handler)
+            throws IOException, TikaException, SAXException {
+        // If we can, process the metadata first, then the
+        //  rest of the file afterwards (TIKA-1353)
+        // Only possible to guarantee that when opened from a file not a stream
+
+        ZipEntry entry = zipFile.getEntry(META_NAME);
+        if (entry != null) {
+            handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
+        }
+
+        Enumeration<? extends ZipEntry> entries = zipFile.entries();
+        while (entries.hasMoreElements()) {
+            entry = entries.nextElement();
+            if (!META_NAME.equals(entry.getName())) {
+                handleZipEntry(entry, zipFile.getInputStream(entry), metadata, context, handler);
+            }
+        }
+    }
+    private void handleZipEntry(ZipEntry entry, InputStream zip, Metadata metadata,
+                                ParseContext context, EndDocumentShieldingContentHandler handler)
+            throws IOException, SAXException, TikaException {
+        if (entry == null) return;
+
+        if (entry.getName().equals("mimetype")) {
+            String type = IOUtils.toString(zip, UTF_8);
+            metadata.set(Metadata.CONTENT_TYPE, type);
+        } else if (entry.getName().equals(META_NAME)) {
+            meta.parse(zip, new DefaultHandler(), metadata, context);
+        } else if (entry.getName().endsWith("content.xml")) {
+            if (content instanceof OpenDocumentContentParser) {
+                ((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
+            } else {
+                // Foreign content parser was set:
+                content.parse(zip, handler, metadata, context);
+            }
+        } else if (entry.getName().endsWith("styles.xml")) {
+            if (content instanceof OpenDocumentContentParser) {
+                ((OpenDocumentContentParser) content).parseInternal(zip, handler, metadata, context);
+            } else {
+                // Foreign content parser was set:
+                content.parse(zip, handler, metadata, context);
+            }
+        }
+    }
+}