You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/02 16:40:56 UTC
svn commit: r991955 [5/6] - in /tika/trunk: tika-core/src/test/java/org/apache/tika/ tika-core/src/test/java/org/apache/tika/detect/ tika-core/src/test/java/org/apache/tika/language/ tika-core/src/test/java/org/apache/tika/sax/ tika-core/src/test/resou...

Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/testlargerbuffer.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java Thu Sep  2 14:40:55 2010
@@ -1,62 +1,62 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.jpeg;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.Set;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-public class JpegParser implements Parser {
-
-    private static final Set<MediaType> SUPPORTED_TYPES =
-        Collections.singleton(MediaType.image("jpeg"));
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return SUPPORTED_TYPES;
-    }
-
-    /**
-     * @deprecated This method will be removed in Apache Tika 1.0.
-     */
-    public void parse(
-            InputStream stream, ContentHandler handler, Metadata metadata)
-            throws IOException, SAXException, TikaException {
-        parse(stream, handler, metadata, new ParseContext());
-    }
-
-    public void parse(
-            InputStream stream, ContentHandler handler,
-            Metadata metadata, ParseContext context)
-            throws IOException, SAXException, TikaException {
-        new JpegExtractor(metadata).parse(stream);
-
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-        xhtml.endDocument();
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.jpeg;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+public class JpegParser implements Parser {
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.singleton(MediaType.image("jpeg"));
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * @deprecated This method will be removed in Apache Tika 1.0.
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        parse(stream, handler, metadata, new ParseContext());
+    }
+
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        new JpegExtractor(metadata).parse(stream);
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java Thu Sep  2 14:40:55 2010
@@ -1,78 +1,78 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import java.io.IOException;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.apache.xmlbeans.XmlException;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Base class for all Tika OOXML extractors.
- * 
- * Tika extractors decorate POI extractors so that the parsed content of
- * documents is returned as a sequence of XHTML SAX events. Subclasses must
- * implement the buildXHTML method {@link #buildXHTML(XHTMLContentHandler)} that
- * populates the {@link XHTMLContentHandler} object received as parameter.
- */
-public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
-    protected POIXMLTextExtractor extractor;
-
-    private final String type;
-
-    public AbstractOOXMLExtractor(POIXMLTextExtractor extractor, String type) {
-        this.extractor = extractor;
-        this.type = type;
-    }
-
-    /**
-     * @see org.apache.tika.parser.microsoft.ooxml.OOXMLExtractor#getDocument()
-     */
-    public POIXMLDocument getDocument() {
-        return extractor.getDocument();
-    }
-
-    /**
-     * @see org.apache.tika.parser.microsoft.ooxml.OOXMLExtractor#getMetadataExtractor()
-     */
-    public MetadataExtractor getMetadataExtractor() {
-        return new MetadataExtractor(extractor, type);
-    }
-
-    /**
-     * @see org.apache.tika.parser.microsoft.ooxml.OOXMLExtractor#getXHTML(org.xml.sax.ContentHandler,
-     *      org.apache.tika.metadata.Metadata)
-     */
-    public void getXHTML(ContentHandler handler, Metadata metadata)
-            throws SAXException, XmlException, IOException {
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-        buildXHTML(xhtml);
-        xhtml.endDocument();
-    }
-
-    /**
-     * Populates the {@link XHTMLContentHandler} object received as parameter.
-     */
-    protected abstract void buildXHTML(XHTMLContentHandler xhtml)
-            throws SAXException, XmlException, IOException;
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.io.IOException;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.xmlbeans.XmlException;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Base class for all Tika OOXML extractors.
+ * 
+ * Tika extractors decorate POI extractors so that the parsed content of
+ * documents is returned as a sequence of XHTML SAX events. Subclasses must
+ * implement the buildXHTML method {@link #buildXHTML(XHTMLContentHandler)} that
+ * populates the {@link XHTMLContentHandler} object received as parameter.
+ */
+public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
+    protected POIXMLTextExtractor extractor;
+
+    private final String type;
+
+    public AbstractOOXMLExtractor(POIXMLTextExtractor extractor, String type) {
+        this.extractor = extractor;
+        this.type = type;
+    }
+
+    /**
+     * @see org.apache.tika.parser.microsoft.ooxml.OOXMLExtractor#getDocument()
+     */
+    public POIXMLDocument getDocument() {
+        return extractor.getDocument();
+    }
+
+    /**
+     * @see org.apache.tika.parser.microsoft.ooxml.OOXMLExtractor#getMetadataExtractor()
+     */
+    public MetadataExtractor getMetadataExtractor() {
+        return new MetadataExtractor(extractor, type);
+    }
+
+    /**
+     * @see org.apache.tika.parser.microsoft.ooxml.OOXMLExtractor#getXHTML(org.xml.sax.ContentHandler,
+     *      org.apache.tika.metadata.Metadata)
+     */
+    public void getXHTML(ContentHandler handler, Metadata metadata)
+            throws SAXException, XmlException, IOException {
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        buildXHTML(xhtml);
+        xhtml.endDocument();
+    }
+
+    /**
+     * Populates the {@link XHTMLContentHandler} object received as parameter.
+     */
+    protected abstract void buildXHTML(XHTMLContentHandler xhtml)
+            throws SAXException, XmlException, IOException;
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java Thu Sep  2 14:40:55 2010
@@ -1,147 +1,147 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import java.util.Date;
-
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.POIXMLProperties.CoreProperties;
-import org.apache.poi.POIXMLProperties.ExtendedProperties;
-import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
-import org.apache.poi.openxml4j.util.Nullable;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.PagedText;
-import org.apache.tika.metadata.Property;
-import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
-
-/**
- * OOXML metadata extractor.
- * 
- * Currently POI doesn't support metadata extraction for OOXML.
- * 
- * @see OOXMLExtractor#getMetadataExtractor()
- */
-public class MetadataExtractor {
-
-    private final POIXMLTextExtractor extractor;
-
-    private final String type;
-
-    public MetadataExtractor(POIXMLTextExtractor extractor, String type) {
-        this.extractor = extractor;
-        this.type = type;
-    }
-
-    public void extract(Metadata metadata) throws TikaException {
-        addProperty(metadata, Metadata.CONTENT_TYPE, type);
-        extractMetadata(extractor.getCoreProperties(), metadata);
-        extractMetadata(extractor.getExtendedProperties(), metadata);
-    }
-
-    private void extractMetadata(CoreProperties properties, Metadata metadata) {
-        PackagePropertiesPart propsHolder = properties
-                .getUnderlyingProperties();
-
-        addProperty(metadata, Metadata.CATEGORY, propsHolder.getCategoryProperty());
-        addProperty(metadata, Metadata.CONTENT_STATUS, propsHolder
-                .getContentStatusProperty());
-        addProperty(metadata, Metadata.DATE, propsHolder
-                .getCreatedPropertyString());
-        addProperty(metadata, Metadata.CREATION_DATE, propsHolder
-                .getCreatedProperty());
-        addProperty(metadata, Metadata.CREATOR, propsHolder
-                .getCreatorProperty());
-        addProperty(metadata, Metadata.AUTHOR, propsHolder
-                .getCreatorProperty());
-        addProperty(metadata, Metadata.DESCRIPTION, propsHolder
-                .getDescriptionProperty());
-        addProperty(metadata, Metadata.IDENTIFIER, propsHolder
-                .getIdentifierProperty());
-        addProperty(metadata, Metadata.KEYWORDS, propsHolder
-                .getKeywordsProperty());
-        addProperty(metadata, Metadata.LANGUAGE, propsHolder
-                .getLanguageProperty());
-        addProperty(metadata, Metadata.LAST_AUTHOR, propsHolder
-                .getLastModifiedByProperty());
-        addProperty(metadata, Metadata.LAST_PRINTED, propsHolder
-                .getLastPrintedPropertyString());
-        addProperty(metadata, Metadata.LAST_MODIFIED, propsHolder
-                .getModifiedProperty());
-        addProperty(metadata, Metadata.REVISION_NUMBER, propsHolder
-                .getRevisionProperty());
-        addProperty(metadata, Metadata.SUBJECT, propsHolder
-                .getSubjectProperty());
-        addProperty(metadata, Metadata.TITLE, propsHolder.getTitleProperty());
-        addProperty(metadata, Metadata.VERSION, propsHolder.getVersionProperty());
-    }
-
-    private void extractMetadata(ExtendedProperties properties,
-            Metadata metadata) {
-        CTProperties propsHolder = properties.getUnderlyingProperties();
-
-        addProperty(metadata, Metadata.APPLICATION_NAME, propsHolder
-                .getApplication());
-        addProperty(metadata, Metadata.APPLICATION_VERSION, propsHolder
-                .getAppVersion());
-        addProperty(metadata, Metadata.CHARACTER_COUNT, propsHolder
-                .getCharacters());
-        addProperty(metadata, Metadata.CHARACTER_COUNT_WITH_SPACES, propsHolder
-                .getCharactersWithSpaces());
-        addProperty(metadata, Metadata.PUBLISHER, propsHolder.getCompany());
-        addProperty(metadata, Metadata.LINE_COUNT, propsHolder.getLines());
-        addProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
-        addProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
-        addProperty(metadata, Metadata.PAGE_COUNT, propsHolder.getPages());
-        if (propsHolder.getPages() > 0) {
-            metadata.set(PagedText.N_PAGES, propsHolder.getPages());
-        } else if (propsHolder.getSlides() > 0) {
-            metadata.set(PagedText.N_PAGES, propsHolder.getSlides());
-        }
-        addProperty(metadata, Metadata.PARAGRAPH_COUNT, propsHolder.getParagraphs());
-        addProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder
-                .getPresentationFormat());
-        addProperty(metadata, Metadata.SLIDE_COUNT, propsHolder.getSlides());
-        addProperty(metadata, Metadata.TEMPLATE, propsHolder.getTemplate());
-        addProperty(metadata, Metadata.TOTAL_TIME, propsHolder.getTotalTime());
-        addProperty(metadata, Metadata.WORD_COUNT, propsHolder.getWords());
-    }
-
-    private void addProperty(Metadata metadata, Property property, Nullable<Date> value) {
-        if (value.getValue() != null) {
-            metadata.set(property, value.getValue());
-        }
-    }
-
-    private void addProperty(Metadata metadata, String name, Nullable<?> value) {
-        if (value.getValue() != null) {
-            addProperty(metadata, name, value.getValue().toString());
-        }
-    }
-
-    private void addProperty(Metadata metadata, String name, String value) {
-        if (value != null) {
-            metadata.set(name, value);
-        }
-    }
-
-    private void addProperty(Metadata metadata, String name, long value) {
-        if (value > 0) {
-            metadata.set(name, Long.toString(value));
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.util.Date;
+
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.POIXMLProperties.CoreProperties;
+import org.apache.poi.POIXMLProperties.ExtendedProperties;
+import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
+import org.apache.poi.openxml4j.util.Nullable;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.PagedText;
+import org.apache.tika.metadata.Property;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
+
+/**
+ * OOXML metadata extractor.
+ * 
+ * Currently POI doesn't support metadata extraction for OOXML.
+ * 
+ * @see OOXMLExtractor#getMetadataExtractor()
+ */
+public class MetadataExtractor {
+
+    private final POIXMLTextExtractor extractor;
+
+    private final String type;
+
+    public MetadataExtractor(POIXMLTextExtractor extractor, String type) {
+        this.extractor = extractor;
+        this.type = type;
+    }
+
+    public void extract(Metadata metadata) throws TikaException {
+        addProperty(metadata, Metadata.CONTENT_TYPE, type);
+        extractMetadata(extractor.getCoreProperties(), metadata);
+        extractMetadata(extractor.getExtendedProperties(), metadata);
+    }
+
+    private void extractMetadata(CoreProperties properties, Metadata metadata) {
+        PackagePropertiesPart propsHolder = properties
+                .getUnderlyingProperties();
+
+        addProperty(metadata, Metadata.CATEGORY, propsHolder.getCategoryProperty());
+        addProperty(metadata, Metadata.CONTENT_STATUS, propsHolder
+                .getContentStatusProperty());
+        addProperty(metadata, Metadata.DATE, propsHolder
+                .getCreatedPropertyString());
+        addProperty(metadata, Metadata.CREATION_DATE, propsHolder
+                .getCreatedProperty());
+        addProperty(metadata, Metadata.CREATOR, propsHolder
+                .getCreatorProperty());
+        addProperty(metadata, Metadata.AUTHOR, propsHolder
+                .getCreatorProperty());
+        addProperty(metadata, Metadata.DESCRIPTION, propsHolder
+                .getDescriptionProperty());
+        addProperty(metadata, Metadata.IDENTIFIER, propsHolder
+                .getIdentifierProperty());
+        addProperty(metadata, Metadata.KEYWORDS, propsHolder
+                .getKeywordsProperty());
+        addProperty(metadata, Metadata.LANGUAGE, propsHolder
+                .getLanguageProperty());
+        addProperty(metadata, Metadata.LAST_AUTHOR, propsHolder
+                .getLastModifiedByProperty());
+        addProperty(metadata, Metadata.LAST_PRINTED, propsHolder
+                .getLastPrintedPropertyString());
+        addProperty(metadata, Metadata.LAST_MODIFIED, propsHolder
+                .getModifiedProperty());
+        addProperty(metadata, Metadata.REVISION_NUMBER, propsHolder
+                .getRevisionProperty());
+        addProperty(metadata, Metadata.SUBJECT, propsHolder
+                .getSubjectProperty());
+        addProperty(metadata, Metadata.TITLE, propsHolder.getTitleProperty());
+        addProperty(metadata, Metadata.VERSION, propsHolder.getVersionProperty());
+    }
+
+    private void extractMetadata(ExtendedProperties properties,
+            Metadata metadata) {
+        CTProperties propsHolder = properties.getUnderlyingProperties();
+
+        addProperty(metadata, Metadata.APPLICATION_NAME, propsHolder
+                .getApplication());
+        addProperty(metadata, Metadata.APPLICATION_VERSION, propsHolder
+                .getAppVersion());
+        addProperty(metadata, Metadata.CHARACTER_COUNT, propsHolder
+                .getCharacters());
+        addProperty(metadata, Metadata.CHARACTER_COUNT_WITH_SPACES, propsHolder
+                .getCharactersWithSpaces());
+        addProperty(metadata, Metadata.PUBLISHER, propsHolder.getCompany());
+        addProperty(metadata, Metadata.LINE_COUNT, propsHolder.getLines());
+        addProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
+        addProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
+        addProperty(metadata, Metadata.PAGE_COUNT, propsHolder.getPages());
+        if (propsHolder.getPages() > 0) {
+            metadata.set(PagedText.N_PAGES, propsHolder.getPages());
+        } else if (propsHolder.getSlides() > 0) {
+            metadata.set(PagedText.N_PAGES, propsHolder.getSlides());
+        }
+        addProperty(metadata, Metadata.PARAGRAPH_COUNT, propsHolder.getParagraphs());
+        addProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder
+                .getPresentationFormat());
+        addProperty(metadata, Metadata.SLIDE_COUNT, propsHolder.getSlides());
+        addProperty(metadata, Metadata.TEMPLATE, propsHolder.getTemplate());
+        addProperty(metadata, Metadata.TOTAL_TIME, propsHolder.getTotalTime());
+        addProperty(metadata, Metadata.WORD_COUNT, propsHolder.getWords());
+    }
+
+    private void addProperty(Metadata metadata, Property property, Nullable<Date> value) {
+        if (value.getValue() != null) {
+            metadata.set(property, value.getValue());
+        }
+    }
+
+    private void addProperty(Metadata metadata, String name, Nullable<?> value) {
+        if (value.getValue() != null) {
+            addProperty(metadata, name, value.getValue().toString());
+        }
+    }
+
+    private void addProperty(Metadata metadata, String name, String value) {
+        if (value != null) {
+            metadata.set(name, value);
+        }
+    }
+
+    private void addProperty(Metadata metadata, String name, long value) {
+        if (value > 0) {
+            metadata.set(name, Long.toString(value));
+        }
+    }
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java Thu Sep  2 14:40:55 2010
@@ -1,53 +1,53 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import java.io.IOException;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.tika.metadata.Metadata;
-import org.apache.xmlbeans.XmlException;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Interface implemented by all Tika OOXML extractors.
- * 
- * @see org.apache.poi.POIXMLTextExtractor
- */
-public interface OOXMLExtractor {
-
-    /**
-     * Returns the opened document.
-     * 
-     * @see POIXMLTextExtractor#getDocument()
-     */
-    POIXMLDocument getDocument();
-
-    /**
-     * {@link POIXMLTextExtractor#getMetadataTextExtractor()} not yet supported
-     * for OOXML by POI.
-     */
-    MetadataExtractor getMetadataExtractor();
-
-    /**
-     * Parses the document into a sequence of XHTML SAX events sent to the
-     * given content handler.
-     */
-    void getXHTML(ContentHandler handler, Metadata metadata)
-            throws SAXException, XmlException, IOException;
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.io.IOException;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.tika.metadata.Metadata;
+import org.apache.xmlbeans.XmlException;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Interface implemented by all Tika OOXML extractors.
+ * 
+ * @see org.apache.poi.POIXMLTextExtractor
+ */
+public interface OOXMLExtractor {
+
+    /**
+     * Returns the opened document.
+     * 
+     * @see POIXMLTextExtractor#getDocument()
+     */
+    POIXMLDocument getDocument();
+
+    /**
+     * {@link POIXMLTextExtractor#getMetadataTextExtractor()} not yet supported
+     * for OOXML by POI.
+     */
+    MetadataExtractor getMetadataExtractor();
+
+    /**
+     * Parses the document into a sequence of XHTML SAX events sent to the
+     * given content handler.
+     */
+    void getXHTML(ContentHandler handler, Metadata metadata)
+            throws SAXException, XmlException, IOException;
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java Thu Sep  2 14:40:55 2010
@@ -1,99 +1,99 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Locale;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.extractor.ExtractorFactory;
-import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.poi.openxml4j.opc.OPCPackage;
-import org.apache.poi.xslf.XSLFSlideShow;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
-import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.xmlbeans.XmlException;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Figures out the correct {@link OOXMLExtractor} for the supplied document and
- * returns it.
- */
-public class OOXMLExtractorFactory {
-
-    public static void parse(
-            InputStream stream, ContentHandler handler,
-            Metadata metadata, Locale locale)
-            throws IOException, SAXException, TikaException {
-        try {
-            OOXMLExtractor extractor;
-
-            POIXMLTextExtractor poiExtractor;
-            if(stream instanceof TikaInputStream && 
-            	    ((TikaInputStream)stream).getOpenContainer() != null) {
-               poiExtractor = ExtractorFactory.createExtractor(
-                    (OPCPackage)((TikaInputStream)stream).getOpenContainer()
-               );
-            } else {
-               poiExtractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(stream);
-            }
-            
-            POIXMLDocument document = poiExtractor.getDocument();
-            if (document instanceof XSLFSlideShow) {
-                extractor = new XSLFPowerPointExtractorDecorator(
-                        (XSLFPowerPointExtractor) poiExtractor);
-            } else if (document instanceof XSSFWorkbook) {
-                extractor = new XSSFExcelExtractorDecorator(
-                        (XSSFExcelExtractor) poiExtractor, locale);
-            } else if (document instanceof XWPFDocument) {
-                extractor = new XWPFWordExtractorDecorator(
-                        (XWPFWordExtractor) poiExtractor);
-            } else {
-                extractor = new POIXMLTextExtractorDecorator(poiExtractor);
-            }
-
-            extractor.getMetadataExtractor().extract(metadata);
-            extractor.getXHTML(handler, metadata);
-        } catch (IllegalArgumentException e) {
-            if (e.getMessage().startsWith("No supported documents found")) {
-                throw new TikaException(
-                        "TIKA-418: RuntimeException while getting content"
-                        + " for thmx and xps file types", e);
-            } else {
-                throw new TikaException("Error creating OOXML extractor", e);
-            }
-        } catch (InvalidFormatException e) {
-            throw new TikaException("Error creating OOXML extractor", e);
-        } catch (OpenXML4JException e) {
-            throw new TikaException("Error creating OOXML extractor", e);
-        } catch (XmlException e) {
-            throw new TikaException("Error creating OOXML extractor", e);
-
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Locale;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.xmlbeans.XmlException;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Figures out the correct {@link OOXMLExtractor} for the supplied document and
+ * returns it.
+ */
+public class OOXMLExtractorFactory {
+
+    public static void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, Locale locale)
+            throws IOException, SAXException, TikaException {
+        try {
+            OOXMLExtractor extractor;
+
+            POIXMLTextExtractor poiExtractor;
+            if(stream instanceof TikaInputStream && 
+            	    ((TikaInputStream)stream).getOpenContainer() != null) {
+               poiExtractor = ExtractorFactory.createExtractor(
+                    (OPCPackage)((TikaInputStream)stream).getOpenContainer()
+               );
+            } else {
+               poiExtractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(stream);
+            }
+            
+            POIXMLDocument document = poiExtractor.getDocument();
+            if (document instanceof XSLFSlideShow) {
+                extractor = new XSLFPowerPointExtractorDecorator(
+                        (XSLFPowerPointExtractor) poiExtractor);
+            } else if (document instanceof XSSFWorkbook) {
+                extractor = new XSSFExcelExtractorDecorator(
+                        (XSSFExcelExtractor) poiExtractor, locale);
+            } else if (document instanceof XWPFDocument) {
+                extractor = new XWPFWordExtractorDecorator(
+                        (XWPFWordExtractor) poiExtractor);
+            } else {
+                extractor = new POIXMLTextExtractorDecorator(poiExtractor);
+            }
+
+            extractor.getMetadataExtractor().extract(metadata);
+            extractor.getXHTML(handler, metadata);
+        } catch (IllegalArgumentException e) {
+            if (e.getMessage().startsWith("No supported documents found")) {
+                throw new TikaException(
+                        "TIKA-418: RuntimeException while getting content"
+                        + " for thmx and xps file types", e);
+            } else {
+                throw new TikaException("Error creating OOXML extractor", e);
+            }
+        } catch (InvalidFormatException e) {
+            throw new TikaException("Error creating OOXML extractor", e);
+        } catch (OpenXML4JException e) {
+            throw new TikaException("Error creating OOXML extractor", e);
+        } catch (XmlException e) {
+            throw new TikaException("Error creating OOXML extractor", e);
+
+        }
+    }
+
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java Thu Sep  2 14:40:55 2010
@@ -1,81 +1,81 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Locale;
-import java.util.Set;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * Office Open XML (OOXML) parser.
- * 
- */
-public class OOXMLParser implements Parser {
-
-    private static final Set<MediaType> SUPPORTED_TYPES =
-        Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
-                MediaType.application("x-tika-ooxml"),
-                MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation"),
-                MediaType.application("vnd.ms-powerpoint.presentation.macroenabled.12"),
-                MediaType.application("vnd.openxmlformats-officedocument.presentationml.template"),
-                MediaType.application("vnd.openxmlformats-officedocument.presentationml.slideshow"),
-                MediaType.application("vnd.ms-powerpoint.slideshow.macroenabled.12"),
-                MediaType.application("vnd.ms-powerpoint.addin.macroenabled.12"),
-                MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
-                MediaType.application("vnd.ms-excel.sheet.macroenabled.12"),
-                MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.template"),
-                MediaType.application("vnd.ms-excel.template.macroenabled.12"),
-                MediaType.application("vnd.ms-excel.addin.macroenabled.12"),
-                MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document"),
-                MediaType.application("vnd.ms-word.document.macroenabled.12"),
-                MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.template"),
-                MediaType.application("vnd.ms-word.template.macroenabled.12"))));
-
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        return SUPPORTED_TYPES;
-    }
-
-    public void parse(
-            InputStream stream, ContentHandler handler,
-            Metadata metadata, ParseContext context)
-            throws IOException, SAXException, TikaException {
-        Locale locale = context.get(Locale.class, Locale.getDefault());
-        OOXMLExtractorFactory.parse(stream, handler, metadata, locale);
-    }
-
-    /**
-     * @deprecated This method will be removed in Apache Tika 1.0.
-     */
-    public void parse(
-            InputStream stream, ContentHandler handler, Metadata metadata)
-            throws IOException, SAXException, TikaException {
-        parse(stream, handler, metadata, new ParseContext());
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Office Open XML (OOXML) parser.
+ * 
+ */
+public class OOXMLParser implements Parser {
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                MediaType.application("x-tika-ooxml"),
+                MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation"),
+                MediaType.application("vnd.ms-powerpoint.presentation.macroenabled.12"),
+                MediaType.application("vnd.openxmlformats-officedocument.presentationml.template"),
+                MediaType.application("vnd.openxmlformats-officedocument.presentationml.slideshow"),
+                MediaType.application("vnd.ms-powerpoint.slideshow.macroenabled.12"),
+                MediaType.application("vnd.ms-powerpoint.addin.macroenabled.12"),
+                MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
+                MediaType.application("vnd.ms-excel.sheet.macroenabled.12"),
+                MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.template"),
+                MediaType.application("vnd.ms-excel.template.macroenabled.12"),
+                MediaType.application("vnd.ms-excel.addin.macroenabled.12"),
+                MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.document"),
+                MediaType.application("vnd.ms-word.document.macroenabled.12"),
+                MediaType.application("vnd.openxmlformats-officedocument.wordprocessingml.template"),
+                MediaType.application("vnd.ms-word.template.macroenabled.12"))));
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        Locale locale = context.get(Locale.class, Locale.getDefault());
+        OOXMLExtractorFactory.parse(stream, handler, metadata, locale);
+    }
+
+    /**
+     * @deprecated This method will be removed in Apache Tika 1.0.
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        parse(stream, handler, metadata, new ParseContext());
+    }
+
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/POIXMLTextExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/POIXMLTextExtractorDecorator.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/POIXMLTextExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/POIXMLTextExtractorDecorator.java Thu Sep  2 14:40:55 2010
@@ -1,34 +1,34 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.SAXException;
-
-public class POIXMLTextExtractorDecorator extends AbstractOOXMLExtractor {
-
-    public POIXMLTextExtractorDecorator(POIXMLTextExtractor extractor) {
-        super(extractor, null);
-    }
-
-    @Override
-    protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException {
-        // extract document content as a single string (not structured)
-        xhtml.element("p", extractor.getText());
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.SAXException;
+
+public class POIXMLTextExtractorDecorator extends AbstractOOXMLExtractor {
+
+    public POIXMLTextExtractorDecorator(POIXMLTextExtractor extractor) {
+        super(extractor, null);
+    }
+
+    @Override
+    protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException {
+        // extract document content as a single string (not structured)
+        xhtml.element("p", extractor.getText());
+    }
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/POIXMLTextExtractorDecorator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java Thu Sep  2 14:40:55 2010
@@ -1,96 +1,96 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import java.io.IOException;
-
-import org.apache.poi.xslf.XSLFSlideShow;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
-import org.apache.poi.xslf.usermodel.XMLSlideShow;
-import org.apache.poi.xslf.usermodel.XSLFSlide;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-import org.xml.sax.SAXException;
-
-public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
-
-    public XSLFPowerPointExtractorDecorator(XSLFPowerPointExtractor extractor) {
-        super(extractor, "application/vnd.openxmlformats-officedocument.presentationml.presentation");
-    }
-
-    /**
-     * @see org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getText()
-     */
-    @Override
-    protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException,
-            XmlException, IOException {
-        XSLFSlideShow slideShow = (XSLFSlideShow) extractor.getDocument();
-        XMLSlideShow xmlSlideShow = new XMLSlideShow(slideShow);
-
-        XSLFSlide[] slides = xmlSlideShow.getSlides();
-        for (XSLFSlide slide : slides) {
-            CTSlide rawSlide = slide._getCTSlide();
-            CTSlideIdListEntry slideId = slide._getCTSlideId();
-
-            CTNotesSlide notes = xmlSlideShow._getXSLFSlideShow().getNotes(
-                    slideId);
-            CTCommentList comments = xmlSlideShow._getXSLFSlideShow()
-                    .getSlideComments(slideId);
-
-            xhtml.startElement("div");
-            extractShapeContent(rawSlide.getCSld().getSpTree(), xhtml);
-
-            if (comments != null) {
-                for (CTComment comment : comments.getCmArray()) {
-                    xhtml.element("p", comment.getText());
-                }
-            }
-
-            if (notes != null) {
-                extractShapeContent(notes.getCSld().getSpTree(), xhtml);
-            }
-            xhtml.endElement("div");
-        }
-    }
-
-    private void extractShapeContent(CTGroupShape gs, XHTMLContentHandler xhtml)
-            throws SAXException {
-        CTShape[] shapes = gs.getSpArray();
-        for (CTShape shape : shapes) {
-            CTTextBody textBody = shape.getTxBody();
-            if (textBody != null) {
-                CTTextParagraph[] paras = textBody.getPArray();
-                for (CTTextParagraph textParagraph : paras) {
-                    CTRegularTextRun[] textRuns = textParagraph.getRArray();
-                    for (CTRegularTextRun textRun : textRuns) {
-                        xhtml.element("p", textRun.getT());
-                    }
-                }
-            }
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.io.IOException;
+
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFSlide;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.xmlbeans.XmlException;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+import org.xml.sax.SAXException;
+
+public class XSLFPowerPointExtractorDecorator extends AbstractOOXMLExtractor {
+
+    public XSLFPowerPointExtractorDecorator(XSLFPowerPointExtractor extractor) {
+        super(extractor, "application/vnd.openxmlformats-officedocument.presentationml.presentation");
+    }
+
+    /**
+     * @see org.apache.poi.xslf.extractor.XSLFPowerPointExtractor#getText()
+     */
+    @Override
+    protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException,
+            XmlException, IOException {
+        XSLFSlideShow slideShow = (XSLFSlideShow) extractor.getDocument();
+        XMLSlideShow xmlSlideShow = new XMLSlideShow(slideShow);
+
+        XSLFSlide[] slides = xmlSlideShow.getSlides();
+        for (XSLFSlide slide : slides) {
+            CTSlide rawSlide = slide._getCTSlide();
+            CTSlideIdListEntry slideId = slide._getCTSlideId();
+
+            CTNotesSlide notes = xmlSlideShow._getXSLFSlideShow().getNotes(
+                    slideId);
+            CTCommentList comments = xmlSlideShow._getXSLFSlideShow()
+                    .getSlideComments(slideId);
+
+            xhtml.startElement("div");
+            extractShapeContent(rawSlide.getCSld().getSpTree(), xhtml);
+
+            if (comments != null) {
+                for (CTComment comment : comments.getCmArray()) {
+                    xhtml.element("p", comment.getText());
+                }
+            }
+
+            if (notes != null) {
+                extractShapeContent(notes.getCSld().getSpTree(), xhtml);
+            }
+            xhtml.endElement("div");
+        }
+    }
+
+    private void extractShapeContent(CTGroupShape gs, XHTMLContentHandler xhtml)
+            throws SAXException {
+        CTShape[] shapes = gs.getSpArray();
+        for (CTShape shape : shapes) {
+            CTTextBody textBody = shape.getTxBody();
+            if (textBody != null) {
+                CTTextParagraph[] paras = textBody.getPArray();
+                for (CTTextParagraph textParagraph : paras) {
+                    CTRegularTextRun[] textRuns = textParagraph.getRArray();
+                    for (CTRegularTextRun textRun : textRuns) {
+                        xhtml.element("p", textRun.getT());
+                    }
+                }
+            }
+        }
+    }
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java Thu Sep  2 14:40:55 2010
@@ -1,162 +1,162 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Locale;
-
-import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.ss.usermodel.Cell;
-import org.apache.poi.ss.usermodel.CellStyle;
-import org.apache.poi.ss.usermodel.Comment;
-import org.apache.poi.ss.usermodel.DataFormatter;
-import org.apache.poi.ss.usermodel.HeaderFooter;
-import org.apache.poi.ss.usermodel.Row;
-import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
-import org.apache.poi.xssf.usermodel.XSSFCell;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
-import org.apache.tika.exception.TikaException;
-import org.apache.xmlbeans.XmlException;
-import org.xml.sax.SAXException;
-
-public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
-
-    /**
-     * Internal <code>DataFormatter</code> for formatting Numbers.
-     */
-    private final DataFormatter formatter;
-
-    private final XSSFExcelExtractor extractor;
-    private static final String TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
-
-    public XSSFExcelExtractorDecorator(
-            XSSFExcelExtractor extractor, Locale locale) {
-        super(extractor, TYPE);
-
-        this.extractor = extractor;
-        formatter = new DataFormatter(locale);
-    }
-
-    /**
-     * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
-     */
-    @Override
-    protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException,
-            XmlException, IOException {
-        XSSFWorkbook document = (XSSFWorkbook) extractor.getDocument();
-
-        for (int i = 0; i < document.getNumberOfSheets(); i++) {
-            xhtml.startElement("div");
-            XSSFSheet sheet = (XSSFSheet) document.getSheetAt(i);
-            xhtml.element("h1", document.getSheetName(i));
-
-            // Header(s), if present
-            extractHeaderFooter(sheet.getFirstHeader(), xhtml);
-            extractHeaderFooter(sheet.getOddHeader(), xhtml);
-            extractHeaderFooter(sheet.getEvenHeader(), xhtml);
-
-            xhtml.startElement("table");
-            xhtml.startElement("tbody");
-
-            // Rows and cells
-            for (Object rawR : sheet) {
-                xhtml.startElement("tr");
-                Row row = (Row) rawR;
-                for (Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
-                    xhtml.startElement("td");
-                    Cell cell = ri.next();
-
-                    int type = cell.getCellType();
-                    if (type == Cell.CELL_TYPE_FORMULA) {
-                        type = cell.getCachedFormulaResultType();
-                    }
-                    if (type == Cell.CELL_TYPE_STRING) {
-                        xhtml.characters(cell.getRichStringCellValue()
-                                .getString());
-                    } else if (type == Cell.CELL_TYPE_NUMERIC) {
-                        CellStyle style = cell.getCellStyle();
-                        xhtml.characters(
-                            formatter.formatRawCellContents(cell.getNumericCellValue(),
-                                                            style.getDataFormat(),
-                                                            style.getDataFormatString()));
-                    } else {
-                        XSSFCell xc = (XSSFCell) cell;
-                        String rawValue = xc.getRawValue();
-                        if (rawValue != null) {
-                            xhtml.characters(rawValue);
-                        }
-
-                    }
-
-                    // Output the comment in the same cell as the content
-                    Comment comment = cell.getCellComment();
-                    if (comment != null) {
-                        xhtml.characters(comment.getString().getString());
-                    }
-
-                    xhtml.endElement("td");
-                }
-                xhtml.endElement("tr");
-            }
-
-            xhtml.endElement("tbody");
-            xhtml.endElement("table");
-
-            // Finally footer(s), if present
-            extractHeaderFooter(sheet.getFirstFooter(), xhtml);
-            extractHeaderFooter(sheet.getOddFooter(), xhtml);
-            extractHeaderFooter(sheet.getEvenFooter(), xhtml);
-
-            xhtml.endElement("div");
-        }
-    }
-
-    private void extractHeaderFooter(HeaderFooter hf, XHTMLContentHandler xhtml)
-            throws SAXException {
-        String content = ExcelExtractor._extractHeaderFooter(hf);
-        if (content.length() > 0) {
-            xhtml.element("p", content);
-        }
-    }
-
-    @Override
-    public MetadataExtractor getMetadataExtractor() {
-        return new MetadataExtractor(extractor, TYPE) {
-            @Override
-            public void extract(Metadata metadata) throws TikaException {
-                super.extract(metadata);
-
-                metadata.set(TikaMetadataKeys.PROTECTED, "false");
-
-                XSSFWorkbook document = (XSSFWorkbook) extractor.getDocument();
-
-                for (int i = 0; i < document.getNumberOfSheets(); i++) {
-                    XSSFSheet sheet = document.getSheetAt(i);
-
-                    if (sheet.getProtect()) {
-                        metadata.set(TikaMetadataKeys.PROTECTED, "true");
-                    }
-                }
-            }
-        };
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Locale;
+
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.CellStyle;
+import org.apache.poi.ss.usermodel.Comment;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.usermodel.HeaderFooter;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xssf.usermodel.XSSFCell;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.exception.TikaException;
+import org.apache.xmlbeans.XmlException;
+import org.xml.sax.SAXException;
+
+public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
+
+    /**
+     * Internal <code>DataFormatter</code> for formatting Numbers.
+     */
+    private final DataFormatter formatter;
+
+    private final XSSFExcelExtractor extractor;
+    private static final String TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
+
+    public XSSFExcelExtractorDecorator(
+            XSSFExcelExtractor extractor, Locale locale) {
+        super(extractor, TYPE);
+
+        this.extractor = extractor;
+        formatter = new DataFormatter(locale);
+    }
+
+    /**
+     * @see org.apache.poi.xssf.extractor.XSSFExcelExtractor#getText()
+     */
+    @Override
+    protected void buildXHTML(XHTMLContentHandler xhtml) throws SAXException,
+            XmlException, IOException {
+        XSSFWorkbook document = (XSSFWorkbook) extractor.getDocument();
+
+        for (int i = 0; i < document.getNumberOfSheets(); i++) {
+            xhtml.startElement("div");
+            XSSFSheet sheet = (XSSFSheet) document.getSheetAt(i);
+            xhtml.element("h1", document.getSheetName(i));
+
+            // Header(s), if present
+            extractHeaderFooter(sheet.getFirstHeader(), xhtml);
+            extractHeaderFooter(sheet.getOddHeader(), xhtml);
+            extractHeaderFooter(sheet.getEvenHeader(), xhtml);
+
+            xhtml.startElement("table");
+            xhtml.startElement("tbody");
+
+            // Rows and cells
+            for (Object rawR : sheet) {
+                xhtml.startElement("tr");
+                Row row = (Row) rawR;
+                for (Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
+                    xhtml.startElement("td");
+                    Cell cell = ri.next();
+
+                    int type = cell.getCellType();
+                    if (type == Cell.CELL_TYPE_FORMULA) {
+                        type = cell.getCachedFormulaResultType();
+                    }
+                    if (type == Cell.CELL_TYPE_STRING) {
+                        xhtml.characters(cell.getRichStringCellValue()
+                                .getString());
+                    } else if (type == Cell.CELL_TYPE_NUMERIC) {
+                        CellStyle style = cell.getCellStyle();
+                        xhtml.characters(
+                            formatter.formatRawCellContents(cell.getNumericCellValue(),
+                                                            style.getDataFormat(),
+                                                            style.getDataFormatString()));
+                    } else {
+                        XSSFCell xc = (XSSFCell) cell;
+                        String rawValue = xc.getRawValue();
+                        if (rawValue != null) {
+                            xhtml.characters(rawValue);
+                        }
+
+                    }
+
+                    // Output the comment in the same cell as the content
+                    Comment comment = cell.getCellComment();
+                    if (comment != null) {
+                        xhtml.characters(comment.getString().getString());
+                    }
+
+                    xhtml.endElement("td");
+                }
+                xhtml.endElement("tr");
+            }
+
+            xhtml.endElement("tbody");
+            xhtml.endElement("table");
+
+            // Finally footer(s), if present
+            extractHeaderFooter(sheet.getFirstFooter(), xhtml);
+            extractHeaderFooter(sheet.getOddFooter(), xhtml);
+            extractHeaderFooter(sheet.getEvenFooter(), xhtml);
+
+            xhtml.endElement("div");
+        }
+    }
+
+    private void extractHeaderFooter(HeaderFooter hf, XHTMLContentHandler xhtml)
+            throws SAXException {
+        String content = ExcelExtractor._extractHeaderFooter(hf);
+        if (content.length() > 0) {
+            xhtml.element("p", content);
+        }
+    }
+
+    @Override
+    public MetadataExtractor getMetadataExtractor() {
+        return new MetadataExtractor(extractor, TYPE) {
+            @Override
+            public void extract(Metadata metadata) throws TikaException {
+                super.extract(metadata);
+
+                metadata.set(TikaMetadataKeys.PROTECTED, "false");
+
+                XSSFWorkbook document = (XSSFWorkbook) extractor.getDocument();
+
+                for (int i = 0; i < document.getNumberOfSheets(); i++) {
+                    XSSFSheet sheet = document.getSheetAt(i);
+
+                    if (sheet.getProtect()) {
+                        metadata.set(TikaMetadataKeys.PROTECTED, "true");
+                    }
+                }
+            }
+        };
+    }
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
------------------------------------------------------------------------------
    svn:eol-style = native