You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/02 16:40:56 UTC
svn commit: r991955 [6/6] - in /tika/trunk:
tika-core/src/test/java/org/apache/tika/
tika-core/src/test/java/org/apache/tika/detect/
tika-core/src/test/java/org/apache/tika/language/
tika-core/src/test/java/org/apache/tika/sax/ tika-core/src/test/resou...
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Thu Sep 2 14:40:55 2010
@@ -1,166 +1,166 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
-import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
-import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
-import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
-import org.apache.poi.xwpf.usermodel.XWPFParagraph;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
-import org.xml.sax.SAXException;
-
-public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
-
- public XWPFWordExtractorDecorator(XWPFWordExtractor extractor) {
- super(extractor, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
- }
-
- /**
- * @see org.apache.poi.xwpf.extractor.XWPFWordExtractor#getText()
- */
- @Override
- protected void buildXHTML(XHTMLContentHandler xhtml)
- throws SAXException, XmlException, IOException {
- XWPFDocument document = (XWPFDocument) extractor.getDocument();
- XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
-
- // headers
- extractHeaders(xhtml, hfPolicy);
-
- // first all paragraphs
- Iterator<XWPFParagraph> i = document.getParagraphsIterator();
- while (i.hasNext()) {
- XWPFParagraph paragraph = i.next();
-
- CTSectPr ctSectPr = null;
- if (paragraph.getCTP().getPPr() != null) {
- ctSectPr = paragraph.getCTP().getPPr().getSectPr();
- }
-
- XWPFHeaderFooterPolicy headerFooterPolicy = null;
-
- if (ctSectPr != null) {
- headerFooterPolicy =
- new XWPFHeaderFooterPolicy(document, ctSectPr);
- extractHeaders(xhtml, headerFooterPolicy);
- }
-
- XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
- new XWPFHyperlinkDecorator(paragraph, null, true));
-
- CTBookmark[] bookmarks = paragraph.getCTP().getBookmarkStartArray();
- for (CTBookmark bookmark : bookmarks) {
- xhtml.element("p", bookmark.getName());
- }
-
- xhtml.element("p", decorator.getText());
-
- if (ctSectPr != null) {
- extractFooters(xhtml, headerFooterPolicy);
- }
- }
-
- // then all document tables
- extractTableContent(document, xhtml);
- extractFooters(xhtml, hfPolicy);
- }
-
- private void extractFooters(
- XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy)
- throws SAXException {
- // footers
- if (hfPolicy.getFirstPageFooter() != null) {
- xhtml.element("p", hfPolicy.getFirstPageFooter().getText());
- }
- if (hfPolicy.getEvenPageFooter() != null) {
- xhtml.element("p", hfPolicy.getEvenPageFooter().getText());
- }
- if (hfPolicy.getDefaultFooter() != null) {
- xhtml.element("p", hfPolicy.getDefaultFooter().getText());
- }
- }
-
- private void extractHeaders(
- XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy)
- throws SAXException {
- if (hfPolicy.getFirstPageHeader() != null) {
- xhtml.element("p", hfPolicy.getFirstPageHeader().getText());
- }
- if (hfPolicy.getEvenPageHeader() != null) {
- xhtml.element("p", hfPolicy.getEvenPageHeader().getText());
- }
- if (hfPolicy.getDefaultHeader() != null) {
- xhtml.element("p", hfPolicy.getDefaultHeader().getText());
- }
- }
-
- /**
- * Low level structured parsing of document tables.
- */
- private void extractTableContent(XWPFDocument doc, XHTMLContentHandler xhtml)
- throws SAXException {
- for (CTTbl table : doc.getDocument().getBody().getTblArray()) {
- xhtml.startElement("table");
- xhtml.startElement("tbody");
- CTRow[] rows = table.getTrArray();
- for (CTRow row : rows) {
- xhtml.startElement("tr");
- CTTc[] cells = row.getTcArray();
- for (CTTc tc : cells) {
- xhtml.startElement("td");
- CTP[] content = tc.getPArray();
- for (CTP ctp : content) {
- XWPFParagraph p = new MyXWPFParagraph(ctp, doc);
-
- XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
- new XWPFHyperlinkDecorator(p, null, true));
-
- xhtml.element("p", decorator.getText());
- }
-
- xhtml.endElement("td");
- }
- xhtml.endElement("tr");
- }
- xhtml.endElement("tbody");
- xhtml.endElement("table");
- }
- }
-
- /**
- * Private wrapper class that makes the protected {@link XWPFParagraph}
- * constructor available.
- */
- private static class MyXWPFParagraph extends XWPFParagraph {
- private MyXWPFParagraph(CTP ctp, XWPFDocument xwpfDocument) {
- super(ctp, xwpfDocument);
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
+import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
+import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
+import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.xmlbeans.XmlException;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
+import org.xml.sax.SAXException;
+
+public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
+
+ public XWPFWordExtractorDecorator(XWPFWordExtractor extractor) {
+ super(extractor, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+ }
+
+ /**
+ * @see org.apache.poi.xwpf.extractor.XWPFWordExtractor#getText()
+ */
+ @Override
+ protected void buildXHTML(XHTMLContentHandler xhtml)
+ throws SAXException, XmlException, IOException {
+ XWPFDocument document = (XWPFDocument) extractor.getDocument();
+ XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
+
+ // headers
+ extractHeaders(xhtml, hfPolicy);
+
+ // first all paragraphs
+ Iterator<XWPFParagraph> i = document.getParagraphsIterator();
+ while (i.hasNext()) {
+ XWPFParagraph paragraph = i.next();
+
+ CTSectPr ctSectPr = null;
+ if (paragraph.getCTP().getPPr() != null) {
+ ctSectPr = paragraph.getCTP().getPPr().getSectPr();
+ }
+
+ XWPFHeaderFooterPolicy headerFooterPolicy = null;
+
+ if (ctSectPr != null) {
+ headerFooterPolicy =
+ new XWPFHeaderFooterPolicy(document, ctSectPr);
+ extractHeaders(xhtml, headerFooterPolicy);
+ }
+
+ XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
+ new XWPFHyperlinkDecorator(paragraph, null, true));
+
+ CTBookmark[] bookmarks = paragraph.getCTP().getBookmarkStartArray();
+ for (CTBookmark bookmark : bookmarks) {
+ xhtml.element("p", bookmark.getName());
+ }
+
+ xhtml.element("p", decorator.getText());
+
+ if (ctSectPr != null) {
+ extractFooters(xhtml, headerFooterPolicy);
+ }
+ }
+
+ // then all document tables
+ extractTableContent(document, xhtml);
+ extractFooters(xhtml, hfPolicy);
+ }
+
+ private void extractFooters(
+ XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy)
+ throws SAXException {
+ // footers
+ if (hfPolicy.getFirstPageFooter() != null) {
+ xhtml.element("p", hfPolicy.getFirstPageFooter().getText());
+ }
+ if (hfPolicy.getEvenPageFooter() != null) {
+ xhtml.element("p", hfPolicy.getEvenPageFooter().getText());
+ }
+ if (hfPolicy.getDefaultFooter() != null) {
+ xhtml.element("p", hfPolicy.getDefaultFooter().getText());
+ }
+ }
+
+ private void extractHeaders(
+ XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy)
+ throws SAXException {
+ if (hfPolicy.getFirstPageHeader() != null) {
+ xhtml.element("p", hfPolicy.getFirstPageHeader().getText());
+ }
+ if (hfPolicy.getEvenPageHeader() != null) {
+ xhtml.element("p", hfPolicy.getEvenPageHeader().getText());
+ }
+ if (hfPolicy.getDefaultHeader() != null) {
+ xhtml.element("p", hfPolicy.getDefaultHeader().getText());
+ }
+ }
+
+ /**
+ * Low level structured parsing of document tables.
+ */
+ private void extractTableContent(XWPFDocument doc, XHTMLContentHandler xhtml)
+ throws SAXException {
+ for (CTTbl table : doc.getDocument().getBody().getTblArray()) {
+ xhtml.startElement("table");
+ xhtml.startElement("tbody");
+ CTRow[] rows = table.getTrArray();
+ for (CTRow row : rows) {
+ xhtml.startElement("tr");
+ CTTc[] cells = row.getTcArray();
+ for (CTTc tc : cells) {
+ xhtml.startElement("td");
+ CTP[] content = tc.getPArray();
+ for (CTP ctp : content) {
+ XWPFParagraph p = new MyXWPFParagraph(ctp, doc);
+
+ XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
+ new XWPFHyperlinkDecorator(p, null, true));
+
+ xhtml.element("p", decorator.getText());
+ }
+
+ xhtml.endElement("td");
+ }
+ xhtml.endElement("tr");
+ }
+ xhtml.endElement("tbody");
+ xhtml.endElement("table");
+ }
+ }
+
+ /**
+ * Private wrapper class that makes the protected {@link XWPFParagraph}
+ * constructor available.
+ */
+ private static class MyXWPFParagraph extends XWPFParagraph {
+ private MyXWPFParagraph(CTP ctp, XWPFDocument xwpfDocument) {
+ super(ctp, xwpfDocument);
+ }
+ }
+}
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java Thu Sep 2 14:40:55 2010
@@ -1,96 +1,96 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.jpeg;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.metadata.Metadata;
-import org.xml.sax.helpers.DefaultHandler;
-
-import java.io.InputStream;
-
-public class JpegParserTest extends TestCase {
- private final Parser parser = new JpegParser();
-
- public void testJPEG() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- // All EXIF/TIFF tags
- assertEquals("Canon EOS 40D", metadata.get("Model"));
-
- // Core EXIF/TIFF tags
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
- assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-
- // Common tags
- assertEquals("2009/10/02 23:02:49", metadata.get(Metadata.DATE));
- assertEquals("canon-55-250 moscow-birds serbor", metadata.get(Metadata.KEYWORDS));
- }
-
- public void testJPEGGeo() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- // Geo tags
- assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
- assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
-
- // All EXIF/TIFF tags
- assertEquals("Canon EOS 40D", metadata.get("Model"));
-
- // Core EXIF/TIFF tags
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
- assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-
- // Common tags
- assertEquals("2009/10/02 23:02:49", metadata.get(Metadata.DATE));
- assertEquals("canon-55-250 moscow-birds serbor", metadata.get(Metadata.KEYWORDS));
- }
-
- public void testJPEGTitleAndDescription() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- // embedded comments with non-ascii characters
- //assertEquals("Tosteberga \u00C4ngar", metadata.get(Metadata.TITLE));
- assertEquals("Tosteberga " + new String(new byte[]{-61, -124}) + "ngar", metadata.get(Metadata.TITLE));
- //assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
- assertEquals("Bird site in north eastern Sk" + new String(new byte[]{-61, -91}) +
- "ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
- assertEquals("Some Tourist", metadata.get(Metadata.AUTHOR));
- // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
- //assertEquals("bird watching nature reserve coast grazelands", metadata.get(Metadata.KEYWORDS));
- // ordering is odd when returned from parser as one string
- assertEquals("grazelands nature reserve bird watching coast", metadata.get(Metadata.KEYWORDS));
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.jpeg;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.metadata.Metadata;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.InputStream;
+
+public class JpegParserTest extends TestCase {
+ private final Parser parser = new JpegParser();
+
+ public void testJPEG() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ // All EXIF/TIFF tags
+ assertEquals("Canon EOS 40D", metadata.get("Model"));
+
+ // Core EXIF/TIFF tags
+ assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+
+ // Common tags
+ assertEquals("2009/10/02 23:02:49", metadata.get(Metadata.DATE));
+ assertEquals("canon-55-250 moscow-birds serbor", metadata.get(Metadata.KEYWORDS));
+ }
+
+ public void testJPEGGeo() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ // Geo tags
+ assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
+ assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
+
+ // All EXIF/TIFF tags
+ assertEquals("Canon EOS 40D", metadata.get("Model"));
+
+ // Core EXIF/TIFF tags
+ assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+
+ // Common tags
+ assertEquals("2009/10/02 23:02:49", metadata.get(Metadata.DATE));
+ assertEquals("canon-55-250 moscow-birds serbor", metadata.get(Metadata.KEYWORDS));
+ }
+
+ public void testJPEGTitleAndDescription() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ // embedded comments with non-ascii characters
+ //assertEquals("Tosteberga \u00C4ngar", metadata.get(Metadata.TITLE));
+ assertEquals("Tosteberga " + new String(new byte[]{-61, -124}) + "ngar", metadata.get(Metadata.TITLE));
+ //assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
+ assertEquals("Bird site in north eastern Sk" + new String(new byte[]{-61, -91}) +
+ "ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
+ assertEquals("Some Tourist", metadata.get(Metadata.AUTHOR));
+ // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
+ //assertEquals("bird watching nature reserve coast grazelands", metadata.get(Metadata.KEYWORDS));
+ // ordering is odd when returned from parser as one string
+ assertEquals("grazelands nature reserve bird watching coast", metadata.get(Metadata.KEYWORDS));
+ }
+}
Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
------------------------------------------------------------------------------
svn:eol-style = native