You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/02 16:40:56 UTC

svn commit: r991955 [6/6] - in /tika/trunk: tika-core/src/test/java/org/apache/tika/ tika-core/src/test/java/org/apache/tika/detect/ tika-core/src/test/java/org/apache/tika/language/ tika-core/src/test/java/org/apache/tika/sax/ tika-core/src/test/resou...

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Thu Sep  2 14:40:55 2010
@@ -1,166 +1,166 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft.ooxml;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
-import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
-import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
-import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
-import org.apache.poi.xwpf.usermodel.XWPFParagraph;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
-import org.xml.sax.SAXException;
-
-public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
-
-    public XWPFWordExtractorDecorator(XWPFWordExtractor extractor) {
-        super(extractor, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
-    }
-
-    /**
-     * @see org.apache.poi.xwpf.extractor.XWPFWordExtractor#getText()
-     */
-    @Override
-    protected void buildXHTML(XHTMLContentHandler xhtml)
-            throws SAXException, XmlException, IOException {
-        XWPFDocument document = (XWPFDocument) extractor.getDocument();
-        XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
-
-        // headers
-        extractHeaders(xhtml, hfPolicy);
-
-        // first all paragraphs
-        Iterator<XWPFParagraph> i = document.getParagraphsIterator();
-        while (i.hasNext()) {
-            XWPFParagraph paragraph = i.next();
-
-            CTSectPr ctSectPr = null;
-            if (paragraph.getCTP().getPPr() != null) {
-                ctSectPr = paragraph.getCTP().getPPr().getSectPr();
-            }
-
-            XWPFHeaderFooterPolicy headerFooterPolicy = null;
-
-            if (ctSectPr != null) {
-                headerFooterPolicy =
-                    new XWPFHeaderFooterPolicy(document, ctSectPr);
-                extractHeaders(xhtml, headerFooterPolicy);
-            }
-
-            XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
-                    new XWPFHyperlinkDecorator(paragraph, null, true));
-
-            CTBookmark[] bookmarks = paragraph.getCTP().getBookmarkStartArray();
-            for (CTBookmark bookmark : bookmarks) {
-                xhtml.element("p", bookmark.getName());
-            }
-
-            xhtml.element("p", decorator.getText());
-
-            if (ctSectPr != null) {
-                extractFooters(xhtml, headerFooterPolicy);
-            }
-        }
-
-        // then all document tables
-        extractTableContent(document, xhtml);
-        extractFooters(xhtml, hfPolicy);
-    }
-
-    private void extractFooters(
-            XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy)
-            throws SAXException {
-        // footers
-        if (hfPolicy.getFirstPageFooter() != null) {
-            xhtml.element("p", hfPolicy.getFirstPageFooter().getText());
-        }
-        if (hfPolicy.getEvenPageFooter() != null) {
-            xhtml.element("p", hfPolicy.getEvenPageFooter().getText());
-        }
-        if (hfPolicy.getDefaultFooter() != null) {
-            xhtml.element("p", hfPolicy.getDefaultFooter().getText());
-        }
-    }
-
-    private void extractHeaders(
-            XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy)
-            throws SAXException {
-        if (hfPolicy.getFirstPageHeader() != null) {
-            xhtml.element("p", hfPolicy.getFirstPageHeader().getText());
-        }
-        if (hfPolicy.getEvenPageHeader() != null) {
-            xhtml.element("p", hfPolicy.getEvenPageHeader().getText());
-        }
-        if (hfPolicy.getDefaultHeader() != null) {
-            xhtml.element("p", hfPolicy.getDefaultHeader().getText());
-        }
-    }
-
-    /**
-     * Low level structured parsing of document tables.
-     */
-    private void extractTableContent(XWPFDocument doc, XHTMLContentHandler xhtml)
-            throws SAXException {
-        for (CTTbl table : doc.getDocument().getBody().getTblArray()) {
-            xhtml.startElement("table");
-            xhtml.startElement("tbody");
-            CTRow[] rows = table.getTrArray();
-            for (CTRow row : rows) {
-                xhtml.startElement("tr");
-                CTTc[] cells = row.getTcArray();
-                for (CTTc tc : cells) {
-                    xhtml.startElement("td");
-                    CTP[] content = tc.getPArray();
-                    for (CTP ctp : content) {
-                        XWPFParagraph p = new MyXWPFParagraph(ctp, doc);
-
-                        XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
-                                new XWPFHyperlinkDecorator(p, null, true));
-
-                        xhtml.element("p", decorator.getText());
-                    }
-
-                    xhtml.endElement("td");
-                }
-                xhtml.endElement("tr");
-            }
-            xhtml.endElement("tbody");
-            xhtml.endElement("table");
-        }
-    }
-
-    /**
-     * Private wrapper class that makes the protected {@link XWPFParagraph}
-     * constructor available.
-     */
-    private static class MyXWPFParagraph extends XWPFParagraph {
-        private MyXWPFParagraph(CTP ctp, XWPFDocument xwpfDocument) {
-            super(ctp, xwpfDocument);
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.microsoft.ooxml;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
+import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
+import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
+import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.apache.xmlbeans.XmlException;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
+import org.xml.sax.SAXException;
+
+public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
+
+    public XWPFWordExtractorDecorator(XWPFWordExtractor extractor) {
+        super(extractor, "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
+    }
+
+    /**
+     * @see org.apache.poi.xwpf.extractor.XWPFWordExtractor#getText()
+     */
+    @Override
+    protected void buildXHTML(XHTMLContentHandler xhtml)
+            throws SAXException, XmlException, IOException {
+        XWPFDocument document = (XWPFDocument) extractor.getDocument();
+        XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
+
+        // headers
+        extractHeaders(xhtml, hfPolicy);
+
+        // first all paragraphs
+        Iterator<XWPFParagraph> i = document.getParagraphsIterator();
+        while (i.hasNext()) {
+            XWPFParagraph paragraph = i.next();
+
+            CTSectPr ctSectPr = null;
+            if (paragraph.getCTP().getPPr() != null) {
+                ctSectPr = paragraph.getCTP().getPPr().getSectPr();
+            }
+
+            XWPFHeaderFooterPolicy headerFooterPolicy = null;
+
+            if (ctSectPr != null) {
+                headerFooterPolicy =
+                    new XWPFHeaderFooterPolicy(document, ctSectPr);
+                extractHeaders(xhtml, headerFooterPolicy);
+            }
+
+            XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
+                    new XWPFHyperlinkDecorator(paragraph, null, true));
+
+            CTBookmark[] bookmarks = paragraph.getCTP().getBookmarkStartArray();
+            for (CTBookmark bookmark : bookmarks) {
+                xhtml.element("p", bookmark.getName());
+            }
+
+            xhtml.element("p", decorator.getText());
+
+            if (ctSectPr != null) {
+                extractFooters(xhtml, headerFooterPolicy);
+            }
+        }
+
+        // then all document tables
+        extractTableContent(document, xhtml);
+        extractFooters(xhtml, hfPolicy);
+    }
+
+    private void extractFooters(
+            XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy)
+            throws SAXException {
+        // footers
+        if (hfPolicy.getFirstPageFooter() != null) {
+            xhtml.element("p", hfPolicy.getFirstPageFooter().getText());
+        }
+        if (hfPolicy.getEvenPageFooter() != null) {
+            xhtml.element("p", hfPolicy.getEvenPageFooter().getText());
+        }
+        if (hfPolicy.getDefaultFooter() != null) {
+            xhtml.element("p", hfPolicy.getDefaultFooter().getText());
+        }
+    }
+
+    private void extractHeaders(
+            XHTMLContentHandler xhtml, XWPFHeaderFooterPolicy hfPolicy)
+            throws SAXException {
+        if (hfPolicy.getFirstPageHeader() != null) {
+            xhtml.element("p", hfPolicy.getFirstPageHeader().getText());
+        }
+        if (hfPolicy.getEvenPageHeader() != null) {
+            xhtml.element("p", hfPolicy.getEvenPageHeader().getText());
+        }
+        if (hfPolicy.getDefaultHeader() != null) {
+            xhtml.element("p", hfPolicy.getDefaultHeader().getText());
+        }
+    }
+
+    /**
+     * Low level structured parsing of document tables.
+     */
+    private void extractTableContent(XWPFDocument doc, XHTMLContentHandler xhtml)
+            throws SAXException {
+        for (CTTbl table : doc.getDocument().getBody().getTblArray()) {
+            xhtml.startElement("table");
+            xhtml.startElement("tbody");
+            CTRow[] rows = table.getTrArray();
+            for (CTRow row : rows) {
+                xhtml.startElement("tr");
+                CTTc[] cells = row.getTcArray();
+                for (CTTc tc : cells) {
+                    xhtml.startElement("td");
+                    CTP[] content = tc.getPArray();
+                    for (CTP ctp : content) {
+                        XWPFParagraph p = new MyXWPFParagraph(ctp, doc);
+
+                        XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
+                                new XWPFHyperlinkDecorator(p, null, true));
+
+                        xhtml.element("p", decorator.getText());
+                    }
+
+                    xhtml.endElement("td");
+                }
+                xhtml.endElement("tr");
+            }
+            xhtml.endElement("tbody");
+            xhtml.endElement("table");
+        }
+    }
+
+    /**
+     * Private wrapper class that makes the protected {@link XWPFParagraph}
+     * constructor available.
+     */
+    private static class MyXWPFParagraph extends XWPFParagraph {
+        private MyXWPFParagraph(CTP ctp, XWPFDocument xwpfDocument) {
+            super(ctp, xwpfDocument);
+        }
+    }
+}

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java Thu Sep  2 14:40:55 2010
@@ -1,96 +1,96 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.jpeg;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.metadata.Metadata;
-import org.xml.sax.helpers.DefaultHandler;
-
-import java.io.InputStream;
-
-public class JpegParserTest extends TestCase {
-    private final Parser parser = new JpegParser();
-
-    public void testJPEG() throws Exception {
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
-        InputStream stream =
-            getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
-        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
-        // All EXIF/TIFF tags
-        assertEquals("Canon EOS 40D", metadata.get("Model"));
-        
-        // Core EXIF/TIFF tags
-        assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
-        assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
-        assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
-        assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-        
-        // Common tags
-        assertEquals("2009/10/02 23:02:49", metadata.get(Metadata.DATE));
-        assertEquals("canon-55-250 moscow-birds serbor", metadata.get(Metadata.KEYWORDS));
-    }
-
-    public void testJPEGGeo() throws Exception {
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
-        InputStream stream =
-            getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg");
-        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-        
-        // Geo tags
-        assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
-        assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
-
-        // All EXIF/TIFF tags
-        assertEquals("Canon EOS 40D", metadata.get("Model"));
-        
-        // Core EXIF/TIFF tags
-        assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
-        assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
-        assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
-        assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-        
-        // Common tags
-        assertEquals("2009/10/02 23:02:49", metadata.get(Metadata.DATE));
-        assertEquals("canon-55-250 moscow-birds serbor", metadata.get(Metadata.KEYWORDS));
-    }
-    
-    public void testJPEGTitleAndDescription() throws Exception {
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
-        InputStream stream =
-            getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
-        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-          
-        // embedded comments with non-ascii characters
-        //assertEquals("Tosteberga \u00C4ngar", metadata.get(Metadata.TITLE));
-        assertEquals("Tosteberga " + new String(new byte[]{-61, -124}) + "ngar", metadata.get(Metadata.TITLE));
-        //assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
-        assertEquals("Bird site in north eastern Sk" + new String(new byte[]{-61, -91}) + 
-        		"ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
-        assertEquals("Some Tourist", metadata.get(Metadata.AUTHOR));
-        // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
-        //assertEquals("bird watching nature reserve coast grazelands", metadata.get(Metadata.KEYWORDS));
-        // ordering is odd when returned from parser as one string
-        assertEquals("grazelands nature reserve bird watching coast", metadata.get(Metadata.KEYWORDS));
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.jpeg;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.metadata.Metadata;
+import org.xml.sax.helpers.DefaultHandler;
+
+import java.io.InputStream;
+
+public class JpegParserTest extends TestCase {
+    private final Parser parser = new JpegParser();
+
+    public void testJPEG() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+        InputStream stream =
+            getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
+        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+        // All EXIF/TIFF tags
+        assertEquals("Canon EOS 40D", metadata.get("Model"));
+        
+        // Core EXIF/TIFF tags
+        assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+        assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+        assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+        assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+        
+        // Common tags
+        assertEquals("2009/10/02 23:02:49", metadata.get(Metadata.DATE));
+        assertEquals("canon-55-250 moscow-birds serbor", metadata.get(Metadata.KEYWORDS));
+    }
+
+    public void testJPEGGeo() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+        InputStream stream =
+            getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg");
+        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+        
+        // Geo tags
+        assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
+        assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
+
+        // All EXIF/TIFF tags
+        assertEquals("Canon EOS 40D", metadata.get("Model"));
+        
+        // Core EXIF/TIFF tags
+        assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+        assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+        assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+        assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+        
+        // Common tags
+        assertEquals("2009/10/02 23:02:49", metadata.get(Metadata.DATE));
+        assertEquals("canon-55-250 moscow-birds serbor", metadata.get(Metadata.KEYWORDS));
+    }
+    
+    public void testJPEGTitleAndDescription() throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+        InputStream stream =
+            getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
+        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+          
+        // embedded comments with non-ascii characters
+        //assertEquals("Tosteberga \u00C4ngar", metadata.get(Metadata.TITLE));
+        assertEquals("Tosteberga " + new String(new byte[]{-61, -124}) + "ngar", metadata.get(Metadata.TITLE));
+        //assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
+        assertEquals("Bird site in north eastern Sk" + new String(new byte[]{-61, -91}) + 
+        		"ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION));
+        assertEquals("Some Tourist", metadata.get(Metadata.AUTHOR));
+        // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
+        //assertEquals("bird watching nature reserve coast grazelands", metadata.get(Metadata.KEYWORDS));
+        // ordering is odd when returned from parser as one string
+        assertEquals("grazelands nature reserve bird watching coast", metadata.get(Metadata.KEYWORDS));
+    }
+}

Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
------------------------------------------------------------------------------
    svn:eol-style = native