You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/24 15:44:45 UTC

[3/4] tika git commit: TIKA-2020, remove 3 parameter parse() and simplify CAD tests

TIKA-2020, remove 3 parameter parse() and simplify CAD tests


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0c71b2ff
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0c71b2ff
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0c71b2ff

Branch: refs/heads/2.x
Commit: 0c71b2ffc97a3907a541fdd164ba79302f5c0637
Parents: 6bb6827
Author: tballison <ta...@mitre.org>
Authored: Fri Jun 24 11:13:54 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Fri Jun 24 11:13:54 2016 -0400

----------------------------------------------------------------------
 .../apache/tika/parser/dwg/DWGParserTest.java   | 372 +++++++++----------
 .../apache/tika/parser/prt/PRTParserTest.java   | 214 +++++------
 2 files changed, 271 insertions(+), 315 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/0c71b2ff/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java b/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
index 321d715..ee3e767 100644
--- a/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
+++ b/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
@@ -1,202 +1,170 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.dwg;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.apache.tika.TikaTest.assertContains;
-
-import java.io.InputStream;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-public class DWGParserTest {
-  
-    @Test
-    public void testDWG2000Parser() throws Exception {
-        InputStream input = DWGParserTest.class.getResourceAsStream(
-                "/test-documents/testDWG2000.dwg");
-        testParserAlt(input);
-    }
-
-    @Test
-    public void testDWG2004Parser() throws Exception {
-        InputStream input = DWGParserTest.class.getResourceAsStream(
-                "/test-documents/testDWG2004.dwg");
-        testParser(input);
-    }
-
-    @Test
-    public void testDWG2004ParserNoHeaderAddress() throws Exception {
-        InputStream input = DWGParserTest.class.getResourceAsStream(
-                "/test-documents/testDWG2004_no_header.dwg");
-        testParserNoHeader(input);
-    }
-
-    @Test
-    public void testDWG2007Parser() throws Exception {
-        InputStream input = DWGParserTest.class.getResourceAsStream(
-                "/test-documents/testDWG2007.dwg");
-        testParser(input);
-    }
-
-    @Test
-    public void testDWG2010Parser() throws Exception {
-        InputStream input = DWGParserTest.class.getResourceAsStream(
-                "/test-documents/testDWG2010.dwg");
-        testParser(input);
-    }
-    
-    @Test
-    public void testDWG2010CustomPropertiesParser() throws Exception {
-        // Check that standard parsing works
-        InputStream testInput = DWGParserTest.class.getResourceAsStream(
-                "/test-documents/testDWG2010_custom_props.dwg");
-        testParser(testInput);
-        
-        // Check that custom properties with alternate padding work
-        try (InputStream input = DWGParserTest.class.getResourceAsStream(
-                "/test-documents/testDWG2010_custom_props.dwg")) {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new DWGParser().parse(input, handler, metadata, null);
-
-            assertEquals("valueforcustomprop1",
-                    metadata.get("customprop1"));
-            assertEquals("valueforcustomprop2",
-                    metadata.get("customprop2"));
-        }
-    }
-
-    @Test
-    public void testDWGMechParser() throws Exception {
-        String[] types = new String[] {
-              "6", "2004", "2004DX", "2005", "2006",
-              "2007", "2008", "2009", "2010", "2011"
-        };
-        for (String type : types) {
-           InputStream input = DWGParserTest.class.getResourceAsStream(
-                   "/test-documents/testDWGmech"+type+".dwg");
-           testParserAlt(input);
-        }
-    }
-
-    @SuppressWarnings("deprecation")
-    private void testParser(InputStream input) throws Exception {
-        try {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new DWGParser().parse(input, handler, metadata);
-
-            assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
-
-            assertEquals("The quick brown fox jumps over the lazy dog", 
-                    metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("Gym class featuring a brown fox and lazy dog",
-                    metadata.get(TikaCoreProperties.DESCRIPTION));
-            assertEquals("Gym class featuring a brown fox and lazy dog",
-                    metadata.get(Metadata.SUBJECT));
-            assertEquals("Nevin Nollop",
-                    metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("Pangram, fox, dog",
-                    metadata.get(TikaCoreProperties.KEYWORDS));
-            assertEquals("Lorem ipsum",
-                    metadata.get(TikaCoreProperties.COMMENTS).substring(0,11));
-            assertEquals("http://www.alfresco.com",
-                    metadata.get(TikaCoreProperties.RELATION));
-            
-            // Check some of the old style metadata too
-            assertEquals("The quick brown fox jumps over the lazy dog", 
-                  metadata.get(Metadata.TITLE));
-            assertEquals("Gym class featuring a brown fox and lazy dog",
-                  metadata.get(Metadata.SUBJECT));
-
-            String content = handler.toString();
-            assertContains("The quick brown fox jumps over the lazy dog", content);
-            assertContains("Gym class", content);
-            assertContains("www.alfresco.com", content);
-        } finally {
-            input.close();
-        }
-    }
-
-    @SuppressWarnings("deprecation")
-    private void testParserNoHeader(InputStream input) throws Exception {
-        try {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new DWGParser().parse(input, handler, metadata);
-
-            assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
-            
-            assertNull(metadata.get(TikaCoreProperties.TITLE));
-            assertNull(metadata.get(TikaCoreProperties.DESCRIPTION));
-            assertNull(metadata.get(Metadata.SUBJECT));
-            assertNull(metadata.get(TikaCoreProperties.CREATOR));
-            assertNull(metadata.get(TikaCoreProperties.KEYWORDS));
-            assertNull(metadata.get(TikaCoreProperties.COMMENTS));
-            assertNull(metadata.get(TikaCoreProperties.RELATION));
-
-            String content = handler.toString();
-            assertEquals("", content);
-        } finally {
-            input.close();
-        }
-    }
-
-    @SuppressWarnings("deprecation")
-    private void testParserAlt(InputStream input) throws Exception {
-        try {
-            Metadata metadata = new Metadata();
-            ContentHandler handler = new BodyContentHandler();
-            new DWGParser().parse(input, handler, metadata);
-
-            assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
-
-            assertEquals("Test Title", 
-                    metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("Test Subject",
-                    metadata.get(TikaCoreProperties.DESCRIPTION));
-            assertEquals("Test Subject",
-                    metadata.get(Metadata.SUBJECT));
-            assertEquals("My Author",
-                    metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("My keyword1, MyKeyword2",
-                    metadata.get(TikaCoreProperties.KEYWORDS));
-            assertEquals("This is a comment",
-                    metadata.get(TikaCoreProperties.COMMENTS));
-            assertEquals("bejanpol",
-                    metadata.get(TikaCoreProperties.MODIFIER));
-            assertEquals("bejanpol",
-                    metadata.get(Metadata.LAST_AUTHOR));
-            assertEquals("http://mycompany/drawings",
-                    metadata.get(TikaCoreProperties.RELATION));
-            assertEquals("MyCustomPropertyValue",
-                  metadata.get("MyCustomProperty"));
-
-            String content = handler.toString();
-            assertContains("This is a comment", content);
-            assertContains("mycompany", content);
-        } finally {
-            input.close();
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.dwg;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class DWGParserTest extends TikaTest {
+
+    @Test
+    public void testDWG2000Parser() throws Exception {
+        testParserAlt("testDWG2000.dwg");
+    }
+
+    @Test
+    public void testDWG2004Parser() throws Exception {
+        testParser("testDWG2004.dwg");
+    }
+
+    @Test
+    public void testDWG2004ParserNoHeaderAddress() throws Exception {
+        testParserNoHeader("testDWG2004_no_header.dwg");
+    }
+
+    @Test
+    public void testDWG2007Parser() throws Exception {
+        testParser("testDWG2007.dwg");
+    }
+
+    @Test
+    public void testDWG2010Parser() throws Exception {
+        testParser("testDWG2010.dwg");
+    }
+
+    /** Checks the standard metadata, then the two custom properties, of the custom-props sample. */
+    @Test
+    public void testDWG2010CustomPropertiesParser() throws Exception {
+        // Check that standard parsing works
+        testParser("testDWG2010_custom_props.dwg");
+
+        // Check that custom properties with alternate padding work.
+        // DWGParser is named explicitly, like every other getXML call in this class,
+        // so the assertions exercise the parser under test.
+        XMLResult r = getXML("testDWG2010_custom_props.dwg", new DWGParser());
+        assertEquals("valueforcustomprop1", r.metadata.get("customprop1"));
+        assertEquals("valueforcustomprop2", r.metadata.get("customprop2"));
+    }
+
+    @Test
+    public void testDWGMechParser() throws Exception {
+        String[] types = new String[]{
+                "6", "2004", "2004DX", "2005", "2006",
+                "2007", "2008", "2009", "2010", "2011"
+        };
+        for (String type : types) {
+            testParserAlt("testDWGmech" + type + ".dwg");
+        }
+    }
+
+    private void testParser(String testFileName) throws Exception {
+        XMLResult r = getXML(testFileName, new DWGParser());
+        Metadata metadata = r.metadata;
+        String content = r.xml;
+
+        // Content type first, then the summary properties of the "quick brown fox" samples
+        assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
+
+        assertEquals("The quick brown fox jumps over the lazy dog",
+                metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Gym class featuring a brown fox and lazy dog",
+                metadata.get(TikaCoreProperties.DESCRIPTION));
+        assertEquals("Gym class featuring a brown fox and lazy dog",
+                metadata.get(Metadata.SUBJECT));
+        assertEquals("Nevin Nollop",
+                metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Pangram, fox, dog",
+                metadata.get(TikaCoreProperties.KEYWORDS));
+        assertEquals("Lorem ipsum",
+                metadata.get(TikaCoreProperties.COMMENTS).substring(0, 11));
+        assertEquals("http://www.alfresco.com",
+                metadata.get(TikaCoreProperties.RELATION));
+
+        // Check some of the old style metadata too
+        assertEquals("The quick brown fox jumps over the lazy dog",
+                metadata.get(Metadata.TITLE));
+        assertEquals("Gym class featuring a brown fox and lazy dog",
+                metadata.get(Metadata.SUBJECT));
+        // Title, part of the description and the link must also appear in the XML output
+        assertContains("The quick brown fox jumps over the lazy dog", content);
+        assertContains("Gym class", content);
+        assertContains("www.alfresco.com", content);
+    }
+
+    @SuppressWarnings("deprecation")
+    private void testParserNoHeader(String testFileName) throws Exception {
+        try (InputStream input = getResourceAsStream("/test-documents/" + testFileName)) {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            new DWGParser().parse(input, handler, metadata, new ParseContext());
+
+            assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
+            // None of the summary properties are expected to be set for this file
+            assertNull(metadata.get(TikaCoreProperties.TITLE));
+            assertNull(metadata.get(TikaCoreProperties.DESCRIPTION));
+            assertNull(metadata.get(Metadata.SUBJECT));
+            assertNull(metadata.get(TikaCoreProperties.CREATOR));
+            assertNull(metadata.get(TikaCoreProperties.KEYWORDS));
+            assertNull(metadata.get(TikaCoreProperties.COMMENTS));
+            assertNull(metadata.get(TikaCoreProperties.RELATION));
+            // ... and the body handler should have collected no text at all
+            String content = handler.toString();
+            assertEquals("", content);
+        }
+    }
+
+    private void testParserAlt(String testFileName) throws Exception {
+        XMLResult r = getXML(testFileName, new DWGParser());
+        Metadata metadata = r.metadata;
+        String content = r.xml;
+
+        assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
+        // Summary and custom properties expected from the "Test Title" sample drawings
+        assertEquals("Test Title",
+                metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Test Subject",
+                metadata.get(TikaCoreProperties.DESCRIPTION));
+        assertEquals("Test Subject",
+                metadata.get(Metadata.SUBJECT));
+        assertEquals("My Author",
+                metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("My keyword1, MyKeyword2",
+                metadata.get(TikaCoreProperties.KEYWORDS));
+        assertEquals("This is a comment",
+                metadata.get(TikaCoreProperties.COMMENTS));
+        assertEquals("bejanpol",
+                metadata.get(TikaCoreProperties.MODIFIER));
+        assertEquals("bejanpol",
+                metadata.get(Metadata.LAST_AUTHOR));
+        assertEquals("http://mycompany/drawings",
+                metadata.get(TikaCoreProperties.RELATION));
+        assertEquals("MyCustomPropertyValue",
+                metadata.get("MyCustomProperty"));
+        assertContains("This is a comment", content);
+        assertContains("mycompany", content);
+    }
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/0c71b2ff/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java b/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
index 155512c..de870ed 100644
--- a/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
+++ b/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
@@ -1,113 +1,101 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.prt;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-public class PRTParserTest extends TikaTest {
-    /**
-     * Try with a simple file
-     */
-    @Test
-    public void testPRTParserBasics() throws Exception {
-       try (InputStream input = getResourceAsStream("/test-documents/testCADKEY.prt")) {
-          Metadata metadata = new Metadata();
-          ContentHandler handler = new BodyContentHandler();
-          new PRTParser().parse(input, handler, metadata);
-
-          assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE));
-
-          // This file has a date
-          assertEquals("2011-06-20T16:54:00",
-                  metadata.get(TikaCoreProperties.CREATED));
-          assertEquals("2011-06-20T16:54:00",
-                  metadata.get(Metadata.CREATION_DATE));
-          // But no description
-          assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
-
-          String contents = handler.toString();
-
-          assertContains("Front View", contents);
-          assertContains("Back View", contents);
-          assertContains("Bottom View", contents);
-          assertContains("Right View", contents);
-          assertContains("Left View", contents);
-          //assertContains("Isometric View", contents); // Can't detect yet
-          assertContains("Axonometric View", contents);
-
-          assertContains("You've managed to extract all the text!", contents);
-          assertContains("This is more text", contents);
-          assertContains("Text Inside a PRT file", contents);
-       }
-    }
-
-    /**
-     * Now a more complex one
-     */
-    @Test
-    public void testPRTParserComplex() throws Exception {
-       try (InputStream input = getResourceAsStream("/test-documents/testCADKEY2.prt")) {
-          Metadata metadata = new Metadata();
-          ContentHandler handler = new BodyContentHandler();
-          new PRTParser().parse(input, handler, metadata);
-
-          assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE));
-
-          // File has both a date and a description
-          assertEquals("1997-04-01T08:59:00",
-                  metadata.get(Metadata.DATE));
-          assertEquals("1997-04-01T08:59:00",
-                  metadata.get(Metadata.CREATION_DATE));
-          assertEquals("TIKA TEST PART DESCRIPTION INFORMATION\r\n",
-                  metadata.get(TikaCoreProperties.DESCRIPTION));
-
-          String contents = handler.toString();
-
-          assertContains("ITEM", contents);
-          assertContains("REQ.", contents);
-          assertContains("DESCRIPTION", contents);
-          assertContains("MAT'L", contents);
-          assertContains("TOLERANCES UNLESS", contents);
-          assertContains("FRACTIONS", contents);
-          assertContains("ANGLES", contents);
-          assertContains("Acme Corporation", contents);
-
-          assertContains("DATE", contents);
-          assertContains("CHANGE", contents);
-          assertContains("DRAWN BY", contents);
-          assertContains("SCALE", contents);
-          assertContains("TIKA TEST DRAWING", contents);
-          assertContains("TIKA LETTERS", contents);
-          assertContains("5.82", contents);
-          assertContains("112" + '\u00b0', contents); // Degrees
-          assertContains("TIKA TEST LETTER", contents);
-          assertContains("17.11", contents);
-          assertContains('\u00d8' + "\ufffd2.000", contents); // Diameter
-          assertContains("Diameter", contents);
-          assertContains("The Apache Tika toolkit", contents);
-       }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.prt;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.junit.Test;
+
+public class PRTParserTest extends TikaTest {
+    /**
+     * Parses the simple sample testCADKEY.prt and checks its dates and extracted text.
+     */
+    @Test
+    public void testPRTParserBasics() throws Exception {
+        // The resource is named testCADKEY.prt; lookups are case-sensitive on many file systems.
+        XMLResult r = getXML("testCADKEY.prt", new PRTParser());
+        Metadata metadata = r.metadata;
+        String contents = r.xml;
+        assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE));
+
+        // This file has a date
+        assertEquals("2011-06-20T16:54:00",
+                metadata.get(TikaCoreProperties.CREATED));
+        assertEquals("2011-06-20T16:54:00",
+                metadata.get(Metadata.CREATION_DATE));
+        // But no description
+        assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
+
+        assertContains("Front View", contents);
+        assertContains("Back View", contents);
+        assertContains("Bottom View", contents);
+        assertContains("Right View", contents);
+        assertContains("Left View", contents);
+        //assertContains("Isometric View", contents); // Can't detect yet
+        assertContains("Axonometric View", contents);
+
+        assertContains("You've managed to extract all the text!", contents);
+        assertContains("This is more text", contents);
+        assertContains("Text Inside a PRT file", contents);
+    }
+
+    /**
+     * Parses the more complex sample testCADKEY2.prt and checks its dates, description and text.
+     */
+    @Test
+    public void testPRTParserComplex() throws Exception {
+
+        XMLResult r = getXML("testCADKEY2.prt", new PRTParser());
+        Metadata metadata = r.metadata;
+        String contents = r.xml;
+        assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE));
+
+        // File has both a date and a description
+        assertEquals("1997-04-01T08:59:00",
+                metadata.get(Metadata.DATE));
+        assertEquals("1997-04-01T08:59:00",
+                metadata.get(Metadata.CREATION_DATE));
+        assertEquals("TIKA TEST PART DESCRIPTION INFORMATION\r\n",
+                metadata.get(TikaCoreProperties.DESCRIPTION));
+
+        assertContains("ITEM", contents);
+        assertContains("REQ.", contents);
+        assertContains("DESCRIPTION", contents);
+        assertContains("MAT'L", contents);
+        assertContains("TOLERANCES UNLESS", contents);
+        assertContains("FRACTIONS", contents);
+        assertContains("ANGLES", contents);
+        assertContains("Acme Corporation", contents);
+
+        assertContains("DATE", contents);
+        assertContains("CHANGE", contents);
+        assertContains("DRAWN BY", contents);
+        assertContains("SCALE", contents);
+        assertContains("TIKA TEST DRAWING", contents);
+        assertContains("TIKA LETTERS", contents);
+        assertContains("5.82", contents);
+        assertContains("112" + '\u00b0', contents); // Degrees
+        assertContains("TIKA TEST LETTER", contents);
+        assertContains("17.11", contents);
+        assertContains('\u00d8' + "\ufffd2.000", contents); // Diameter sign, then a replacement character
+        assertContains("Diameter", contents);
+        assertContains("The Apache Tika toolkit", contents);
+    }
+}