You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/24 15:44:45 UTC
[3/4] tika git commit: TIKA-2020,
remove 3 parameter parse() and simplify CAD tests
TIKA-2020, remove 3 parameter parse() and simplify CAD tests
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0c71b2ff
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0c71b2ff
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0c71b2ff
Branch: refs/heads/2.x
Commit: 0c71b2ffc97a3907a541fdd164ba79302f5c0637
Parents: 6bb6827
Author: tballison <ta...@mitre.org>
Authored: Fri Jun 24 11:13:54 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Fri Jun 24 11:13:54 2016 -0400
----------------------------------------------------------------------
.../apache/tika/parser/dwg/DWGParserTest.java | 372 +++++++++----------
.../apache/tika/parser/prt/PRTParserTest.java | 214 +++++------
2 files changed, 271 insertions(+), 315 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/0c71b2ff/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java b/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
index 321d715..ee3e767 100644
--- a/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
+++ b/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
@@ -1,202 +1,170 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.dwg;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.apache.tika.TikaTest.assertContains;
-
-import java.io.InputStream;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-public class DWGParserTest {
-
- @Test
- public void testDWG2000Parser() throws Exception {
- InputStream input = DWGParserTest.class.getResourceAsStream(
- "/test-documents/testDWG2000.dwg");
- testParserAlt(input);
- }
-
- @Test
- public void testDWG2004Parser() throws Exception {
- InputStream input = DWGParserTest.class.getResourceAsStream(
- "/test-documents/testDWG2004.dwg");
- testParser(input);
- }
-
- @Test
- public void testDWG2004ParserNoHeaderAddress() throws Exception {
- InputStream input = DWGParserTest.class.getResourceAsStream(
- "/test-documents/testDWG2004_no_header.dwg");
- testParserNoHeader(input);
- }
-
- @Test
- public void testDWG2007Parser() throws Exception {
- InputStream input = DWGParserTest.class.getResourceAsStream(
- "/test-documents/testDWG2007.dwg");
- testParser(input);
- }
-
- @Test
- public void testDWG2010Parser() throws Exception {
- InputStream input = DWGParserTest.class.getResourceAsStream(
- "/test-documents/testDWG2010.dwg");
- testParser(input);
- }
-
- @Test
- public void testDWG2010CustomPropertiesParser() throws Exception {
- // Check that standard parsing works
- InputStream testInput = DWGParserTest.class.getResourceAsStream(
- "/test-documents/testDWG2010_custom_props.dwg");
- testParser(testInput);
-
- // Check that custom properties with alternate padding work
- try (InputStream input = DWGParserTest.class.getResourceAsStream(
- "/test-documents/testDWG2010_custom_props.dwg")) {
- Metadata metadata = new Metadata();
- ContentHandler handler = new BodyContentHandler();
- new DWGParser().parse(input, handler, metadata, null);
-
- assertEquals("valueforcustomprop1",
- metadata.get("customprop1"));
- assertEquals("valueforcustomprop2",
- metadata.get("customprop2"));
- }
- }
-
- @Test
- public void testDWGMechParser() throws Exception {
- String[] types = new String[] {
- "6", "2004", "2004DX", "2005", "2006",
- "2007", "2008", "2009", "2010", "2011"
- };
- for (String type : types) {
- InputStream input = DWGParserTest.class.getResourceAsStream(
- "/test-documents/testDWGmech"+type+".dwg");
- testParserAlt(input);
- }
- }
-
- @SuppressWarnings("deprecation")
- private void testParser(InputStream input) throws Exception {
- try {
- Metadata metadata = new Metadata();
- ContentHandler handler = new BodyContentHandler();
- new DWGParser().parse(input, handler, metadata);
-
- assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
-
- assertEquals("The quick brown fox jumps over the lazy dog",
- metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Gym class featuring a brown fox and lazy dog",
- metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("Gym class featuring a brown fox and lazy dog",
- metadata.get(Metadata.SUBJECT));
- assertEquals("Nevin Nollop",
- metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Pangram, fox, dog",
- metadata.get(TikaCoreProperties.KEYWORDS));
- assertEquals("Lorem ipsum",
- metadata.get(TikaCoreProperties.COMMENTS).substring(0,11));
- assertEquals("http://www.alfresco.com",
- metadata.get(TikaCoreProperties.RELATION));
-
- // Check some of the old style metadata too
- assertEquals("The quick brown fox jumps over the lazy dog",
- metadata.get(Metadata.TITLE));
- assertEquals("Gym class featuring a brown fox and lazy dog",
- metadata.get(Metadata.SUBJECT));
-
- String content = handler.toString();
- assertContains("The quick brown fox jumps over the lazy dog", content);
- assertContains("Gym class", content);
- assertContains("www.alfresco.com", content);
- } finally {
- input.close();
- }
- }
-
- @SuppressWarnings("deprecation")
- private void testParserNoHeader(InputStream input) throws Exception {
- try {
- Metadata metadata = new Metadata();
- ContentHandler handler = new BodyContentHandler();
- new DWGParser().parse(input, handler, metadata);
-
- assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
-
- assertNull(metadata.get(TikaCoreProperties.TITLE));
- assertNull(metadata.get(TikaCoreProperties.DESCRIPTION));
- assertNull(metadata.get(Metadata.SUBJECT));
- assertNull(metadata.get(TikaCoreProperties.CREATOR));
- assertNull(metadata.get(TikaCoreProperties.KEYWORDS));
- assertNull(metadata.get(TikaCoreProperties.COMMENTS));
- assertNull(metadata.get(TikaCoreProperties.RELATION));
-
- String content = handler.toString();
- assertEquals("", content);
- } finally {
- input.close();
- }
- }
-
- @SuppressWarnings("deprecation")
- private void testParserAlt(InputStream input) throws Exception {
- try {
- Metadata metadata = new Metadata();
- ContentHandler handler = new BodyContentHandler();
- new DWGParser().parse(input, handler, metadata);
-
- assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
-
- assertEquals("Test Title",
- metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Test Subject",
- metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("Test Subject",
- metadata.get(Metadata.SUBJECT));
- assertEquals("My Author",
- metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("My keyword1, MyKeyword2",
- metadata.get(TikaCoreProperties.KEYWORDS));
- assertEquals("This is a comment",
- metadata.get(TikaCoreProperties.COMMENTS));
- assertEquals("bejanpol",
- metadata.get(TikaCoreProperties.MODIFIER));
- assertEquals("bejanpol",
- metadata.get(Metadata.LAST_AUTHOR));
- assertEquals("http://mycompany/drawings",
- metadata.get(TikaCoreProperties.RELATION));
- assertEquals("MyCustomPropertyValue",
- metadata.get("MyCustomProperty"));
-
- String content = handler.toString();
- assertContains("This is a comment", content);
- assertContains("mycompany", content);
- } finally {
- input.close();
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.dwg;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.io.InputStream;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+public class DWGParserTest extends TikaTest {
+
+ @Test
+ public void testDWG2000Parser() throws Exception {
+ testParserAlt("testDWG2000.dwg");
+ }
+
+ @Test
+ public void testDWG2004Parser() throws Exception {
+ testParser("testDWG2004.dwg");
+ }
+
+ @Test
+ public void testDWG2004ParserNoHeaderAddress() throws Exception {
+ testParserNoHeader("testDWG2004_no_header.dwg");
+ }
+
+ @Test
+ public void testDWG2007Parser() throws Exception {
+ testParser("testDWG2007.dwg");
+ }
+
+ @Test
+ public void testDWG2010Parser() throws Exception {
+ testParser("testDWG2010.dwg");
+ }
+
+ @Test
+ public void testDWG2010CustomPropertiesParser() throws Exception {
+ // Check that standard parsing works
+ testParser("testDWG2010_custom_props.dwg");
+
+ // Check that custom properties with alternate padding work; pass DWGParser
+ // explicitly, as testParser/testParserAlt do, so this parser is the one used
+ XMLResult r = getXML("testDWG2010_custom_props.dwg", new DWGParser());
+ assertEquals("valueforcustomprop1",
+ r.metadata.get("customprop1"));
+ assertEquals("valueforcustomprop2",
+ r.metadata.get("customprop2"));
+ }
+
+ @Test
+ public void testDWGMechParser() throws Exception {
+ String[] types = new String[]{
+ "6", "2004", "2004DX", "2005", "2006",
+ "2007", "2008", "2009", "2010", "2011"
+ };
+ for (String type : types) {
+ testParserAlt("testDWGmech" + type + ".dwg");
+ }
+ }
+
+ private void testParser(String testFileName) throws Exception {
+ XMLResult r = getXML(testFileName, new DWGParser());
+ Metadata metadata = r.metadata;
+ String content = r.xml;
+
+
+ assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
+
+ assertEquals("The quick brown fox jumps over the lazy dog",
+ metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Gym class featuring a brown fox and lazy dog",
+ metadata.get(TikaCoreProperties.DESCRIPTION));
+ assertEquals("Gym class featuring a brown fox and lazy dog",
+ metadata.get(Metadata.SUBJECT));
+ assertEquals("Nevin Nollop",
+ metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Pangram, fox, dog",
+ metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("Lorem ipsum",
+ metadata.get(TikaCoreProperties.COMMENTS).substring(0, 11));
+ assertEquals("http://www.alfresco.com",
+ metadata.get(TikaCoreProperties.RELATION));
+
+ // Check some of the old style metadata too
+ assertEquals("The quick brown fox jumps over the lazy dog",
+ metadata.get(Metadata.TITLE));
+ assertEquals("Gym class featuring a brown fox and lazy dog",
+ metadata.get(Metadata.SUBJECT));
+
+ assertContains("The quick brown fox jumps over the lazy dog", content);
+ assertContains("Gym class", content);
+ assertContains("www.alfresco.com", content);
+ }
+
+ @SuppressWarnings("deprecation")
+ private void testParserNoHeader(String testFileName) throws Exception {
+ try (InputStream input = getResourceAsStream("/test-documents/" + testFileName)) {
+ Metadata metadata = new Metadata();
+ ContentHandler handler = new BodyContentHandler();
+ new DWGParser().parse(input, handler, metadata, new ParseContext());
+
+ assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
+
+ assertNull(metadata.get(TikaCoreProperties.TITLE));
+ assertNull(metadata.get(TikaCoreProperties.DESCRIPTION));
+ assertNull(metadata.get(Metadata.SUBJECT));
+ assertNull(metadata.get(TikaCoreProperties.CREATOR));
+ assertNull(metadata.get(TikaCoreProperties.KEYWORDS));
+ assertNull(metadata.get(TikaCoreProperties.COMMENTS));
+ assertNull(metadata.get(TikaCoreProperties.RELATION));
+
+ String content = handler.toString();
+ assertEquals("", content);
+ }
+ }
+
+ private void testParserAlt(String testFileName) throws Exception {
+ XMLResult r = getXML(testFileName, new DWGParser());
+ Metadata metadata = r.metadata;
+ String content = r.xml;
+
+ assertEquals("image/vnd.dwg", metadata.get(Metadata.CONTENT_TYPE));
+
+ assertEquals("Test Title",
+ metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Test Subject",
+ metadata.get(TikaCoreProperties.DESCRIPTION));
+ assertEquals("Test Subject",
+ metadata.get(Metadata.SUBJECT));
+ assertEquals("My Author",
+ metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("My keyword1, MyKeyword2",
+ metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("This is a comment",
+ metadata.get(TikaCoreProperties.COMMENTS));
+ assertEquals("bejanpol",
+ metadata.get(TikaCoreProperties.MODIFIER));
+ assertEquals("bejanpol",
+ metadata.get(Metadata.LAST_AUTHOR));
+ assertEquals("http://mycompany/drawings",
+ metadata.get(TikaCoreProperties.RELATION));
+ assertEquals("MyCustomPropertyValue",
+ metadata.get("MyCustomProperty"));
+ assertContains("This is a comment", content);
+ assertContains("mycompany", content);
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/0c71b2ff/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java b/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
index 155512c..de870ed 100644
--- a/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
+++ b/tika-parser-modules/tika-parser-cad-module/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
@@ -1,113 +1,101 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.prt;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.sax.BodyContentHandler;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-
-public class PRTParserTest extends TikaTest {
- /**
- * Try with a simple file
- */
- @Test
- public void testPRTParserBasics() throws Exception {
- try (InputStream input = getResourceAsStream("/test-documents/testCADKEY.prt")) {
- Metadata metadata = new Metadata();
- ContentHandler handler = new BodyContentHandler();
- new PRTParser().parse(input, handler, metadata);
-
- assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE));
-
- // This file has a date
- assertEquals("2011-06-20T16:54:00",
- metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2011-06-20T16:54:00",
- metadata.get(Metadata.CREATION_DATE));
- // But no description
- assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
-
- String contents = handler.toString();
-
- assertContains("Front View", contents);
- assertContains("Back View", contents);
- assertContains("Bottom View", contents);
- assertContains("Right View", contents);
- assertContains("Left View", contents);
- //assertContains("Isometric View", contents); // Can't detect yet
- assertContains("Axonometric View", contents);
-
- assertContains("You've managed to extract all the text!", contents);
- assertContains("This is more text", contents);
- assertContains("Text Inside a PRT file", contents);
- }
- }
-
- /**
- * Now a more complex one
- */
- @Test
- public void testPRTParserComplex() throws Exception {
- try (InputStream input = getResourceAsStream("/test-documents/testCADKEY2.prt")) {
- Metadata metadata = new Metadata();
- ContentHandler handler = new BodyContentHandler();
- new PRTParser().parse(input, handler, metadata);
-
- assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE));
-
- // File has both a date and a description
- assertEquals("1997-04-01T08:59:00",
- metadata.get(Metadata.DATE));
- assertEquals("1997-04-01T08:59:00",
- metadata.get(Metadata.CREATION_DATE));
- assertEquals("TIKA TEST PART DESCRIPTION INFORMATION\r\n",
- metadata.get(TikaCoreProperties.DESCRIPTION));
-
- String contents = handler.toString();
-
- assertContains("ITEM", contents);
- assertContains("REQ.", contents);
- assertContains("DESCRIPTION", contents);
- assertContains("MAT'L", contents);
- assertContains("TOLERANCES UNLESS", contents);
- assertContains("FRACTIONS", contents);
- assertContains("ANGLES", contents);
- assertContains("Acme Corporation", contents);
-
- assertContains("DATE", contents);
- assertContains("CHANGE", contents);
- assertContains("DRAWN BY", contents);
- assertContains("SCALE", contents);
- assertContains("TIKA TEST DRAWING", contents);
- assertContains("TIKA LETTERS", contents);
- assertContains("5.82", contents);
- assertContains("112" + '\u00b0', contents); // Degrees
- assertContains("TIKA TEST LETTER", contents);
- assertContains("17.11", contents);
- assertContains('\u00d8' + "\ufffd2.000", contents); // Diameter
- assertContains("Diameter", contents);
- assertContains("The Apache Tika toolkit", contents);
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.prt;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.junit.Test;
+
+public class PRTParserTest extends TikaTest {
+ /**
+ * Parses a simple PRT file and checks its metadata and extracted text
+ */
+ @Test
+ public void testPRTParserBasics() throws Exception {
+ // Name must match the resource's case exactly (upper-case CADKEY, like testCADKEY2.prt)
+ XMLResult r = getXML("testCADKEY.prt", new PRTParser());
+ Metadata metadata = r.metadata;
+ String contents = r.xml;
+ assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE));
+
+ // This file has a date
+ assertEquals("2011-06-20T16:54:00",
+ metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("2011-06-20T16:54:00",
+ metadata.get(Metadata.CREATION_DATE));
+ // But no description
+ assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
+
+ assertContains("Front View", contents);
+ assertContains("Back View", contents);
+ assertContains("Bottom View", contents);
+ assertContains("Right View", contents);
+ assertContains("Left View", contents);
+ //assertContains("Isometric View", contents); // Can't detect yet
+ assertContains("Axonometric View", contents);
+
+ assertContains("You've managed to extract all the text!", contents);
+ assertContains("This is more text", contents);
+ assertContains("Text Inside a PRT file", contents);
+ }
+
+ /**
+ * Parses a more complex PRT file, one that has both a date and a description
+ */
+ @Test
+ public void testPRTParserComplex() throws Exception {
+
+ XMLResult r = getXML("testCADKEY2.prt", new PRTParser());
+ Metadata metadata = r.metadata;
+ String contents = r.xml;
+ assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE));
+
+ // File has both a date and a description
+ assertEquals("1997-04-01T08:59:00",
+ metadata.get(Metadata.DATE));
+ assertEquals("1997-04-01T08:59:00",
+ metadata.get(Metadata.CREATION_DATE));
+ assertEquals("TIKA TEST PART DESCRIPTION INFORMATION\r\n",
+ metadata.get(TikaCoreProperties.DESCRIPTION));
+
+ assertContains("ITEM", contents);
+ assertContains("REQ.", contents);
+ assertContains("DESCRIPTION", contents);
+ assertContains("MAT'L", contents);
+ assertContains("TOLERANCES UNLESS", contents);
+ assertContains("FRACTIONS", contents);
+ assertContains("ANGLES", contents);
+ assertContains("Acme Corporation", contents);
+
+ assertContains("DATE", contents);
+ assertContains("CHANGE", contents);
+ assertContains("DRAWN BY", contents);
+ assertContains("SCALE", contents);
+ assertContains("TIKA TEST DRAWING", contents);
+ assertContains("TIKA LETTERS", contents);
+ assertContains("5.82", contents);
+ assertContains("112" + '\u00b0', contents); // Degrees
+ assertContains("TIKA TEST LETTER", contents);
+ assertContains("17.11", contents);
+ assertContains('\u00d8' + "\ufffd2.000", contents); // Diameter
+ assertContains("Diameter", contents);
+ assertContains("The Apache Tika toolkit", contents);
+ }
+}