You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/02 16:40:56 UTC

svn commit: r991955 [3/6] - in /tika/trunk: tika-core/src/test/java/org/apache/tika/ tika-core/src/test/java/org/apache/tika/detect/ tika-core/src/test/java/org/apache/tika/language/ tika-core/src/test/java/org/apache/tika/sax/ tika-core/src/test/resou...

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/TikaTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java Thu Sep  2 14:40:55 2010
@@ -1,120 +1,120 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.detect;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-
-/**
- * Test cases for the {@link MagicDetector} class.
- */
-public class MagicDetectorTest extends TestCase {
-
-    public void testDetectNull() throws Exception {
-        MediaType html = new MediaType("text", "html");
-        Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
-        assertEquals(
-                MediaType.OCTET_STREAM,
-                detector.detect(null, new Metadata()));
-    }
-
-    public void testDetectSimple() throws Exception {
-        MediaType html = new MediaType("text", "html");
-        Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
-
-        assertDetect(detector, html, "<html");
-        assertDetect(detector, html, "<html><head/><body/></html>");
-        assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
-        assertDetect(detector, MediaType.OCTET_STREAM, "<?xml?><html");
-        assertDetect(detector, MediaType.OCTET_STREAM, " <html");
-        assertDetect(detector, MediaType.OCTET_STREAM, "");
-    }
-
-    public void testDetectOffsetRange() throws Exception {
-        MediaType html = new MediaType("text", "html");
-        Detector detector = new MagicDetector(
-                html, "<html".getBytes("ASCII"), null, 0, 64);
-
-        assertDetect(detector, html, "<html");
-        assertDetect(detector, html, "<html><head/><body/></html>");
-        assertDetect(detector, html, "<?xml?><html/>");
-        assertDetect(detector, html, "\n    <html");
-        assertDetect(detector, html, "\u0000<html");
-        assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
-        assertDetect(detector, MediaType.OCTET_STREAM, " html");
-        assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
-
-        assertDetect(detector, html,
-                "0........1.........2.........3.........4.........5.........6"
-                + "1234<html");
-        assertDetect(detector, MediaType.OCTET_STREAM,
-                "0........1.........2.........3.........4.........5.........6"
-                + "12345<html");
-
-        assertDetect(detector, MediaType.OCTET_STREAM, "");
-}
-
-    public void testDetectMask() throws Exception {
-        MediaType html = new MediaType("text", "html");
-        byte up = (byte) 0xdf;
-        Detector detector = new MagicDetector(
-                html,
-                new byte[] { '<',  'H',  'T',  'M',  'L' },
-                new byte[] { (byte) 0xff, up, up, up, up },
-                0, 64);
-
-        assertDetect(detector, html, "<html");
-        assertDetect(detector, html, "<HTML><head/><body/></html>");
-        assertDetect(detector, html, "<?xml?><HtMl/>");
-        assertDetect(detector, html, "\n    <html");
-        assertDetect(detector, html, "\u0000<HTML");
-        assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
-        assertDetect(detector, MediaType.OCTET_STREAM, " html");
-
-        assertDetect(detector, html,
-                "0        1         2         3         4         5         6"
-                + "1234<html");
-        assertDetect(detector, MediaType.OCTET_STREAM,
-                "0        1         2         3         4         5         6"
-                + "12345<html");
-
-        assertDetect(detector, MediaType.OCTET_STREAM, "");
-    }
-
-    private void assertDetect(Detector detector, MediaType type, String data) {
-        try {
-            byte[] bytes = data.getBytes("ASCII");
-            InputStream stream = new ByteArrayInputStream(bytes);
-            assertEquals(type, detector.detect(stream, new Metadata()));
-
-            // Test that the stream has been reset
-            for (int i = 0; i < bytes.length; i++) {
-                assertEquals(bytes[i], (byte) stream.read());
-            }
-            assertEquals(-1, stream.read());
-        } catch (IOException e) {
-            fail("Unexpected exception from MagicDetector");
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Test cases for the {@link MagicDetector} class.
+ */
+public class MagicDetectorTest extends TestCase {
+
+    public void testDetectNull() throws Exception {
+        MediaType html = new MediaType("text", "html");
+        Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
+        assertEquals(
+                MediaType.OCTET_STREAM,
+                detector.detect(null, new Metadata()));
+    }
+
+    public void testDetectSimple() throws Exception {
+        MediaType html = new MediaType("text", "html");
+        Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
+
+        assertDetect(detector, html, "<html");
+        assertDetect(detector, html, "<html><head/><body/></html>");
+        assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
+        assertDetect(detector, MediaType.OCTET_STREAM, "<?xml?><html");
+        assertDetect(detector, MediaType.OCTET_STREAM, " <html");
+        assertDetect(detector, MediaType.OCTET_STREAM, "");
+    }
+
+    public void testDetectOffsetRange() throws Exception {
+        MediaType html = new MediaType("text", "html");
+        Detector detector = new MagicDetector(
+                html, "<html".getBytes("ASCII"), null, 0, 64);
+
+        assertDetect(detector, html, "<html");
+        assertDetect(detector, html, "<html><head/><body/></html>");
+        assertDetect(detector, html, "<?xml?><html/>");
+        assertDetect(detector, html, "\n    <html");
+        assertDetect(detector, html, "\u0000<html");
+        assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
+        assertDetect(detector, MediaType.OCTET_STREAM, " html");
+        assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
+
+        assertDetect(detector, html,
+                "0........1.........2.........3.........4.........5.........6"
+                + "1234<html");
+        assertDetect(detector, MediaType.OCTET_STREAM,
+                "0........1.........2.........3.........4.........5.........6"
+                + "12345<html");
+
+        assertDetect(detector, MediaType.OCTET_STREAM, "");
+}
+
+    public void testDetectMask() throws Exception {
+        MediaType html = new MediaType("text", "html");
+        byte up = (byte) 0xdf;
+        Detector detector = new MagicDetector(
+                html,
+                new byte[] { '<',  'H',  'T',  'M',  'L' },
+                new byte[] { (byte) 0xff, up, up, up, up },
+                0, 64);
+
+        assertDetect(detector, html, "<html");
+        assertDetect(detector, html, "<HTML><head/><body/></html>");
+        assertDetect(detector, html, "<?xml?><HtMl/>");
+        assertDetect(detector, html, "\n    <html");
+        assertDetect(detector, html, "\u0000<HTML");
+        assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
+        assertDetect(detector, MediaType.OCTET_STREAM, " html");
+
+        assertDetect(detector, html,
+                "0        1         2         3         4         5         6"
+                + "1234<html");
+        assertDetect(detector, MediaType.OCTET_STREAM,
+                "0        1         2         3         4         5         6"
+                + "12345<html");
+
+        assertDetect(detector, MediaType.OCTET_STREAM, "");
+    }
+
+    private void assertDetect(Detector detector, MediaType type, String data) {
+        try {
+            byte[] bytes = data.getBytes("ASCII");
+            InputStream stream = new ByteArrayInputStream(bytes);
+            assertEquals(type, detector.detect(stream, new Metadata()));
+
+            // Test that the stream has been reset
+            for (int i = 0; i < bytes.length; i++) {
+                assertEquals(bytes[i], (byte) stream.read());
+            }
+            assertEquals(-1, stream.read());
+        } catch (IOException e) {
+            fail("Unexpected exception from MagicDetector");
+        }
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java Thu Sep  2 14:40:55 2010
@@ -1,92 +1,92 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.detect;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.regex.Pattern;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-
-import junit.framework.TestCase;
-
-/**
- * Test cases for the {@link NameDetector} class.
- */
-public class NameDetectorTest extends TestCase {
-
-    private Detector detector;
-
-    protected void setUp() {
-        Map<Pattern, MediaType> patterns = new HashMap<Pattern, MediaType>();
-        patterns.put(
-                Pattern.compile(".*\\.txt", Pattern.CASE_INSENSITIVE),
-                MediaType.TEXT_PLAIN);
-        patterns.put(Pattern.compile("README"), MediaType.TEXT_PLAIN);
-        detector = new NameDetector(patterns);
-    }
-
-    public void testDetect() {
-        assertDetect(MediaType.TEXT_PLAIN, "text.txt");
-        assertDetect(MediaType.TEXT_PLAIN, "text.txt ");    // trailing space
-        assertDetect(MediaType.TEXT_PLAIN, "text.txt\n");   // trailing newline
-        assertDetect(MediaType.TEXT_PLAIN, "text.txt?a=b"); // URL query
-        assertDetect(MediaType.TEXT_PLAIN, "text.txt#abc"); // URL fragment
-        assertDetect(MediaType.TEXT_PLAIN, "text%2Etxt");   // URL encoded
-        assertDetect(MediaType.TEXT_PLAIN, "text.TXT");     // case insensitive
-        assertDetect(MediaType.OCTET_STREAM, "text.txt.gz");
-
-        assertDetect(MediaType.TEXT_PLAIN, "README");
-        assertDetect(MediaType.TEXT_PLAIN, " README ");     // space around
-        assertDetect(MediaType.TEXT_PLAIN, "\tREADME\n");   // other whitespace
-        assertDetect(MediaType.TEXT_PLAIN, "/a/README");    // leading path
-        assertDetect(MediaType.TEXT_PLAIN, "\\b\\README");  // windows path
-        assertDetect(MediaType.OCTET_STREAM, "ReadMe");     // case sensitive
-        assertDetect(MediaType.OCTET_STREAM, "README.NOW");
-
-        // tough one
-        assertDetect(
-                MediaType.TEXT_PLAIN,
-                " See http://www.example.com:1234/README.txt?a=b#c \n");
-        assertDetect(MediaType.TEXT_PLAIN, "See README.txt"); // even this!
-        assertDetect(MediaType.OCTET_STREAM, "See README");   // but not this
-
-        // test also the zero input cases
-        assertDetect(MediaType.OCTET_STREAM, "");
-        assertDetect(MediaType.OCTET_STREAM, null);
-        try {
-            assertEquals(
-                    MediaType.OCTET_STREAM,
-                    detector.detect(null, new Metadata()));
-        } catch (IOException e) {
-            fail("NameDetector should never throw an IOException");
-        }
-    }
-
-    private void assertDetect(MediaType type, String name){
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.RESOURCE_NAME_KEY, name);
-        try {
-            assertEquals(type, detector.detect(null, metadata));
-        } catch (IOException e) {
-            fail("NameDetector should never throw an IOException");
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+import junit.framework.TestCase;
+
+/**
+ * Test cases for the {@link NameDetector} class.
+ */
+public class NameDetectorTest extends TestCase {
+
+    private Detector detector;
+
+    protected void setUp() {
+        Map<Pattern, MediaType> patterns = new HashMap<Pattern, MediaType>();
+        patterns.put(
+                Pattern.compile(".*\\.txt", Pattern.CASE_INSENSITIVE),
+                MediaType.TEXT_PLAIN);
+        patterns.put(Pattern.compile("README"), MediaType.TEXT_PLAIN);
+        detector = new NameDetector(patterns);
+    }
+
+    public void testDetect() {
+        assertDetect(MediaType.TEXT_PLAIN, "text.txt");
+        assertDetect(MediaType.TEXT_PLAIN, "text.txt ");    // trailing space
+        assertDetect(MediaType.TEXT_PLAIN, "text.txt\n");   // trailing newline
+        assertDetect(MediaType.TEXT_PLAIN, "text.txt?a=b"); // URL query
+        assertDetect(MediaType.TEXT_PLAIN, "text.txt#abc"); // URL fragment
+        assertDetect(MediaType.TEXT_PLAIN, "text%2Etxt");   // URL encoded
+        assertDetect(MediaType.TEXT_PLAIN, "text.TXT");     // case insensitive
+        assertDetect(MediaType.OCTET_STREAM, "text.txt.gz");
+
+        assertDetect(MediaType.TEXT_PLAIN, "README");
+        assertDetect(MediaType.TEXT_PLAIN, " README ");     // space around
+        assertDetect(MediaType.TEXT_PLAIN, "\tREADME\n");   // other whitespace
+        assertDetect(MediaType.TEXT_PLAIN, "/a/README");    // leading path
+        assertDetect(MediaType.TEXT_PLAIN, "\\b\\README");  // windows path
+        assertDetect(MediaType.OCTET_STREAM, "ReadMe");     // case sensitive
+        assertDetect(MediaType.OCTET_STREAM, "README.NOW");
+
+        // tough one
+        assertDetect(
+                MediaType.TEXT_PLAIN,
+                " See http://www.example.com:1234/README.txt?a=b#c \n");
+        assertDetect(MediaType.TEXT_PLAIN, "See README.txt"); // even this!
+        assertDetect(MediaType.OCTET_STREAM, "See README");   // but not this
+
+        // test also the zero input cases
+        assertDetect(MediaType.OCTET_STREAM, "");
+        assertDetect(MediaType.OCTET_STREAM, null);
+        try {
+            assertEquals(
+                    MediaType.OCTET_STREAM,
+                    detector.detect(null, new Metadata()));
+        } catch (IOException e) {
+            fail("NameDetector should never throw an IOException");
+        }
+    }
+
+    private void assertDetect(MediaType type, String name){
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, name);
+        try {
+            assertEquals(type, detector.detect(null, metadata));
+        } catch (IOException e) {
+            fail("NameDetector should never throw an IOException");
+        }
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java Thu Sep  2 14:40:55 2010
@@ -1,99 +1,99 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.detect;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-
-/**
- * Test cases for the {@link TextDetector} class.
- */
-public class TextDetectorTest extends TestCase {
-
-    private final Detector detector = new TextDetector();
-
-    public void testDetectNull() throws Exception {
-        assertEquals(
-                MediaType.OCTET_STREAM,
-                detector.detect(null, new Metadata()));
-    }
-
-    /**
-     * Test for type detection of empty documents.
-     *
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-483">TIKA-483</a>
-     */
-    public void testDetectEmpty() throws Exception {
-        assertNotText(new byte[0]);
-    }
-
-    public void testDetectText() throws Exception {
-        assertText("Hello, World!".getBytes("UTF-8"));
-        assertText(" \t\r\n".getBytes("UTF-8"));
-        assertText(new byte[] { -1, -2, -3, 0x09, 0x0A, 0x0C, 0x0D, 0x1B });
-        assertNotText(new byte[] { 0 });
-        assertNotText(new byte[] { 'H', 'e', 'l', 'l', 'o', 0 });
-
-        byte[] data = new byte[512];
-        Arrays.fill(data, (byte) '.');
-        assertText(data);
-        Arrays.fill(data, (byte) 0x1f);
-        assertNotText(data);
-
-        data = new byte[513];
-        Arrays.fill(data, (byte) '.');
-        assertText(data);
-        Arrays.fill(data, (byte) 0x1f);
-        assertNotText(data);
-    }
-
-    private void assertText(byte[] data) {
-        try {
-            InputStream stream = new ByteArrayInputStream(data);
-            assertEquals(
-                    MediaType.TEXT_PLAIN,
-                    detector.detect(stream, new Metadata()));
-
-            // Test that the stream has been reset
-            for (int i = 0; i < data.length; i++) {
-                assertEquals(data[i], (byte) stream.read());
-            }
-            assertEquals(-1, stream.read());
-        } catch (IOException e) {
-            fail("Unexpected exception from TextDetector");
-        }
-    }
-
-    private void assertNotText(byte[] data) {
-        try {
-            assertEquals(
-                    MediaType.OCTET_STREAM,
-                    detector.detect(
-                            new ByteArrayInputStream(data), new Metadata()));
-        } catch (IOException e) {
-            fail("Unexpected exception from TextDetector");
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Test cases for the {@link TextDetector} class.
+ */
+public class TextDetectorTest extends TestCase {
+
+    private final Detector detector = new TextDetector();
+
+    public void testDetectNull() throws Exception {
+        assertEquals(
+                MediaType.OCTET_STREAM,
+                detector.detect(null, new Metadata()));
+    }
+
+    /**
+     * Test for type detection of empty documents.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-483">TIKA-483</a>
+     */
+    public void testDetectEmpty() throws Exception {
+        assertNotText(new byte[0]);
+    }
+
+    public void testDetectText() throws Exception {
+        assertText("Hello, World!".getBytes("UTF-8"));
+        assertText(" \t\r\n".getBytes("UTF-8"));
+        assertText(new byte[] { -1, -2, -3, 0x09, 0x0A, 0x0C, 0x0D, 0x1B });
+        assertNotText(new byte[] { 0 });
+        assertNotText(new byte[] { 'H', 'e', 'l', 'l', 'o', 0 });
+
+        byte[] data = new byte[512];
+        Arrays.fill(data, (byte) '.');
+        assertText(data);
+        Arrays.fill(data, (byte) 0x1f);
+        assertNotText(data);
+
+        data = new byte[513];
+        Arrays.fill(data, (byte) '.');
+        assertText(data);
+        Arrays.fill(data, (byte) 0x1f);
+        assertNotText(data);
+    }
+
+    private void assertText(byte[] data) {
+        try {
+            InputStream stream = new ByteArrayInputStream(data);
+            assertEquals(
+                    MediaType.TEXT_PLAIN,
+                    detector.detect(stream, new Metadata()));
+
+            // Test that the stream has been reset
+            for (int i = 0; i < data.length; i++) {
+                assertEquals(data[i], (byte) stream.read());
+            }
+            assertEquals(-1, stream.read());
+        } catch (IOException e) {
+            fail("Unexpected exception from TextDetector");
+        }
+    }
+
+    private void assertNotText(byte[] data) {
+        try {
+            assertEquals(
+                    MediaType.OCTET_STREAM,
+                    detector.detect(
+                            new ByteArrayInputStream(data), new Metadata()));
+        } catch (IOException e) {
+            fail("Unexpected exception from TextDetector");
+        }
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java Thu Sep  2 14:40:55 2010
@@ -1,75 +1,75 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.detect;
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.TreeMap;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-
-/**
- * Test cases for the {@link TypeDetector} class.
- */
-public class TypeDetectorTest extends TestCase {
-
-    private Detector detector = new TypeDetector();
-    
-    private static final Map<String, String> params = new
-        TreeMap<String, String>();
-    static{
-        params.put("a", "b");
-    }
-    
-    private static final MediaType TEXT_PLAIN_A_EQ_B = 
-          new MediaType("text", "plain", params);
-
-    public void testDetect() {
-        assertDetect(MediaType.TEXT_PLAIN, "text/plain");
-        assertDetect(MediaType.TEXT_PLAIN, "TEXT/PLAIN");
-        assertDetect(MediaType.TEXT_PLAIN, " text/\tplain\n");
-        assertDetect(TEXT_PLAIN_A_EQ_B, "text/plain; a=b");
-        assertDetect(TEXT_PLAIN_A_EQ_B, "\ttext/plain; a=b\n");
-
-        assertDetect(MediaType.OCTET_STREAM, "text\\plain");
-
-        // test also the zero input cases
-        assertDetect(MediaType.OCTET_STREAM, "");
-        assertDetect(MediaType.OCTET_STREAM, null);
-        try {
-            assertEquals(
-                    MediaType.OCTET_STREAM,
-                    detector.detect(null, new Metadata()));
-        } catch (IOException e) {
-            fail("TypeDetector should never throw an IOException");
-        }
-    }
-
-    private void assertDetect(MediaType type, String name){
-        Metadata metadata = new Metadata();
-        metadata.set(Metadata.CONTENT_TYPE, name);
-        try {
-            assertEquals(type, detector.detect(null, metadata));
-        } catch (IOException e) {
-            fail("TypeDetector should never throw an IOException");
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.TreeMap;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Test cases for the {@link TypeDetector} class.
+ */
+public class TypeDetectorTest extends TestCase {
+
+    private Detector detector = new TypeDetector();
+    
+    private static final Map<String, String> params = new
+        TreeMap<String, String>();
+    static{
+        params.put("a", "b");
+    }
+    
+    private static final MediaType TEXT_PLAIN_A_EQ_B = 
+          new MediaType("text", "plain", params);
+
+    public void testDetect() {
+        assertDetect(MediaType.TEXT_PLAIN, "text/plain");
+        assertDetect(MediaType.TEXT_PLAIN, "TEXT/PLAIN");
+        assertDetect(MediaType.TEXT_PLAIN, " text/\tplain\n");
+        assertDetect(TEXT_PLAIN_A_EQ_B, "text/plain; a=b");
+        assertDetect(TEXT_PLAIN_A_EQ_B, "\ttext/plain; a=b\n");
+
+        assertDetect(MediaType.OCTET_STREAM, "text\\plain");
+
+        // test also the zero input cases
+        assertDetect(MediaType.OCTET_STREAM, "");
+        assertDetect(MediaType.OCTET_STREAM, null);
+        try {
+            assertEquals(
+                    MediaType.OCTET_STREAM,
+                    detector.detect(null, new Metadata()));
+        } catch (IOException e) {
+            fail("TypeDetector should never throw an IOException");
+        }
+    }
+
+    private void assertDetect(MediaType type, String name){
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, name);
+        try {
+            assertEquals(type, detector.detect(null, metadata));
+        } catch (IOException e) {
+            fail("TypeDetector should never throw an IOException");
+        }
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java Thu Sep  2 14:40:55 2010
@@ -1,54 +1,54 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.language;
-
-import java.io.IOException;
-
-import junit.framework.TestCase;
-
-public class LanguageProfileTest extends TestCase {
-
-    public void testLanguageProfile() throws IOException {
-        LanguageProfile foo = new LanguageProfile();
-        assertEquals(0, foo.getCount("foo"));
-
-        foo.add("foo");
-        assertEquals(1, foo.getCount("foo"));
-
-        foo.add("foo", 3);
-        assertEquals(4, foo.getCount("foo"));
-
-        LanguageProfile bar = new LanguageProfile();
-        assertEquals(1.0, foo.distance(bar));
-
-        bar.add("bar");
-        assertEquals(Math.sqrt(2.0), foo.distance(bar));
-
-        bar.add("bar", 3);
-        assertEquals(Math.sqrt(2.0), foo.distance(bar));
-
-        LanguageProfile foobar = new LanguageProfile();
-        assertTrue(foo.distance(foobar) == bar.distance(foobar));
-
-        foobar.add("foo");
-        assertTrue( foo.distance(foobar) < bar.distance(foobar));
-
-        foobar.add("bar");
-        assertTrue(foo.distance(foobar) == bar.distance(foobar));
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.language;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+public class LanguageProfileTest extends TestCase {
+
+    public void testLanguageProfile() throws IOException {
+        LanguageProfile foo = new LanguageProfile();
+        assertEquals(0, foo.getCount("foo"));
+
+        foo.add("foo");
+        assertEquals(1, foo.getCount("foo"));
+
+        foo.add("foo", 3);
+        assertEquals(4, foo.getCount("foo"));
+
+        LanguageProfile bar = new LanguageProfile();
+        assertEquals(1.0, foo.distance(bar));
+
+        bar.add("bar");
+        assertEquals(Math.sqrt(2.0), foo.distance(bar));
+
+        bar.add("bar", 3);
+        assertEquals(Math.sqrt(2.0), foo.distance(bar));
+
+        LanguageProfile foobar = new LanguageProfile();
+        assertTrue(foo.distance(foobar) == bar.distance(foobar));
+
+        foobar.add("foo");
+        assertTrue( foo.distance(foobar) < bar.distance(foobar));
+
+        foobar.add("bar");
+        assertTrue(foo.distance(foobar) == bar.distance(foobar));
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java Thu Sep  2 14:40:55 2010
@@ -1,41 +1,41 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.language;
-
-import java.io.IOException;
-
-import junit.framework.TestCase;
-
-public class ProfilingWriterTest extends TestCase {
-
-    public void testProfilingWriter() throws IOException {
-        ProfilingWriter writer = new ProfilingWriter();
-        writer.write(" foo+BAR FooBar\n");
-        writer.close();
-
-        LanguageProfile profile = writer.getProfile();
-        assertEquals(2, profile.getCount("_fo"));
-        assertEquals(2, profile.getCount("foo"));
-        assertEquals(1, profile.getCount("oo_"));
-        assertEquals(1, profile.getCount("oob"));
-        assertEquals(1, profile.getCount("oba"));
-        assertEquals(1, profile.getCount("_ba"));
-        assertEquals(2, profile.getCount("bar"));
-        assertEquals(2, profile.getCount("ar_"));
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.language;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+public class ProfilingWriterTest extends TestCase {
+
+    public void testProfilingWriter() throws IOException {
+        ProfilingWriter writer = new ProfilingWriter();
+        writer.write(" foo+BAR FooBar\n");
+        writer.close();
+
+        LanguageProfile profile = writer.getProfile();
+        assertEquals(2, profile.getCount("_fo"));
+        assertEquals(2, profile.getCount("foo"));
+        assertEquals(1, profile.getCount("oo_"));
+        assertEquals(1, profile.getCount("oob"));
+        assertEquals(1, profile.getCount("oba"));
+        assertEquals(1, profile.getCount("_ba"));
+        assertEquals(2, profile.getCount("bar"));
+        assertEquals(2, profile.getCount("ar_"));
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java Thu Sep  2 14:40:55 2010
@@ -1,49 +1,49 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.ByteArrayOutputStream;
-import java.io.OutputStream;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.metadata.Metadata;
-
-/**
- * Test cases for the {@link BodyContentHandler} class.
- */
-public class BodyContentHandlerTest extends TestCase {
-
-    /**
-     * Test that the conversion to an {@link OutputStream} doesn't leave
-     * characters unflushed in an internal buffer.
-     *
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
-     */
-    public void testOutputStream() throws Exception {
-        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(
-                new BodyContentHandler(buffer), new Metadata());
-        xhtml.startDocument();
-        xhtml.element("p", "Test text");
-        xhtml.endDocument();
-
-        assertEquals("Test text\n", buffer.toString());
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.ByteArrayOutputStream;
+import java.io.OutputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * Test cases for the {@link BodyContentHandler} class.
+ */
+public class BodyContentHandlerTest extends TestCase {
+
+    /**
+     * Test that the conversion to an {@link OutputStream} doesn't leave
+     * characters unflushed in an internal buffer.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
+     */
+    public void testOutputStream() throws Exception {
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(
+                new BodyContentHandler(buffer), new Metadata());
+        xhtml.startDocument();
+        xhtml.element("p", "Test text");
+        xhtml.endDocument();
+
+        assertEquals("Test text\n", buffer.toString());
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java Thu Sep  2 14:40:55 2010
@@ -1,66 +1,66 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.StringReader;
-import java.net.ConnectException;
-
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-import junit.framework.TestCase;
-
-import org.xml.sax.InputSource;
-import org.xml.sax.helpers.DefaultHandler;
-
-/**
- * Unit tests for the {@link OfflineContentHandler} class.
- */
-public class OfflineContentHandlerTest extends TestCase {
-
-    private SAXParser parser;
-
-    private DefaultHandler offline;
-
-    protected void setUp() throws Exception {
-        parser = SAXParserFactory.newInstance().newSAXParser();
-        offline = new OfflineContentHandler(new DefaultHandler());
-    }
-
-    public void testExternalDTD() throws Exception {
-        String xml =
-            "<!DOCTYPE foo SYSTEM \"http://127.234.172.38:7845/bar\"><foo/>";
-        try {
-            parser.parse(new InputSource(new StringReader(xml)), offline);
-        } catch (ConnectException e) {
-            fail("Parser tried to access the external DTD:" + e);
-        }
-    }
-
-    public void testExternalEntity() throws Exception {
-        String xml =
-            "<!DOCTYPE foo ["
-            + " <!ENTITY bar SYSTEM \"http://127.234.172.38:7845/bar\">"
-            + " ]><foo>&bar;</foo>";
-        try {
-            parser.parse(new InputSource(new StringReader(xml)), offline);
-        } catch (ConnectException e) {
-            fail("Parser tried to access the external DTD:" + e);
-        }
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.StringReader;
+import java.net.ConnectException;
+
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Unit tests for the {@link OfflineContentHandler} class.
+ */
+public class OfflineContentHandlerTest extends TestCase {
+
+    private SAXParser parser;
+
+    private DefaultHandler offline;
+
+    protected void setUp() throws Exception {
+        parser = SAXParserFactory.newInstance().newSAXParser();
+        offline = new OfflineContentHandler(new DefaultHandler());
+    }
+
+    public void testExternalDTD() throws Exception {
+        String xml =
+            "<!DOCTYPE foo SYSTEM \"http://127.234.172.38:7845/bar\"><foo/>";
+        try {
+            parser.parse(new InputSource(new StringReader(xml)), offline);
+        } catch (ConnectException e) {
+            fail("Parser tried to access the external DTD:" + e);
+        }
+    }
+
+    public void testExternalEntity() throws Exception {
+        String xml =
+            "<!DOCTYPE foo ["
+            + " <!ENTITY bar SYSTEM \"http://127.234.172.38:7845/bar\">"
+            + " ]><foo>&bar;</foo>";
+        try {
+            parser.parse(new InputSource(new StringReader(xml)), offline);
+        } catch (ConnectException e) {
+            fail("Parser tried to access the external DTD:" + e);
+        }
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java Thu Sep  2 14:40:55 2010
@@ -1,68 +1,68 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import junit.framework.TestCase;
-
-/**
- * Unit tests for the {@link SafeContentHandler} class.
- */
-public class SafeContentHandlerTest extends TestCase {
-
-    private ContentHandler output;
-
-    private ContentHandler safe;
-
-    protected void setUp() {
-        output = new WriteOutContentHandler();
-        safe = new SafeContentHandler(output);
-    }
-
-    public void testEmptyInput() throws SAXException {
-        safe.characters(new char[0], 0, 0);
-        safe.ignorableWhitespace(new char[0], 0, 0);
-        assertEquals("", output.toString());
-    }
-
-    public void testNormalCharacters() throws SAXException {
-        safe.characters("abc".toCharArray(), 0, 3);
-        assertEquals("abc", output.toString());
-    }
-
-    public void testNormalWhitespace() throws SAXException {
-        safe.ignorableWhitespace("abc".toCharArray(), 0, 3);
-        assertEquals("abc", output.toString());
-    }
-
-    public void testInvalidCharacters() throws SAXException {
-        safe.characters("ab\u0007".toCharArray(), 0, 3);
-        safe.characters("a\u000Bc".toCharArray(), 0, 3);
-        safe.characters("\u0019bc".toCharArray(), 0, 3);
-        assertEquals("ab a c bc", output.toString());
-    }
-
-    public void testInvalidWhitespace() throws SAXException {
-        safe.ignorableWhitespace("ab\u0000".toCharArray(), 0, 3);
-        safe.ignorableWhitespace("a\u0001c".toCharArray(), 0, 3);
-        safe.ignorableWhitespace("\u0002bc".toCharArray(), 0, 3);
-        assertEquals("ab a c bc", output.toString());
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for the {@link SafeContentHandler} class.
+ */
+public class SafeContentHandlerTest extends TestCase {
+
+    private ContentHandler output;
+
+    private ContentHandler safe;
+
+    protected void setUp() {
+        output = new WriteOutContentHandler();
+        safe = new SafeContentHandler(output);
+    }
+
+    public void testEmptyInput() throws SAXException {
+        safe.characters(new char[0], 0, 0);
+        safe.ignorableWhitespace(new char[0], 0, 0);
+        assertEquals("", output.toString());
+    }
+
+    public void testNormalCharacters() throws SAXException {
+        safe.characters("abc".toCharArray(), 0, 3);
+        assertEquals("abc", output.toString());
+    }
+
+    public void testNormalWhitespace() throws SAXException {
+        safe.ignorableWhitespace("abc".toCharArray(), 0, 3);
+        assertEquals("abc", output.toString());
+    }
+
+    public void testInvalidCharacters() throws SAXException {
+        safe.characters("ab\u0007".toCharArray(), 0, 3);
+        safe.characters("a\u000Bc".toCharArray(), 0, 3);
+        safe.characters("\u0019bc".toCharArray(), 0, 3);
+        assertEquals("ab a c bc", output.toString());
+    }
+
+    public void testInvalidWhitespace() throws SAXException {
+        safe.ignorableWhitespace("ab\u0000".toCharArray(), 0, 3);
+        safe.ignorableWhitespace("a\u0001c".toCharArray(), 0, 3);
+        safe.ignorableWhitespace("\u0002bc".toCharArray(), 0, 3);
+        assertEquals("ab a c bc", output.toString());
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java Thu Sep  2 14:40:55 2010
@@ -1,77 +1,77 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import org.apache.tika.metadata.Metadata;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import junit.framework.TestCase;
-
-/**
- * Unit tests for the {@link XHTMLContentHandler} class.
- */
-public class XHTMLContentHandlerTest extends TestCase {
-
-    private ContentHandler output;
-
-    private XHTMLContentHandler xhtml;
-
-    protected void setUp() {
-        output = new BodyContentHandler();
-        xhtml = new XHTMLContentHandler(output, new Metadata());
-    }
-
-    /**
-     * Test that content in block elements are properly separated in text
-     * output.
-     *
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-188">TIKA-188</a>
-     */
-    public void testExtraWhitespace() throws SAXException {
-        xhtml.startDocument();
-
-        xhtml.element("p", "foo");
-        xhtml.startElement("p");
-        xhtml.characters("b");
-        xhtml.element("b", "a"); // inlines should not cause extra whitespace
-        xhtml.characters("r");
-        xhtml.endElement("p");
-
-        xhtml.startElement("table");
-        xhtml.startElement("tr");
-        xhtml.element("th", "x");
-        xhtml.element("th", "y");
-        xhtml.endElement("tr");
-        xhtml.startElement("tr");
-        xhtml.element("td", "a");
-        xhtml.element("td", "b");
-        xhtml.endElement("tr");
-        xhtml.endElement("table");
-        xhtml.endDocument();
-
-        String[] words = output.toString().split("\\s+");
-        assertEquals(6, words.length);
-        assertEquals("foo", words[0]);
-        assertEquals("bar", words[1]);
-        assertEquals("x", words[2]);
-        assertEquals("y", words[3]);
-        assertEquals("a", words[4]);
-        assertEquals("b", words[5]);
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import org.apache.tika.metadata.Metadata;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for the {@link XHTMLContentHandler} class.
+ */
+public class XHTMLContentHandlerTest extends TestCase {
+
+    private ContentHandler output;
+
+    private XHTMLContentHandler xhtml;
+
+    protected void setUp() {
+        output = new BodyContentHandler();
+        xhtml = new XHTMLContentHandler(output, new Metadata());
+    }
+
+    /**
+     * Test that content in block elements are properly separated in text
+     * output.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-188">TIKA-188</a>
+     */
+    public void testExtraWhitespace() throws SAXException {
+        xhtml.startDocument();
+
+        xhtml.element("p", "foo");
+        xhtml.startElement("p");
+        xhtml.characters("b");
+        xhtml.element("b", "a"); // inlines should not cause extra whitespace
+        xhtml.characters("r");
+        xhtml.endElement("p");
+
+        xhtml.startElement("table");
+        xhtml.startElement("tr");
+        xhtml.element("th", "x");
+        xhtml.element("th", "y");
+        xhtml.endElement("tr");
+        xhtml.startElement("tr");
+        xhtml.element("td", "a");
+        xhtml.element("td", "b");
+        xhtml.endElement("tr");
+        xhtml.endElement("table");
+        xhtml.endDocument();
+
+        String[] words = output.toString().split("\\s+");
+        assertEquals(6, words.length);
+        assertEquals("foo", words[0]);
+        assertEquals("bar", words[1]);
+        assertEquals("x", words[2]);
+        assertEquals("y", words[3]);
+        assertEquals("a", words[4]);
+        assertEquals("b", words[5]);
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg Thu Sep  2 14:40:55 2010
@@ -1,8 +1,8 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<svg:svg xmlns:svg="http://www.w3.org/2000/svg" width="12cm" height="12cm">
-  <svg:g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
-    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
-    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
-    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
-  </svg:g>
+<?xml version="1.0" encoding="UTF-8"?>
+<svg:svg xmlns:svg="http://www.w3.org/2000/svg" width="12cm" height="12cm">
+  <svg:g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
+    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
+    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
+    <svg:circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
+  </svg:g>
 </svg:svg>
\ No newline at end of file

Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg Thu Sep  2 14:40:55 2010
@@ -1,8 +1,8 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<svg xmlns="http://www.w3.org/2000/svg" width="12cm" height="12cm">
-  <g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
-    <circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
-    <circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
-    <circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
-  </g>
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" width="12cm" height="12cm">
+  <g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
+    <circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
+    <circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
+    <circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
+  </g>
 </svg>
\ No newline at end of file

Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl Thu Sep  2 14:40:55 2010
@@ -1,9 +1,9 @@
-<?xml version="1.0" encoding="utf-8"?>
-<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
-
-  <xsl:output method="xml" indent="yes"/>
-
-  <xsl:template match="/">
-    <test hello="world"/>
-  </xsl:template>
-</xsl:stylesheet>
+<?xml version="1.0" encoding="utf-8"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+  <xsl:output method="xml" indent="yes"/>
+
+  <xsl:template match="/">
+    <test hello="world"/>
+  </xsl:template>
+</xsl:stylesheet>

Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml Thu Sep  2 14:40:55 2010
@@ -1,2 +1,2 @@
-<?xml version="1.0" encoding="ISO-8859-1"?>
+<?xml version="1.0" encoding="ISO-8859-1"?>
 <test hello="world"/>
\ No newline at end of file

Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml Thu Sep  2 14:40:55 2010
@@ -1,21 +1,21 @@
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one
-  or more contributor license agreements.  See the NOTICE file
-  distributed with this work for additional information
-  regarding copyright ownership.  The ASF licenses this file
-  to you under the Apache License, Version 2.0 (the
-  "License"); you may not use this file except in compliance
-  with the License.  You may obtain a copy of the License at
-  
-  http://www.apache.org/licenses/LICENSE-2.0
-  
-  Unless required by applicable law or agreed to in writing,
-  software distributed under the License is distributed on an
-  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-  KIND, either express or implied.  See the License for the
-  specific language governing permissions and limitations
-  under the License.
--->
-<?somepi blahblah test="ignore-me.xml" ?>
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<?somepi blahblah test="ignore-me.xml" ?>
 <test hello="world"/>
\ No newline at end of file

Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml Thu Sep  2 14:40:55 2010
@@ -1,2 +1,2 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version="1.0" encoding="UTF-8"?>
 <test hello="world"/>
\ No newline at end of file

Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html Thu Sep  2 14:40:55 2010
@@ -1,10 +1,10 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>Hello World</title>
-</head>
-<body>
-  <p>Hello World!<p/>
-</body>
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<title>Hello World</title>
+</head>
+<body>
+  <p>Hello World!<p/>
+</body>
 </html>
\ No newline at end of file

Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html
------------------------------------------------------------------------------
    svn:eol-style = native