You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/02 16:40:56 UTC
svn commit: r991955 [3/6] - in /tika/trunk:
tika-core/src/test/java/org/apache/tika/
tika-core/src/test/java/org/apache/tika/detect/
tika-core/src/test/java/org/apache/tika/language/
tika-core/src/test/java/org/apache/tika/sax/ tika-core/src/test/resou...
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/TikaTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java Thu Sep 2 14:40:55 2010
@@ -1,120 +1,120 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.detect;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-
-/**
- * Test cases for the {@link MagicDetector} class.
- */
-public class MagicDetectorTest extends TestCase {
-
- public void testDetectNull() throws Exception {
- MediaType html = new MediaType("text", "html");
- Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
- assertEquals(
- MediaType.OCTET_STREAM,
- detector.detect(null, new Metadata()));
- }
-
- public void testDetectSimple() throws Exception {
- MediaType html = new MediaType("text", "html");
- Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
-
- assertDetect(detector, html, "<html");
- assertDetect(detector, html, "<html><head/><body/></html>");
- assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
- assertDetect(detector, MediaType.OCTET_STREAM, "<?xml?><html");
- assertDetect(detector, MediaType.OCTET_STREAM, " <html");
- assertDetect(detector, MediaType.OCTET_STREAM, "");
- }
-
- public void testDetectOffsetRange() throws Exception {
- MediaType html = new MediaType("text", "html");
- Detector detector = new MagicDetector(
- html, "<html".getBytes("ASCII"), null, 0, 64);
-
- assertDetect(detector, html, "<html");
- assertDetect(detector, html, "<html><head/><body/></html>");
- assertDetect(detector, html, "<?xml?><html/>");
- assertDetect(detector, html, "\n <html");
- assertDetect(detector, html, "\u0000<html");
- assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
- assertDetect(detector, MediaType.OCTET_STREAM, " html");
- assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
-
- assertDetect(detector, html,
- "0........1.........2.........3.........4.........5.........6"
- + "1234<html");
- assertDetect(detector, MediaType.OCTET_STREAM,
- "0........1.........2.........3.........4.........5.........6"
- + "12345<html");
-
- assertDetect(detector, MediaType.OCTET_STREAM, "");
-}
-
- public void testDetectMask() throws Exception {
- MediaType html = new MediaType("text", "html");
- byte up = (byte) 0xdf;
- Detector detector = new MagicDetector(
- html,
- new byte[] { '<', 'H', 'T', 'M', 'L' },
- new byte[] { (byte) 0xff, up, up, up, up },
- 0, 64);
-
- assertDetect(detector, html, "<html");
- assertDetect(detector, html, "<HTML><head/><body/></html>");
- assertDetect(detector, html, "<?xml?><HtMl/>");
- assertDetect(detector, html, "\n <html");
- assertDetect(detector, html, "\u0000<HTML");
- assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
- assertDetect(detector, MediaType.OCTET_STREAM, " html");
-
- assertDetect(detector, html,
- "0 1 2 3 4 5 6"
- + "1234<html");
- assertDetect(detector, MediaType.OCTET_STREAM,
- "0 1 2 3 4 5 6"
- + "12345<html");
-
- assertDetect(detector, MediaType.OCTET_STREAM, "");
- }
-
- private void assertDetect(Detector detector, MediaType type, String data) {
- try {
- byte[] bytes = data.getBytes("ASCII");
- InputStream stream = new ByteArrayInputStream(bytes);
- assertEquals(type, detector.detect(stream, new Metadata()));
-
- // Test that the stream has been reset
- for (int i = 0; i < bytes.length; i++) {
- assertEquals(bytes[i], (byte) stream.read());
- }
- assertEquals(-1, stream.read());
- } catch (IOException e) {
- fail("Unexpected exception from MagicDetector");
- }
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Test cases for the {@link MagicDetector} class.
+ */
+public class MagicDetectorTest extends TestCase {
+
+ public void testDetectNull() throws Exception {
+ MediaType html = new MediaType("text", "html");
+ Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
+ assertEquals(
+ MediaType.OCTET_STREAM,
+ detector.detect(null, new Metadata()));
+ }
+
+ public void testDetectSimple() throws Exception {
+ MediaType html = new MediaType("text", "html");
+ Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
+
+ assertDetect(detector, html, "<html");
+ assertDetect(detector, html, "<html><head/><body/></html>");
+ assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
+ assertDetect(detector, MediaType.OCTET_STREAM, "<?xml?><html");
+ assertDetect(detector, MediaType.OCTET_STREAM, " <html");
+ assertDetect(detector, MediaType.OCTET_STREAM, "");
+ }
+
+ public void testDetectOffsetRange() throws Exception {
+ MediaType html = new MediaType("text", "html");
+ Detector detector = new MagicDetector(
+ html, "<html".getBytes("ASCII"), null, 0, 64);
+
+ assertDetect(detector, html, "<html");
+ assertDetect(detector, html, "<html><head/><body/></html>");
+ assertDetect(detector, html, "<?xml?><html/>");
+ assertDetect(detector, html, "\n <html");
+ assertDetect(detector, html, "\u0000<html");
+ assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
+ assertDetect(detector, MediaType.OCTET_STREAM, " html");
+ assertDetect(detector, MediaType.OCTET_STREAM, "<HTML");
+
+ assertDetect(detector, html,
+ "0........1.........2.........3.........4.........5.........6"
+ + "1234<html");
+ assertDetect(detector, MediaType.OCTET_STREAM,
+ "0........1.........2.........3.........4.........5.........6"
+ + "12345<html");
+
+ assertDetect(detector, MediaType.OCTET_STREAM, "");
+}
+
+ public void testDetectMask() throws Exception {
+ MediaType html = new MediaType("text", "html");
+ byte up = (byte) 0xdf;
+ Detector detector = new MagicDetector(
+ html,
+ new byte[] { '<', 'H', 'T', 'M', 'L' },
+ new byte[] { (byte) 0xff, up, up, up, up },
+ 0, 64);
+
+ assertDetect(detector, html, "<html");
+ assertDetect(detector, html, "<HTML><head/><body/></html>");
+ assertDetect(detector, html, "<?xml?><HtMl/>");
+ assertDetect(detector, html, "\n <html");
+ assertDetect(detector, html, "\u0000<HTML");
+ assertDetect(detector, MediaType.OCTET_STREAM, "<htm");
+ assertDetect(detector, MediaType.OCTET_STREAM, " html");
+
+ assertDetect(detector, html,
+ "0 1 2 3 4 5 6"
+ + "1234<html");
+ assertDetect(detector, MediaType.OCTET_STREAM,
+ "0 1 2 3 4 5 6"
+ + "12345<html");
+
+ assertDetect(detector, MediaType.OCTET_STREAM, "");
+ }
+
+ private void assertDetect(Detector detector, MediaType type, String data) {
+ try {
+ byte[] bytes = data.getBytes("ASCII");
+ InputStream stream = new ByteArrayInputStream(bytes);
+ assertEquals(type, detector.detect(stream, new Metadata()));
+
+ // Test that the stream has been reset
+ for (int i = 0; i < bytes.length; i++) {
+ assertEquals(bytes[i], (byte) stream.read());
+ }
+ assertEquals(-1, stream.read());
+ } catch (IOException e) {
+ fail("Unexpected exception from MagicDetector");
+ }
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java Thu Sep 2 14:40:55 2010
@@ -1,92 +1,92 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.detect;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.regex.Pattern;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-
-import junit.framework.TestCase;
-
-/**
- * Test cases for the {@link NameDetector} class.
- */
-public class NameDetectorTest extends TestCase {
-
- private Detector detector;
-
- protected void setUp() {
- Map<Pattern, MediaType> patterns = new HashMap<Pattern, MediaType>();
- patterns.put(
- Pattern.compile(".*\\.txt", Pattern.CASE_INSENSITIVE),
- MediaType.TEXT_PLAIN);
- patterns.put(Pattern.compile("README"), MediaType.TEXT_PLAIN);
- detector = new NameDetector(patterns);
- }
-
- public void testDetect() {
- assertDetect(MediaType.TEXT_PLAIN, "text.txt");
- assertDetect(MediaType.TEXT_PLAIN, "text.txt "); // trailing space
- assertDetect(MediaType.TEXT_PLAIN, "text.txt\n"); // trailing newline
- assertDetect(MediaType.TEXT_PLAIN, "text.txt?a=b"); // URL query
- assertDetect(MediaType.TEXT_PLAIN, "text.txt#abc"); // URL fragment
- assertDetect(MediaType.TEXT_PLAIN, "text%2Etxt"); // URL encoded
- assertDetect(MediaType.TEXT_PLAIN, "text.TXT"); // case insensitive
- assertDetect(MediaType.OCTET_STREAM, "text.txt.gz");
-
- assertDetect(MediaType.TEXT_PLAIN, "README");
- assertDetect(MediaType.TEXT_PLAIN, " README "); // space around
- assertDetect(MediaType.TEXT_PLAIN, "\tREADME\n"); // other whitespace
- assertDetect(MediaType.TEXT_PLAIN, "/a/README"); // leading path
- assertDetect(MediaType.TEXT_PLAIN, "\\b\\README"); // windows path
- assertDetect(MediaType.OCTET_STREAM, "ReadMe"); // case sensitive
- assertDetect(MediaType.OCTET_STREAM, "README.NOW");
-
- // tough one
- assertDetect(
- MediaType.TEXT_PLAIN,
- " See http://www.example.com:1234/README.txt?a=b#c \n");
- assertDetect(MediaType.TEXT_PLAIN, "See README.txt"); // even this!
- assertDetect(MediaType.OCTET_STREAM, "See README"); // but not this
-
- // test also the zero input cases
- assertDetect(MediaType.OCTET_STREAM, "");
- assertDetect(MediaType.OCTET_STREAM, null);
- try {
- assertEquals(
- MediaType.OCTET_STREAM,
- detector.detect(null, new Metadata()));
- } catch (IOException e) {
- fail("NameDetector should never throw an IOException");
- }
- }
-
- private void assertDetect(MediaType type, String name){
- Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, name);
- try {
- assertEquals(type, detector.detect(null, metadata));
- } catch (IOException e) {
- fail("NameDetector should never throw an IOException");
- }
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+import junit.framework.TestCase;
+
+/**
+ * Test cases for the {@link NameDetector} class.
+ */
+public class NameDetectorTest extends TestCase {
+
+ private Detector detector;
+
+ protected void setUp() {
+ Map<Pattern, MediaType> patterns = new HashMap<Pattern, MediaType>();
+ patterns.put(
+ Pattern.compile(".*\\.txt", Pattern.CASE_INSENSITIVE),
+ MediaType.TEXT_PLAIN);
+ patterns.put(Pattern.compile("README"), MediaType.TEXT_PLAIN);
+ detector = new NameDetector(patterns);
+ }
+
+ public void testDetect() {
+ assertDetect(MediaType.TEXT_PLAIN, "text.txt");
+ assertDetect(MediaType.TEXT_PLAIN, "text.txt "); // trailing space
+ assertDetect(MediaType.TEXT_PLAIN, "text.txt\n"); // trailing newline
+ assertDetect(MediaType.TEXT_PLAIN, "text.txt?a=b"); // URL query
+ assertDetect(MediaType.TEXT_PLAIN, "text.txt#abc"); // URL fragment
+ assertDetect(MediaType.TEXT_PLAIN, "text%2Etxt"); // URL encoded
+ assertDetect(MediaType.TEXT_PLAIN, "text.TXT"); // case insensitive
+ assertDetect(MediaType.OCTET_STREAM, "text.txt.gz");
+
+ assertDetect(MediaType.TEXT_PLAIN, "README");
+ assertDetect(MediaType.TEXT_PLAIN, " README "); // space around
+ assertDetect(MediaType.TEXT_PLAIN, "\tREADME\n"); // other whitespace
+ assertDetect(MediaType.TEXT_PLAIN, "/a/README"); // leading path
+ assertDetect(MediaType.TEXT_PLAIN, "\\b\\README"); // windows path
+ assertDetect(MediaType.OCTET_STREAM, "ReadMe"); // case sensitive
+ assertDetect(MediaType.OCTET_STREAM, "README.NOW");
+
+ // tough one
+ assertDetect(
+ MediaType.TEXT_PLAIN,
+ " See http://www.example.com:1234/README.txt?a=b#c \n");
+ assertDetect(MediaType.TEXT_PLAIN, "See README.txt"); // even this!
+ assertDetect(MediaType.OCTET_STREAM, "See README"); // but not this
+
+ // test also the zero input cases
+ assertDetect(MediaType.OCTET_STREAM, "");
+ assertDetect(MediaType.OCTET_STREAM, null);
+ try {
+ assertEquals(
+ MediaType.OCTET_STREAM,
+ detector.detect(null, new Metadata()));
+ } catch (IOException e) {
+ fail("NameDetector should never throw an IOException");
+ }
+ }
+
+ private void assertDetect(MediaType type, String name){
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.RESOURCE_NAME_KEY, name);
+ try {
+ assertEquals(type, detector.detect(null, metadata));
+ } catch (IOException e) {
+ fail("NameDetector should never throw an IOException");
+ }
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java Thu Sep 2 14:40:55 2010
@@ -1,99 +1,99 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.detect;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-
-/**
- * Test cases for the {@link TextDetector} class.
- */
-public class TextDetectorTest extends TestCase {
-
- private final Detector detector = new TextDetector();
-
- public void testDetectNull() throws Exception {
- assertEquals(
- MediaType.OCTET_STREAM,
- detector.detect(null, new Metadata()));
- }
-
- /**
- * Test for type detection of empty documents.
- *
- * @see <a href="https://issues.apache.org/jira/browse/TIKA-483">TIKA-483</a>
- */
- public void testDetectEmpty() throws Exception {
- assertNotText(new byte[0]);
- }
-
- public void testDetectText() throws Exception {
- assertText("Hello, World!".getBytes("UTF-8"));
- assertText(" \t\r\n".getBytes("UTF-8"));
- assertText(new byte[] { -1, -2, -3, 0x09, 0x0A, 0x0C, 0x0D, 0x1B });
- assertNotText(new byte[] { 0 });
- assertNotText(new byte[] { 'H', 'e', 'l', 'l', 'o', 0 });
-
- byte[] data = new byte[512];
- Arrays.fill(data, (byte) '.');
- assertText(data);
- Arrays.fill(data, (byte) 0x1f);
- assertNotText(data);
-
- data = new byte[513];
- Arrays.fill(data, (byte) '.');
- assertText(data);
- Arrays.fill(data, (byte) 0x1f);
- assertNotText(data);
- }
-
- private void assertText(byte[] data) {
- try {
- InputStream stream = new ByteArrayInputStream(data);
- assertEquals(
- MediaType.TEXT_PLAIN,
- detector.detect(stream, new Metadata()));
-
- // Test that the stream has been reset
- for (int i = 0; i < data.length; i++) {
- assertEquals(data[i], (byte) stream.read());
- }
- assertEquals(-1, stream.read());
- } catch (IOException e) {
- fail("Unexpected exception from TextDetector");
- }
- }
-
- private void assertNotText(byte[] data) {
- try {
- assertEquals(
- MediaType.OCTET_STREAM,
- detector.detect(
- new ByteArrayInputStream(data), new Metadata()));
- } catch (IOException e) {
- fail("Unexpected exception from TextDetector");
- }
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Test cases for the {@link TextDetector} class.
+ */
+public class TextDetectorTest extends TestCase {
+
+ private final Detector detector = new TextDetector();
+
+ public void testDetectNull() throws Exception {
+ assertEquals(
+ MediaType.OCTET_STREAM,
+ detector.detect(null, new Metadata()));
+ }
+
+ /**
+ * Test for type detection of empty documents.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-483">TIKA-483</a>
+ */
+ public void testDetectEmpty() throws Exception {
+ assertNotText(new byte[0]);
+ }
+
+ public void testDetectText() throws Exception {
+ assertText("Hello, World!".getBytes("UTF-8"));
+ assertText(" \t\r\n".getBytes("UTF-8"));
+ assertText(new byte[] { -1, -2, -3, 0x09, 0x0A, 0x0C, 0x0D, 0x1B });
+ assertNotText(new byte[] { 0 });
+ assertNotText(new byte[] { 'H', 'e', 'l', 'l', 'o', 0 });
+
+ byte[] data = new byte[512];
+ Arrays.fill(data, (byte) '.');
+ assertText(data);
+ Arrays.fill(data, (byte) 0x1f);
+ assertNotText(data);
+
+ data = new byte[513];
+ Arrays.fill(data, (byte) '.');
+ assertText(data);
+ Arrays.fill(data, (byte) 0x1f);
+ assertNotText(data);
+ }
+
+ private void assertText(byte[] data) {
+ try {
+ InputStream stream = new ByteArrayInputStream(data);
+ assertEquals(
+ MediaType.TEXT_PLAIN,
+ detector.detect(stream, new Metadata()));
+
+ // Test that the stream has been reset
+ for (int i = 0; i < data.length; i++) {
+ assertEquals(data[i], (byte) stream.read());
+ }
+ assertEquals(-1, stream.read());
+ } catch (IOException e) {
+ fail("Unexpected exception from TextDetector");
+ }
+ }
+
+ private void assertNotText(byte[] data) {
+ try {
+ assertEquals(
+ MediaType.OCTET_STREAM,
+ detector.detect(
+ new ByteArrayInputStream(data), new Metadata()));
+ } catch (IOException e) {
+ fail("Unexpected exception from TextDetector");
+ }
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TextDetectorTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java Thu Sep 2 14:40:55 2010
@@ -1,75 +1,75 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.detect;
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.TreeMap;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-
-/**
- * Test cases for the {@link TypeDetector} class.
- */
-public class TypeDetectorTest extends TestCase {
-
- private Detector detector = new TypeDetector();
-
- private static final Map<String, String> params = new
- TreeMap<String, String>();
- static{
- params.put("a", "b");
- }
-
- private static final MediaType TEXT_PLAIN_A_EQ_B =
- new MediaType("text", "plain", params);
-
- public void testDetect() {
- assertDetect(MediaType.TEXT_PLAIN, "text/plain");
- assertDetect(MediaType.TEXT_PLAIN, "TEXT/PLAIN");
- assertDetect(MediaType.TEXT_PLAIN, " text/\tplain\n");
- assertDetect(TEXT_PLAIN_A_EQ_B, "text/plain; a=b");
- assertDetect(TEXT_PLAIN_A_EQ_B, "\ttext/plain; a=b\n");
-
- assertDetect(MediaType.OCTET_STREAM, "text\\plain");
-
- // test also the zero input cases
- assertDetect(MediaType.OCTET_STREAM, "");
- assertDetect(MediaType.OCTET_STREAM, null);
- try {
- assertEquals(
- MediaType.OCTET_STREAM,
- detector.detect(null, new Metadata()));
- } catch (IOException e) {
- fail("TypeDetector should never throw an IOException");
- }
- }
-
- private void assertDetect(MediaType type, String name){
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, name);
- try {
- assertEquals(type, detector.detect(null, metadata));
- } catch (IOException e) {
- fail("TypeDetector should never throw an IOException");
- }
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.detect;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.TreeMap;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Test cases for the {@link TypeDetector} class.
+ */
+public class TypeDetectorTest extends TestCase {
+
+ private Detector detector = new TypeDetector();
+
+ private static final Map<String, String> params = new
+ TreeMap<String, String>();
+ static{
+ params.put("a", "b");
+ }
+
+ private static final MediaType TEXT_PLAIN_A_EQ_B =
+ new MediaType("text", "plain", params);
+
+ public void testDetect() {
+ assertDetect(MediaType.TEXT_PLAIN, "text/plain");
+ assertDetect(MediaType.TEXT_PLAIN, "TEXT/PLAIN");
+ assertDetect(MediaType.TEXT_PLAIN, " text/\tplain\n");
+ assertDetect(TEXT_PLAIN_A_EQ_B, "text/plain; a=b");
+ assertDetect(TEXT_PLAIN_A_EQ_B, "\ttext/plain; a=b\n");
+
+ assertDetect(MediaType.OCTET_STREAM, "text\\plain");
+
+ // test also the zero input cases
+ assertDetect(MediaType.OCTET_STREAM, "");
+ assertDetect(MediaType.OCTET_STREAM, null);
+ try {
+ assertEquals(
+ MediaType.OCTET_STREAM,
+ detector.detect(null, new Metadata()));
+ } catch (IOException e) {
+ fail("TypeDetector should never throw an IOException");
+ }
+ }
+
+ private void assertDetect(MediaType type, String name){
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, name);
+ try {
+ assertEquals(type, detector.detect(null, metadata));
+ } catch (IOException e) {
+ fail("TypeDetector should never throw an IOException");
+ }
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/detect/TypeDetectorTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java Thu Sep 2 14:40:55 2010
@@ -1,54 +1,54 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.language;
-
-import java.io.IOException;
-
-import junit.framework.TestCase;
-
-public class LanguageProfileTest extends TestCase {
-
- public void testLanguageProfile() throws IOException {
- LanguageProfile foo = new LanguageProfile();
- assertEquals(0, foo.getCount("foo"));
-
- foo.add("foo");
- assertEquals(1, foo.getCount("foo"));
-
- foo.add("foo", 3);
- assertEquals(4, foo.getCount("foo"));
-
- LanguageProfile bar = new LanguageProfile();
- assertEquals(1.0, foo.distance(bar));
-
- bar.add("bar");
- assertEquals(Math.sqrt(2.0), foo.distance(bar));
-
- bar.add("bar", 3);
- assertEquals(Math.sqrt(2.0), foo.distance(bar));
-
- LanguageProfile foobar = new LanguageProfile();
- assertTrue(foo.distance(foobar) == bar.distance(foobar));
-
- foobar.add("foo");
- assertTrue( foo.distance(foobar) < bar.distance(foobar));
-
- foobar.add("bar");
- assertTrue(foo.distance(foobar) == bar.distance(foobar));
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.language;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+public class LanguageProfileTest extends TestCase {
+
+ public void testLanguageProfile() throws IOException {
+ LanguageProfile foo = new LanguageProfile();
+ assertEquals(0, foo.getCount("foo"));
+
+ foo.add("foo");
+ assertEquals(1, foo.getCount("foo"));
+
+ foo.add("foo", 3);
+ assertEquals(4, foo.getCount("foo"));
+
+ LanguageProfile bar = new LanguageProfile();
+ assertEquals(1.0, foo.distance(bar));
+
+ bar.add("bar");
+ assertEquals(Math.sqrt(2.0), foo.distance(bar));
+
+ bar.add("bar", 3);
+ assertEquals(Math.sqrt(2.0), foo.distance(bar));
+
+ LanguageProfile foobar = new LanguageProfile();
+ assertTrue(foo.distance(foobar) == bar.distance(foobar));
+
+ foobar.add("foo");
+ assertTrue( foo.distance(foobar) < bar.distance(foobar));
+
+ foobar.add("bar");
+ assertTrue(foo.distance(foobar) == bar.distance(foobar));
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/language/LanguageProfileTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java Thu Sep 2 14:40:55 2010
@@ -1,41 +1,41 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.language;
-
-import java.io.IOException;
-
-import junit.framework.TestCase;
-
-public class ProfilingWriterTest extends TestCase {
-
- public void testProfilingWriter() throws IOException {
- ProfilingWriter writer = new ProfilingWriter();
- writer.write(" foo+BAR FooBar\n");
- writer.close();
-
- LanguageProfile profile = writer.getProfile();
- assertEquals(2, profile.getCount("_fo"));
- assertEquals(2, profile.getCount("foo"));
- assertEquals(1, profile.getCount("oo_"));
- assertEquals(1, profile.getCount("oob"));
- assertEquals(1, profile.getCount("oba"));
- assertEquals(1, profile.getCount("_ba"));
- assertEquals(2, profile.getCount("bar"));
- assertEquals(2, profile.getCount("ar_"));
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.language;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+public class ProfilingWriterTest extends TestCase {
+
+ public void testProfilingWriter() throws IOException {
+ ProfilingWriter writer = new ProfilingWriter();
+ writer.write(" foo+BAR FooBar\n");
+ writer.close();
+
+ LanguageProfile profile = writer.getProfile();
+ assertEquals(2, profile.getCount("_fo"));
+ assertEquals(2, profile.getCount("foo"));
+ assertEquals(1, profile.getCount("oo_"));
+ assertEquals(1, profile.getCount("oob"));
+ assertEquals(1, profile.getCount("oba"));
+ assertEquals(1, profile.getCount("_ba"));
+ assertEquals(2, profile.getCount("bar"));
+ assertEquals(2, profile.getCount("ar_"));
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/language/ProfilingWriterTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java Thu Sep 2 14:40:55 2010
@@ -1,49 +1,49 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.ByteArrayOutputStream;
-import java.io.OutputStream;
-
-import junit.framework.TestCase;
-
-import org.apache.tika.metadata.Metadata;
-
-/**
- * Test cases for the {@link BodyContentHandler} class.
- */
-public class BodyContentHandlerTest extends TestCase {
-
- /**
- * Test that the conversion to an {@link OutputStream} doesn't leave
- * characters unflushed in an internal buffer.
- *
- * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
- */
- public void testOutputStream() throws Exception {
- ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-
- XHTMLContentHandler xhtml = new XHTMLContentHandler(
- new BodyContentHandler(buffer), new Metadata());
- xhtml.startDocument();
- xhtml.element("p", "Test text");
- xhtml.endDocument();
-
- assertEquals("Test text\n", buffer.toString());
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.ByteArrayOutputStream;
+import java.io.OutputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * Test cases for the {@link BodyContentHandler} class.
+ */
+public class BodyContentHandlerTest extends TestCase {
+
+ /**
+ * Test that the conversion to an {@link OutputStream} doesn't leave
+ * characters unflushed in an internal buffer.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
+ */
+ public void testOutputStream() throws Exception {
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+ XHTMLContentHandler xhtml = new XHTMLContentHandler(
+ new BodyContentHandler(buffer), new Metadata());
+ xhtml.startDocument();
+ xhtml.element("p", "Test text");
+ xhtml.endDocument();
+
+ assertEquals("Test text\n", buffer.toString());
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/BodyContentHandlerTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java Thu Sep 2 14:40:55 2010
@@ -1,66 +1,66 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.StringReader;
-import java.net.ConnectException;
-
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-import junit.framework.TestCase;
-
-import org.xml.sax.InputSource;
-import org.xml.sax.helpers.DefaultHandler;
-
-/**
- * Unit tests for the {@link OfflineContentHandler} class.
- */
-public class OfflineContentHandlerTest extends TestCase {
-
- private SAXParser parser;
-
- private DefaultHandler offline;
-
- protected void setUp() throws Exception {
- parser = SAXParserFactory.newInstance().newSAXParser();
- offline = new OfflineContentHandler(new DefaultHandler());
- }
-
- public void testExternalDTD() throws Exception {
- String xml =
- "<!DOCTYPE foo SYSTEM \"http://127.234.172.38:7845/bar\"><foo/>";
- try {
- parser.parse(new InputSource(new StringReader(xml)), offline);
- } catch (ConnectException e) {
- fail("Parser tried to access the external DTD:" + e);
- }
- }
-
- public void testExternalEntity() throws Exception {
- String xml =
- "<!DOCTYPE foo ["
- + " <!ENTITY bar SYSTEM \"http://127.234.172.38:7845/bar\">"
- + " ]><foo>&bar;</foo>";
- try {
- parser.parse(new InputSource(new StringReader(xml)), offline);
- } catch (ConnectException e) {
- fail("Parser tried to access the external DTD:" + e);
- }
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.StringReader;
+import java.net.ConnectException;
+
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Unit tests for the {@link OfflineContentHandler} class.
+ */
+public class OfflineContentHandlerTest extends TestCase {
+
+ private SAXParser parser;
+
+ private DefaultHandler offline;
+
+ protected void setUp() throws Exception {
+ parser = SAXParserFactory.newInstance().newSAXParser();
+ offline = new OfflineContentHandler(new DefaultHandler());
+ }
+
+ public void testExternalDTD() throws Exception {
+ String xml =
+ "<!DOCTYPE foo SYSTEM \"http://127.234.172.38:7845/bar\"><foo/>";
+ try {
+ parser.parse(new InputSource(new StringReader(xml)), offline);
+ } catch (ConnectException e) {
+ fail("Parser tried to access the external DTD:" + e);
+ }
+ }
+
+ public void testExternalEntity() throws Exception {
+ String xml =
+ "<!DOCTYPE foo ["
+ + " <!ENTITY bar SYSTEM \"http://127.234.172.38:7845/bar\">"
+ + " ]><foo>&bar;</foo>";
+ try {
+ parser.parse(new InputSource(new StringReader(xml)), offline);
+ } catch (ConnectException e) {
+ fail("Parser tried to access the external DTD:" + e);
+ }
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/OfflineContentHandlerTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java Thu Sep 2 14:40:55 2010
@@ -1,68 +1,68 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import junit.framework.TestCase;
-
-/**
- * Unit tests for the {@link SafeContentHandler} class.
- */
-public class SafeContentHandlerTest extends TestCase {
-
- private ContentHandler output;
-
- private ContentHandler safe;
-
- protected void setUp() {
- output = new WriteOutContentHandler();
- safe = new SafeContentHandler(output);
- }
-
- public void testEmptyInput() throws SAXException {
- safe.characters(new char[0], 0, 0);
- safe.ignorableWhitespace(new char[0], 0, 0);
- assertEquals("", output.toString());
- }
-
- public void testNormalCharacters() throws SAXException {
- safe.characters("abc".toCharArray(), 0, 3);
- assertEquals("abc", output.toString());
- }
-
- public void testNormalWhitespace() throws SAXException {
- safe.ignorableWhitespace("abc".toCharArray(), 0, 3);
- assertEquals("abc", output.toString());
- }
-
- public void testInvalidCharacters() throws SAXException {
- safe.characters("ab\u0007".toCharArray(), 0, 3);
- safe.characters("a\u000Bc".toCharArray(), 0, 3);
- safe.characters("\u0019bc".toCharArray(), 0, 3);
- assertEquals("ab a c bc", output.toString());
- }
-
- public void testInvalidWhitespace() throws SAXException {
- safe.ignorableWhitespace("ab\u0000".toCharArray(), 0, 3);
- safe.ignorableWhitespace("a\u0001c".toCharArray(), 0, 3);
- safe.ignorableWhitespace("\u0002bc".toCharArray(), 0, 3);
- assertEquals("ab a c bc", output.toString());
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for the {@link SafeContentHandler} class.
+ */
+public class SafeContentHandlerTest extends TestCase {
+
+ private ContentHandler output;
+
+ private ContentHandler safe;
+
+ protected void setUp() {
+ output = new WriteOutContentHandler();
+ safe = new SafeContentHandler(output);
+ }
+
+ public void testEmptyInput() throws SAXException {
+ safe.characters(new char[0], 0, 0);
+ safe.ignorableWhitespace(new char[0], 0, 0);
+ assertEquals("", output.toString());
+ }
+
+ public void testNormalCharacters() throws SAXException {
+ safe.characters("abc".toCharArray(), 0, 3);
+ assertEquals("abc", output.toString());
+ }
+
+ public void testNormalWhitespace() throws SAXException {
+ safe.ignorableWhitespace("abc".toCharArray(), 0, 3);
+ assertEquals("abc", output.toString());
+ }
+
+ public void testInvalidCharacters() throws SAXException {
+ safe.characters("ab\u0007".toCharArray(), 0, 3);
+ safe.characters("a\u000Bc".toCharArray(), 0, 3);
+ safe.characters("\u0019bc".toCharArray(), 0, 3);
+ assertEquals("ab a c bc", output.toString());
+ }
+
+ public void testInvalidWhitespace() throws SAXException {
+ safe.ignorableWhitespace("ab\u0000".toCharArray(), 0, 3);
+ safe.ignorableWhitespace("a\u0001c".toCharArray(), 0, 3);
+ safe.ignorableWhitespace("\u0002bc".toCharArray(), 0, 3);
+ assertEquals("ab a c bc", output.toString());
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SafeContentHandlerTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java Thu Sep 2 14:40:55 2010
@@ -1,77 +1,77 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import org.apache.tika.metadata.Metadata;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import junit.framework.TestCase;
-
-/**
- * Unit tests for the {@link XHTMLContentHandler} class.
- */
-public class XHTMLContentHandlerTest extends TestCase {
-
- private ContentHandler output;
-
- private XHTMLContentHandler xhtml;
-
- protected void setUp() {
- output = new BodyContentHandler();
- xhtml = new XHTMLContentHandler(output, new Metadata());
- }
-
- /**
- * Test that content in block elements are properly separated in text
- * output.
- *
- * @see <a href="https://issues.apache.org/jira/browse/TIKA-188">TIKA-188</a>
- */
- public void testExtraWhitespace() throws SAXException {
- xhtml.startDocument();
-
- xhtml.element("p", "foo");
- xhtml.startElement("p");
- xhtml.characters("b");
- xhtml.element("b", "a"); // inlines should not cause extra whitespace
- xhtml.characters("r");
- xhtml.endElement("p");
-
- xhtml.startElement("table");
- xhtml.startElement("tr");
- xhtml.element("th", "x");
- xhtml.element("th", "y");
- xhtml.endElement("tr");
- xhtml.startElement("tr");
- xhtml.element("td", "a");
- xhtml.element("td", "b");
- xhtml.endElement("tr");
- xhtml.endElement("table");
- xhtml.endDocument();
-
- String[] words = output.toString().split("\\s+");
- assertEquals(6, words.length);
- assertEquals("foo", words[0]);
- assertEquals("bar", words[1]);
- assertEquals("x", words[2]);
- assertEquals("y", words[3]);
- assertEquals("a", words[4]);
- assertEquals("b", words[5]);
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import org.apache.tika.metadata.Metadata;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for the {@link XHTMLContentHandler} class.
+ */
+public class XHTMLContentHandlerTest extends TestCase {
+
+ private ContentHandler output;
+
+ private XHTMLContentHandler xhtml;
+
+ protected void setUp() {
+ output = new BodyContentHandler();
+ xhtml = new XHTMLContentHandler(output, new Metadata());
+ }
+
+ /**
+ * Test that content in block elements are properly separated in text
+ * output.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-188">TIKA-188</a>
+ */
+ public void testExtraWhitespace() throws SAXException {
+ xhtml.startDocument();
+
+ xhtml.element("p", "foo");
+ xhtml.startElement("p");
+ xhtml.characters("b");
+ xhtml.element("b", "a"); // inlines should not cause extra whitespace
+ xhtml.characters("r");
+ xhtml.endElement("p");
+
+ xhtml.startElement("table");
+ xhtml.startElement("tr");
+ xhtml.element("th", "x");
+ xhtml.element("th", "y");
+ xhtml.endElement("tr");
+ xhtml.startElement("tr");
+ xhtml.element("td", "a");
+ xhtml.element("td", "b");
+ xhtml.endElement("tr");
+ xhtml.endElement("table");
+ xhtml.endDocument();
+
+ String[] words = output.toString().split("\\s+");
+ assertEquals(6, words.length);
+ assertEquals("foo", words[0]);
+ assertEquals("bar", words[1]);
+ assertEquals("x", words[2]);
+ assertEquals("y", words[3]);
+ assertEquals("a", words[4]);
+ assertEquals("b", words[5]);
+ }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/XHTMLContentHandlerTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg Thu Sep 2 14:40:55 2010
@@ -1,8 +1,8 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<svg:svg xmlns:svg="http://www.w3.org/2000/svg" width="12cm" height="12cm">
- <svg:g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
- <svg:circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
- <svg:circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
- <svg:circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
- </svg:g>
+<?xml version="1.0" encoding="UTF-8"?>
+<svg:svg xmlns:svg="http://www.w3.org/2000/svg" width="12cm" height="12cm">
+ <svg:g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
+ <svg:circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
+ <svg:circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
+ <svg:circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
+ </svg:g>
</svg:svg>
\ No newline at end of file
Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles-with-prefix.svg
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg Thu Sep 2 14:40:55 2010
@@ -1,8 +1,8 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<svg xmlns="http://www.w3.org/2000/svg" width="12cm" height="12cm">
- <g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
- <circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
- <circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
- <circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
- </g>
+<?xml version="1.0" encoding="UTF-8"?>
+<svg xmlns="http://www.w3.org/2000/svg" width="12cm" height="12cm">
+ <g style="fill-opacity:0.7; stroke:black; stroke-width:0.1cm;">
+ <circle cx="6cm" cy="2cm" r="100" style="fill:red;" transform="translate(0,50)" />
+ <circle cx="6cm" cy="2cm" r="100" style="fill:blue;" transform="translate(70,150)" />
+ <circle cx="6cm" cy="2cm" r="100" style="fill:green;" transform="translate(-70,150)"/>
+ </g>
</svg>
\ No newline at end of file
Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/circles.svg
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl Thu Sep 2 14:40:55 2010
@@ -1,9 +1,9 @@
-<?xml version="1.0" encoding="utf-8"?>
-<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
-
- <xsl:output method="xml" indent="yes"/>
-
- <xsl:template match="/">
- <test hello="world"/>
- </xsl:template>
-</xsl:stylesheet>
+<?xml version="1.0" encoding="utf-8"?>
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+ <xsl:output method="xml" indent="yes"/>
+
+ <xsl:template match="/">
+ <test hello="world"/>
+ </xsl:template>
+</xsl:stylesheet>
Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/stylesheet.xsl
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml Thu Sep 2 14:40:55 2010
@@ -1,2 +1,2 @@
-<?xml version="1.0" encoding="ISO-8859-1"?>
+<?xml version="1.0" encoding="ISO-8859-1"?>
<test hello="world"/>
\ No newline at end of file
Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-iso-8859-1.xml
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml Thu Sep 2 14:40:55 2010
@@ -1,21 +1,21 @@
-<?xml version="1.0" encoding="ISO-8859-1"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
--->
-<?somepi blahblah test="ignore-me.xml" ?>
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+<?somepi blahblah test="ignore-me.xml" ?>
<test hello="world"/>
\ No newline at end of file
Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-long-comment.xml
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml Thu Sep 2 14:40:55 2010
@@ -1,2 +1,2 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version="1.0" encoding="UTF-8"?>
<test hello="world"/>
\ No newline at end of file
Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test-utf8.xml
------------------------------------------------------------------------------
svn:eol-style = native
Modified: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html?rev=991955&r1=991954&r2=991955&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html (original)
+++ tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html Thu Sep 2 14:40:55 2010
@@ -1,10 +1,10 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>Hello World</title>
-</head>
-<body>
- <p>Hello World!<p/>
-</body>
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<title>Hello World</title>
+</head>
+<body>
+ <p>Hello World!<p/>
+</body>
</html>
\ No newline at end of file
Propchange: tika/trunk/tika-core/src/test/resources/org/apache/tika/mime/test.html
------------------------------------------------------------------------------
svn:eol-style = native