You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/11/08 02:20:03 UTC

tika git commit: TIKA-1896 -- add test files and unit tests, no fix yet

Repository: tika
Updated Branches:
  refs/heads/master aadccbf97 -> 7b45c7ceb


TIKA-1896 -- add test files and unit tests, no fix yet


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/7b45c7ce
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/7b45c7ce
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/7b45c7ce

Branch: refs/heads/master
Commit: 7b45c7ceb0830cb33a04571da87dd86a817d4138
Parents: aadccbf
Author: tballison <ta...@mitre.org>
Authored: Mon Nov 7 21:19:56 2016 -0500
Committer: tballison <ta...@mitre.org>
Committed: Mon Nov 7 21:19:56 2016 -0500

----------------------------------------------------------------------
 .../apache/tika/parser/html/HtmlParserTest.java    | 17 ++++++++++++++++-
 .../test-documents/testHTMLBadScript.html          |  9 +++++++++
 .../test-documents/testHTMLGoodScript.html         |  9 +++++++++
 3 files changed, 34 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/7b45c7ce/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
index 41efcc0..75744ca 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
@@ -127,7 +127,7 @@ public class HtmlParserTest extends TikaTest {
     }
 
     @Test
-    @Ignore("The file 'testXHTML_utf8.html' is not available fo testing")
+    @Ignore("The file 'testXHTML_utf8.html' is not available for testing")
     public void XtestParseUTF8() throws IOException, SAXException, TikaException {
         String path = "/test-documents/testXHTML_utf8.html";
         Metadata metadata = new Metadata();
@@ -1219,6 +1219,21 @@ public class HtmlParserTest extends TikaTest {
     }
 
     @Test
+    @Ignore("until we fix TIKA-1896")
+    public void testBadScript() throws Exception {
+        String xml = getXML("testHTMLBadScript.html").xml;
+        assertContains("This is a test", xml);
+        assertNotContained("cool", xml);
+    }
+
+    @Test
+    public void testGoodScript() throws Exception {
+        String xml = getXML("testHTMLGoodScript.html").xml;
+        assertContains("This is a test", xml);
+        assertNotContained("cool", xml);
+    }
+
+    @Test
     public void testMultiThreadingEncodingDetection() throws Exception {
         List<EncodingDetector> detectors = new ArrayList<>();
         ServiceLoader loader =

http://git-wip-us.apache.org/repos/asf/tika/blob/7b45c7ce/tika-parsers/src/test/resources/test-documents/testHTMLBadScript.html
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testHTMLBadScript.html b/tika-parsers/src/test/resources/test-documents/testHTMLBadScript.html
new file mode 100644
index 0000000..2c61f4f
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testHTMLBadScript.html
@@ -0,0 +1,9 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+    <script lang="javascript">cool script</script language>
+</head>
+<body>
+<p>This is a test.</p>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tika/blob/7b45c7ce/tika-parsers/src/test/resources/test-documents/testHTMLGoodScript.html
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testHTMLGoodScript.html b/tika-parsers/src/test/resources/test-documents/testHTMLGoodScript.html
new file mode 100644
index 0000000..f37eb98
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testHTMLGoodScript.html
@@ -0,0 +1,9 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+    <script lang="javascript">cool script</script>
+</head>
+<body>
+<p>This is a test.</p>
+</body>
+</html>
\ No newline at end of file