You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/08/06 23:01:37 UTC

any23 git commit: sanity check: added another JSON-cleaning test case

Repository: any23
Updated Branches:
  refs/heads/master 99398b46a -> 92945ab9c


sanity check: added another JSON-cleaning test case


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/92945ab9
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/92945ab9
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/92945ab9

Branch: refs/heads/master
Commit: 92945ab9cf8b846966f7da327885b4d3b6a4035a
Parents: 99398b4
Author: Hans <fi...@gmail.com>
Authored: Mon Aug 6 18:00:33 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Mon Aug 6 18:00:33 2018 -0500

----------------------------------------------------------------------
 .../extractor/rdf/JSONLDExtractorTest.java      | 17 +++++++++++++
 .../test/resources/html/json-cleaning-test.json | 26 ++++++++++++++++++++
 2 files changed, 43 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/92945ab9/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
index f1338b4..5db98ae 100644
--- a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
@@ -22,6 +22,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParser;
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionParameters;
@@ -82,6 +84,21 @@ public class JSONLDExtractorTest {
     }
   }
 
+  /** Checks that the cleaned fixture parses with Jackson into exactly 41 tokens. */
+  @Test
+  public void testJsonCleaning() throws Exception {
+    int numTokens = 0;
+    // try-with-resources closes the parser even when tokenizing throws
+    try (JsonParser parser = new JsonFactory().createParser(new JsonCleaningInputStream(
+        getClass().getResourceAsStream("/html/json-cleaning-test.json")))) {
+      while (parser.nextToken() != null) {
+        numTokens++;
+      }
+    }
+    // JUnit's signature is assertEquals(expected, actual)
+    Assert.assertEquals(41, numTokens);
+  }
+
   public void extract(IRI uri, String filePath) 
     throws IOException, ExtractionException, TripleHandlerException {
     ByteArrayOutputStream baos = new ByteArrayOutputStream();

http://git-wip-us.apache.org/repos/asf/any23/blob/92945ab9/test-resources/src/test/resources/html/json-cleaning-test.json
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/json-cleaning-test.json b/test-resources/src/test/resources/html/json-cleaning-test.json
new file mode 100644
index 0000000..09ec189
--- /dev/null
+++ b/test-resources/src/test/resources/html/json-cleaning-test.json
@@ -0,0 +1,26 @@
+{ /*  " ' # //*/
+  "a": { #comment <![CDATA[
+    "b": 1234;'c': {
+      "d": [
+        {
+          "f": {
+
+          } "g": {
+           'i':[1,2,3,//comment
+                                      ,4,5,,
+                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      6] 'j':[
+          1 ]]>
+
+        ] /**/
+          }, /*}
+          */} {
+    "key"
+            : 'value',,,
+  }
+
+      ] "e": {
+
+      },
+    }
+  }
+}
\ No newline at end of file