You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2018/01/25 04:57:24 UTC

[1/2] any23 git commit: ANY23-291 Allow JSONLD scripts to be located anywhere in document

Repository: any23
Updated Branches:
  refs/heads/master eefa208db -> e35bff451


ANY23-291 Allow JSONLD scripts to be located anywhere in document


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/d6955826
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/d6955826
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/d6955826

Branch: refs/heads/master
Commit: d69558268b5d8e8d57f00d94b864c54ec2eaf75f
Parents: 07f7421
Author: Hans <fi...@gmail.com>
Authored: Wed Jan 24 19:58:25 2018 -0600
Committer: Hans <fi...@gmail.com>
Committed: Wed Jan 24 21:20:27 2018 -0600

----------------------------------------------------------------------
 .../extractor/html/EmbeddedJSONLDExtractor.java |  2 +-
 .../html/EmbeddedJSONLDExtractorTest.java       | 14 ++++++
 .../html-body-embedded-jsonld-extractor.html    | 37 +++++++++++++++
 ...head-and-body-embedded-jsonld-extractor.html | 47 ++++++++++++++++++++
 4 files changed, 99 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
index 1e6efdf..aeffdda 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
@@ -137,7 +137,7 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
           String baseProfile, ExtractionParameters extractionParameters,
           ExtractionContext extractionContext, ExtractionResult out)
                   throws IOException, ExtractionException {
-    List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
+    List<Node> scriptNodes = DomUtils.findAll(in, "//SCRIPT");
     Set<JSONLDScript> result = new HashSet<>();
     extractor = new JSONLDExtractorFactory().createExtractor();
     for (Node jsonldNode : scriptNodes) {

http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
index 70baa30..6e7bfa4 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
@@ -39,6 +39,20 @@ public class EmbeddedJSONLDExtractorTest extends AbstractExtractorTestCase {
 		assertStatementsSize(null, null, null, 7);
 	}
 
+	@Test
+	public void testEmbeddedJSONLDInBody() throws Exception {
+		assertExtract("/html/html-body-embedded-jsonld-extractor.html");
+		assertModelNotEmpty();
+		assertStatementsSize(null, null, null, 3);
+	}
+
+	@Test
+	public void testEmbeddedJSONLDInHeadAndBody() throws Exception {
+		assertExtract("/html/html-head-and-body-embedded-jsonld-extractor.html");
+		assertModelNotEmpty();
+		assertStatementsSize(null, null, null, 7);
+	}
+
 	@Override
 	protected ExtractorFactory<?> getExtractorFactory() {
 		return new EmbeddedJSONLDExtractorFactory();

http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html b/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
new file mode 100644
index 0000000..7efce2d
--- /dev/null
+++ b/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
@@ -0,0 +1,37 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<head>
+    <title>Hello World!</title>
+    <meta name="title" content="Embedded JSONLD extractor"/>
+</head>
+<body>
+    <h1>Embedded JSONLD Extractor</h1>
+    <p>It extracts only the embedded JSON-LD elements.
+    <div>
+    <script type="application/ld+json">
+    {
+      "@context": "http://json-ld.org/contexts/person.jsonld",
+      "@id": "http://dbpedia.org/resource/Robert_Millar",
+      "@type": "Person",
+      "name": "Robert Millar",
+      "born": "1958-09-13T00:00:00"
+    }
+    </script>
+    </div>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html b/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
new file mode 100644
index 0000000..f8ce071
--- /dev/null
+++ b/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
@@ -0,0 +1,47 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<head>
+    <title>Hello World!</title>
+    <meta name="title" content="Embedded JSONLD extractor"/>
+    <!-- As per spec in http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents -->
+    <script type="application/ld+json">
+    {
+      "@context": "http://json-ld.org/contexts/person.jsonld",
+      "@id": "http://dbpedia.org/resource/Robert_Millar",
+      "@type": "Person",
+      "name": "Robert Millar",
+      "born": "1958-09-13T00:00:00"
+    }
+    </script>
+
+</head>
+<h1>Embedded JSONLD Extractor</h1>
+<p>It extracts only the embedded JSON-LD elements.
+<body>
+    <script type="application/ld+json">
+    {
+      "@context": "http://json-ld.org/contexts/person.jsonld",
+      "@id": "http://dbpedia.org/resource/Robert_Frost",
+      "@type": "Person",
+      "name": "Robert Frost",
+      "born": "1874-03-26T00:00:00",
+      "died": "1963-01-29T00:00:00"
+    }
+    </script>
+</body>
+</html>
\ No newline at end of file


[2/2] any23 git commit: Merge branch 'ANY23-291' of https://github.com/HansBrende/any23

Posted by le...@apache.org.
Merge branch 'ANY23-291' of https://github.com/HansBrende/any23


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/e35bff45
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/e35bff45
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/e35bff45

Branch: refs/heads/master
Commit: e35bff4517110000a10ce5d35bc0091681481698
Parents: eefa208 d695582
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Jan 24 20:54:23 2018 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Jan 24 20:54:23 2018 -0800

----------------------------------------------------------------------
 .../extractor/html/EmbeddedJSONLDExtractor.java |  2 +-
 .../html/EmbeddedJSONLDExtractorTest.java       | 14 ++++++
 .../html-body-embedded-jsonld-extractor.html    | 37 +++++++++++++++
 ...head-and-body-embedded-jsonld-extractor.html | 47 ++++++++++++++++++++
 4 files changed, 99 insertions(+), 1 deletion(-)
----------------------------------------------------------------------