You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2018/01/25 04:57:24 UTC
[1/2] any23 git commit: ANY23-291 Allow JSONLD scripts to be located anywhere in document
Repository: any23
Updated Branches:
refs/heads/master eefa208db -> e35bff451
ANY23-291 Allow JSONLD scripts to be located anywhere in document
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/d6955826
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/d6955826
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/d6955826
Branch: refs/heads/master
Commit: d69558268b5d8e8d57f00d94b864c54ec2eaf75f
Parents: 07f7421
Author: Hans <fi...@gmail.com>
Authored: Wed Jan 24 19:58:25 2018 -0600
Committer: Hans <fi...@gmail.com>
Committed: Wed Jan 24 21:20:27 2018 -0600
----------------------------------------------------------------------
.../extractor/html/EmbeddedJSONLDExtractor.java | 2 +-
.../html/EmbeddedJSONLDExtractorTest.java | 14 ++++++
.../html-body-embedded-jsonld-extractor.html | 37 +++++++++++++++
...head-and-body-embedded-jsonld-extractor.html | 47 ++++++++++++++++++++
4 files changed, 99 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
index 1e6efdf..aeffdda 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
@@ -137,7 +137,7 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
String baseProfile, ExtractionParameters extractionParameters,
ExtractionContext extractionContext, ExtractionResult out)
throws IOException, ExtractionException {
- List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
+ List<Node> scriptNodes = DomUtils.findAll(in, "//SCRIPT");
Set<JSONLDScript> result = new HashSet<>();
extractor = new JSONLDExtractorFactory().createExtractor();
for (Node jsonldNode : scriptNodes) {
http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
index 70baa30..6e7bfa4 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java
@@ -39,6 +39,20 @@ public class EmbeddedJSONLDExtractorTest extends AbstractExtractorTestCase {
assertStatementsSize(null, null, null, 7);
}
+ @Test
+ public void testEmbeddedJSONLDInBody() throws Exception {
+ assertExtract("/html/html-body-embedded-jsonld-extractor.html");
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 3);
+ }
+
+ @Test
+ public void testEmbeddedJSONLDInHeadAndBody() throws Exception {
+ assertExtract("/html/html-head-and-body-embedded-jsonld-extractor.html");
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 7);
+ }
+
@Override
protected ExtractorFactory<?> getExtractorFactory() {
return new EmbeddedJSONLDExtractorFactory();
http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html b/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
new file mode 100644
index 0000000..7efce2d
--- /dev/null
+++ b/test-resources/src/test/resources/html/html-body-embedded-jsonld-extractor.html
@@ -0,0 +1,37 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+ <title>Hello World!</title>
+ <meta name="title" content="Embedded JSONLD extractor"/>
+</head>
+<body>
+ <h1>Embedded JSONLD Extractor</h1>
+ <p>It extracts only the embedded JSON-LD elements.
+ <div>
+ <script type="application/ld+json">
+ {
+ "@context": "http://json-ld.org/contexts/person.jsonld",
+ "@id": "http://dbpedia.org/resource/Robert_Millar",
+ "@type": "Person",
+ "name": "Robert Millar",
+ "born": "1958-09-13T00:00:00"
+ }
+ </script>
+ </div>
+</body>
+</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/d6955826/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html b/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
new file mode 100644
index 0000000..f8ce071
--- /dev/null
+++ b/test-resources/src/test/resources/html/html-head-and-body-embedded-jsonld-extractor.html
@@ -0,0 +1,47 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+ <title>Hello World!</title>
+ <meta name="title" content="Embedded JSONLD extractor"/>
+ <!-- As per spec in http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents -->
+ <script type="application/ld+json">
+ {
+ "@context": "http://json-ld.org/contexts/person.jsonld",
+ "@id": "http://dbpedia.org/resource/Robert_Millar",
+ "@type": "Person",
+ "name": "Robert Millar",
+ "born": "1958-09-13T00:00:00"
+ }
+ </script>
+
+</head>
+<h1>Embedded JSONLD Extractor</h1>
+<p>It extracts only the embedded JSON-LD elements.
+<body>
+ <script type="application/ld+json">
+ {
+ "@context": "http://json-ld.org/contexts/person.jsonld",
+ "@id": "http://dbpedia.org/resource/Robert_Frost",
+ "@type": "Person",
+ "name": "Robert Frost",
+ "born": "1874-03-26T00:00:00",
+ "died": "1963-01-29T00:00:00"
+ }
+ </script>
+</body>
+</html>
\ No newline at end of file
[2/2] any23 git commit: Merge branch 'ANY23-291' of https://github.com/HansBrende/any23
Posted by le...@apache.org.
Merge branch 'ANY23-291' of https://github.com/HansBrende/any23
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/e35bff45
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/e35bff45
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/e35bff45
Branch: refs/heads/master
Commit: e35bff4517110000a10ce5d35bc0091681481698
Parents: eefa208 d695582
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Jan 24 20:54:23 2018 -0800
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Jan 24 20:54:23 2018 -0800
----------------------------------------------------------------------
.../extractor/html/EmbeddedJSONLDExtractor.java | 2 +-
.../html/EmbeddedJSONLDExtractorTest.java | 14 ++++++
.../html-body-embedded-jsonld-extractor.html | 37 +++++++++++++++
...head-and-body-embedded-jsonld-extractor.html | 47 ++++++++++++++++++++
4 files changed, 99 insertions(+), 1 deletion(-)
----------------------------------------------------------------------