You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/03/30 18:16:25 UTC
any23 git commit: ANY23-339 fixes itemscope hashcode collision problem, allows absolute URIs as subjects
Repository: any23
Updated Branches:
refs/heads/master 316b4ec0d -> a1b72b720
ANY23-339 fixes itemscope hashcode collision problem, allows absolute URIs as subjects
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/a1b72b72
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/a1b72b72
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/a1b72b72
Branch: refs/heads/master
Commit: a1b72b720a2cdb2802fd8e82856ee67702d002cd
Parents: 316b4ec
Author: Hans <fi...@gmail.com>
Authored: Fri Mar 30 12:04:25 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Fri Mar 30 12:04:25 2018 -0500
----------------------------------------------------------------------
.../extractor/microdata/MicrodataExtractor.java | 29 ++++++++++++--------
.../microdata/MicrodataExtractorTest.java | 9 ++++++
2 files changed, 27 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/a1b72b72/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
index aa01dfe..d2fa7aa 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
@@ -40,6 +40,8 @@ import org.w3c.dom.NodeList;
import java.io.IOException;
import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.net.URL;
import java.util.Date;
import java.util.HashMap;
@@ -430,21 +432,12 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
IRI documentIRI, ExtractionResult out,
Map<ItemScope, Resource> mappings
) throws ExtractionException {
- Resource subject;
- if (mappings.containsKey(itemScope)) {
- subject = mappings.get(itemScope);
- } else if (isAbsoluteURL(itemScope.getItemId())) {
- subject = RDFUtils.iri(itemScope.getItemId());
- } else {
- subject = RDFUtils.getBNode(Integer.toString(itemScope.hashCode()));
- }
- mappings.put(itemScope, subject);
+ Resource subject = mappings.computeIfAbsent(itemScope, scope -> createSubjectForItemId(scope.getItemId()));
// ItemScope.type could be null, but surely it's a valid URL
String itemScopeType = "";
if (itemScope.getType() != null) {
- String itemType;
- itemType = itemScope.getType().toString();
+ String itemType = itemScope.getType().toString();
out.writeTriple(subject, RDF.TYPE, RDFUtils.iri(itemType));
itemScopeType = itemScope.getType().toString();
}
@@ -472,6 +465,20 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
return subject;
}
+ private static Resource createSubjectForItemId(String itemId) {
+ if (itemId != null) {
+ try {
+ URI uri = new URI(itemId.trim());
+ if (uri.isAbsolute()) {
+ return RDFUtils.iri(uri.toString());
+ }
+ } catch (URISyntaxException e) {
+ //not an absolute uri
+ }
+ }
+ return RDFUtils.bnode();
+ }
+
private void processProperty(
Resource subject,
String propName,
http://git-wip-us.apache.org/repos/asf/any23/blob/a1b72b72/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
index f8a0650..8161b36 100644
--- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
@@ -20,6 +20,7 @@ package org.apache.any23.extractor.microdata;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.SINDICE;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -73,6 +74,14 @@ public class MicrodataExtractorTest extends AbstractExtractorTestCase {
logger.debug(dumpModelToNQuads());
}
+ @Test
+ public void testMicrodataBasic() {
+ assertExtract("/microdata/microdata-basic.html");
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 40);
+ assertStatementsSize(RDFUtils.iri("urn:isbn:0-330-34032-8"), null, null, 4);
+ }
+
/**
* Reference test as provided by <a href="http://googlewebmastercentral.blogspot.com/2010/03/microdata-support-for-rich-snippets.html">Google Rich Snippet for Microdata.</a>
*