You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/03/30 18:16:25 UTC

any23 git commit: ANY23-339 fixes itemscope hashcode collision problem, allows absolute URIs as subjects

Repository: any23
Updated Branches:
  refs/heads/master 316b4ec0d -> a1b72b720


ANY23-339 fixes itemscope hashcode collision problem, allows absolute URIs as subjects


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/a1b72b72
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/a1b72b72
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/a1b72b72

Branch: refs/heads/master
Commit: a1b72b720a2cdb2802fd8e82856ee67702d002cd
Parents: 316b4ec
Author: Hans <fi...@gmail.com>
Authored: Fri Mar 30 12:04:25 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Fri Mar 30 12:04:25 2018 -0500

----------------------------------------------------------------------
 .../extractor/microdata/MicrodataExtractor.java | 29 ++++++++++++--------
 .../microdata/MicrodataExtractorTest.java       |  9 ++++++
 2 files changed, 27 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/a1b72b72/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
index aa01dfe..d2fa7aa 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
@@ -40,6 +40,8 @@ import org.w3c.dom.NodeList;
 
 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.Date;
 import java.util.HashMap;
@@ -430,21 +432,12 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
             IRI documentIRI, ExtractionResult out,
             Map<ItemScope, Resource> mappings
     ) throws ExtractionException {
-        Resource subject;
-        if (mappings.containsKey(itemScope)) {
-            subject = mappings.get(itemScope);
-        } else if (isAbsoluteURL(itemScope.getItemId())) {
-            subject = RDFUtils.iri(itemScope.getItemId());
-        } else {
-            subject = RDFUtils.getBNode(Integer.toString(itemScope.hashCode()));
-        }
-        mappings.put(itemScope, subject);
+        Resource subject = mappings.computeIfAbsent(itemScope, scope -> createSubjectForItemId(scope.getItemId()));
 
         // ItemScope.type could be null, but surely it's a valid URL
         String itemScopeType = "";
         if (itemScope.getType() != null) {
-            String itemType;
-            itemType = itemScope.getType().toString();
+            String itemType = itemScope.getType().toString();
             out.writeTriple(subject, RDF.TYPE, RDFUtils.iri(itemType));
             itemScopeType = itemScope.getType().toString();
         }
@@ -472,6 +465,20 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
         return subject;
     }
 
+    /** Returns an IRI when itemId parses as an absolute URI, else RDFUtils.bnode(). */
+    private static Resource createSubjectForItemId(String itemId) {
+        if (itemId == null) {
+            return RDFUtils.bnode();
+        }
+        try {
+            URI parsedId = new URI(itemId.trim());
+            return parsedId.isAbsolute() ? RDFUtils.iri(parsedId.toString()) : RDFUtils.bnode();
+        } catch (URISyntaxException ignored) {
+            // An unparseable itemid cannot serve as a subject IRI; use a blank node instead.
+            return RDFUtils.bnode();
+        }
+    }
+
     private void processProperty(
             Resource subject,
             String propName,

http://git-wip-us.apache.org/repos/asf/any23/blob/a1b72b72/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
index f8a0650..8161b36 100644
--- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
@@ -20,6 +20,7 @@ package org.apache.any23.extractor.microdata;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractorFactory;
 import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.apache.any23.rdf.RDFUtils;
 import org.apache.any23.vocab.SINDICE;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -73,6 +74,14 @@ public class MicrodataExtractorTest extends AbstractExtractorTestCase {
         logger.debug(dumpModelToNQuads());
     }
 
+    @Test
+    public void testMicrodataBasic() { // covers ANY23-339: absolute-URI itemid used as subject
+        assertExtract("/microdata/microdata-basic.html"); // runs extraction on the fixture page
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 40); // presumably null = wildcard, i.e. 40 total
+        assertStatementsSize(RDFUtils.iri("urn:isbn:0-330-34032-8"), null, null, 4); // itemid URN
+    }
+
     /**
      * Reference test as provided by <a href="http://googlewebmastercentral.blogspot.com/2010/03/microdata-support-for-rich-snippets.html">Google Rich Snippet for Microdata.</a>
      *