You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/10/30 03:29:21 UTC
[2/3] any23 git commit: check for itemscope before content, as per microdata spec
check for itemscope before content, as per microdata spec
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/4f280409
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/4f280409
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/4f280409
Branch: refs/heads/master
Commit: 4f2804094751e1522428305938e3a4c4b13d2aa4
Parents: 837d193
Author: Hans <fi...@gmail.com>
Authored: Mon Oct 29 21:39:05 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Mon Oct 29 21:39:05 2018 -0500
----------------------------------------------------------------------
.../extractor/microdata/MicrodataParser.java | 69 +++++++++++---------
1 file changed, 38 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/4f280409/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
index 013a318..c086d3f 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
@@ -353,14 +353,18 @@ public class MicrodataParser {
*/
public ItemPropValue getPropertyValue(Node node) throws MicrodataParserException {
final ItemPropValue itemPropValue = itemPropValues.get(node);
- if(itemPropValue != null)
+ if (itemPropValue != null)
return itemPropValue;
+ if (isItemScope(node)) {
+ return new ItemPropValue( getItemScope(node), ItemPropValue.Type.Nested);
+ }
+
final String nodeName = node.getNodeName().toLowerCase();
//see http://w3c.github.io/microdata-rdf/#dfn-property-values
if ("data".equals(nodeName) || "meter".equals(nodeName)) {
- String value = StringUtils.stripToEmpty(readContentAttribute(node, "value"));
+ String value = value(node, "value");
Literal l;
if (XMLDatatypeUtil.isValidInteger(value)) {
l = RDFUtils.literal(value, XMLSchema.INTEGER);
@@ -371,8 +375,8 @@ public class MicrodataParser {
}
return new ItemPropValue(l);
}
- if( "time".equals(nodeName) ) {
- String dateTimeStr = StringUtils.stripToEmpty(readContentAttribute(node, "datetime"));
+ if ("time".equals(nodeName)) {
+ String dateTimeStr = value(node, "datetime");
Literal l;
if (XMLDatatypeUtil.isValidDate(dateTimeStr)) {
l = RDFUtils.literal(dateTimeStr, XMLSchema.DATE);
@@ -387,47 +391,39 @@ public class MicrodataParser {
} else if (XMLDatatypeUtil.isValidDuration(dateTimeStr)) {
l = RDFUtils.literal(dateTimeStr, XMLSchema.DURATION);
} else {
- String lang = getLanguage(node);
- if (lang != null) {
- l = RDFUtils.literal(dateTimeStr, lang);
- } else {
- l = RDFUtils.literal(dateTimeStr);
- }
+ l = RDFUtils.literal(dateTimeStr, getLanguage(node));
}
return new ItemPropValue(l);
}
- if (DomUtils.hasAttribute(node, "content")) {
- String val = DomUtils.readAttribute(node, "content");
- String lang = getLanguage(node);
- Literal l = lang == null ? RDFUtils.literal(val) : RDFUtils.literal(val, lang);
- return new ItemPropValue(l);
- }
-
- if( SRC_TAGS.contains(nodeName) ) {
- return new ItemPropValue( DomUtils.readAttribute(node, "src"), ItemPropValue.Type.Link);
+ if (SRC_TAGS.contains(nodeName)) {
+ return link(node, "src");
}
- if( HREF_TAGS.contains(nodeName) ) {
- return new ItemPropValue( DomUtils.readAttribute(node, "href"), ItemPropValue.Type.Link);
+ if (HREF_TAGS.contains(nodeName)) {
+ return link(node, "href");
}
- if( "object".equals(nodeName) ) {
- return new ItemPropValue( DomUtils.readAttribute(node, "data"), ItemPropValue.Type.Link);
+ if ("object".equals(nodeName)) {
+ return link(node, "data");
}
- if( isItemScope(node) ) {
- return new ItemPropValue( getItemScope(node), ItemPropValue.Type.Nested);
+ String val = DomUtils.readAttribute(node, "content", null);
+ if (val != null) {
+ return new ItemPropValue(RDFUtils.literal(val, getLanguage(node)));
}
- String lang = getLanguage(node);
- StringBuilder content = new StringBuilder();
- appendFormatted(node, content, false);
- Literal l = RDFUtils.literal(content.toString(), lang);
+ Literal l = RDFUtils.literal(textContent(node), getLanguage(node));
final ItemPropValue newItemPropValue = new ItemPropValue(l);
itemPropValues.put(node, newItemPropValue);
return newItemPropValue;
}
+ private static String textContent(Node node) {
+ StringBuilder content = new StringBuilder();
+ appendFormatted(node, content, false);
+ return content.toString();
+ }
+
private static boolean shouldSeparateWithNewline(CharSequence s0, CharSequence s1) {
for (int i = 0, len = s1.length(); i < len; i++) {
char ch = s1.charAt(i);
@@ -476,7 +472,7 @@ public class MicrodataParser {
}
}
- private static String readContentAttribute(Node node, String attrName) {
+ private static String content(Node node, String attrName) {
NamedNodeMap attributes = node.getAttributes();
if (attributes != null) {
Node attr = attributes.getNamedItem("content");
@@ -488,7 +484,18 @@ public class MicrodataParser {
return attr.getNodeValue();
}
}
- return node.getTextContent();
+ return null;
+ }
+
+ private static String value(Node node, String attrName) {
+ String content = content(node, attrName);
+ return StringUtils.stripToEmpty(content != null ? content : node.getTextContent());
+ }
+
+ private static ItemPropValue link(Node node, String attrName) {
+ String content = content(node, attrName);
+ return content == null ? new ItemPropValue(RDFUtils.literal(""))
+ : new ItemPropValue(content, ItemPropValue.Type.Link);
}
//see https://www.w3.org/TR/html52/dom.html#the-lang-and-xmllang-attributes