You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/07/31 21:52:51 UTC
any23 git commit: ANY23-377 don't replace empty strings with 'Null'
Repository: any23
Updated Branches:
refs/heads/master 6173637bb -> a07d1f058
ANY23-377 don't replace empty strings with 'Null'
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/a07d1f05
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/a07d1f05
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/a07d1f05
Branch: refs/heads/master
Commit: a07d1f058fcdc2d994dcd220759310737fe68965
Parents: 6173637
Author: Hans <fi...@gmail.com>
Authored: Tue Jul 31 16:37:25 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Tue Jul 31 16:46:41 2018 -0500
----------------------------------------------------------------------
.../extractor/microdata/ItemPropValue.java | 51 +++++++++-----------
.../microdata-bad-properties-expected.nquads | 6 +--
2 files changed, 25 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/a07d1f05/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
index f32b468..b4710de 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
@@ -22,6 +22,8 @@ import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
+import java.util.Objects;
+
import org.apache.any23.util.StringUtils;
/**
@@ -47,10 +49,24 @@ public class ItemPropValue {
* Supported types.
*/
public enum Type {
- Plain,
- Link,
- Date,
- Nested
+ Plain(String.class),
+ Link(String.class),
+ Date(Date.class),
+ Nested(ItemScope.class);
+
+ Type(Class<?> contentClass) {
+ this.contentClass = contentClass;
+ }
+
+ private final Class<?> contentClass;
+
+ private Object checkClass(Object content) {
+ Objects.requireNonNull(content, "content cannot be null");
+ if (!contentClass.isInstance(content)) {
+ throw new IllegalArgumentException("content must be a " + contentClass.getName() + " when type is " + this);
+ }
+ return content;
+ }
}
public static Date parseDateTime(String dateStr) throws ParseException {
@@ -77,31 +93,8 @@ public class ItemPropValue {
* @param type content type.
*/
public ItemPropValue(Object content, Type type) {
- if(content == null) {
- throw new NullPointerException("content cannot be null.");
- }
- if(type == null) {
- throw new NullPointerException("type cannot be null.");
- }
- if(type == Type.Nested && ! (content instanceof ItemScope) ) {
- throw new IllegalArgumentException(
- "content must be an " + ItemScope.class + " when type is " + Type.Nested
- );
- }
- if(type == Type.Date && !(content instanceof Date) ) {
- throw new IllegalArgumentException(
- "content must be a " + Date.class.getName() + " whe type is " + Type.Date
- );
- }
- if(content instanceof String && ((String) content).trim().length() == 0) {
- // ANY23-115 Empty spans seem to break ANY23
- // instead of throwing the exception and in effect failing the entire
- // parse job we wish to be lenient on web content publishers and add
- // Null (String) as content.
- content = "Null";
- }
- this.content = content;
- this.type = type;
+ this.type = Objects.requireNonNull(type, "type cannot be null");
+ this.content = type.checkClass(content);
}
/**
http://git-wip-us.apache.org/repos/asf/any23/blob/a07d1f05/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads b/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads
index e5b6f29..b759d1b 100644
--- a/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads
+++ b/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads
@@ -49,7 +49,7 @@ _:node1cjov1p83x7 <http://schema.org/name> "Begin to Knit Classes" <http://bob.e
_:node1cjov1p83x7 <http://schema.org/description> "Learn to knit at Kaleidoscope Fibers - Cambridge's speciality yarn,..." <http://bob.example.com/> .
_:node1cjov1p83x8 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> .
_:node1cjov1p83x9 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> .
-_:node1cjov1p83x9 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> .
+_:node1cjov1p83x9 <http://schema.org/streetAddress> "" <http://bob.example.com/> .
_:node1cjov1p83x8 <http://schema.org/address> _:node1cjov1p83x9 <http://bob.example.com/> .
_:node1cjov1p83x8 <http://schema.org/name> "Kaleidoscope Fibers (131 W. Main Street" <http://bob.example.com/> .
_:node1cjov1p83x7 <http://schema.org/location> _:node1cjov1p83x8 <http://bob.example.com/> .
@@ -62,7 +62,7 @@ _:node1cjov1p83x10 <http://schema.org/name> "Cambridge Historic School Museum To
_:node1cjov1p83x10 <http://schema.org/description> "Built in 1906, the Cambridge Historic School - listed on the..." <http://bob.example.com/> .
_:node1cjov1p83x11 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> .
_:node1cjov1p83x12 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> .
-_:node1cjov1p83x12 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> .
+_:node1cjov1p83x12 <http://schema.org/streetAddress> "" <http://bob.example.com/> .
_:node1cjov1p83x11 <http://schema.org/address> _:node1cjov1p83x12 <http://bob.example.com/> .
_:node1cjov1p83x11 <http://schema.org/name> "Cambridge Historic School" <http://bob.example.com/> .
_:node1cjov1p83x10 <http://schema.org/location> _:node1cjov1p83x11 <http://bob.example.com/> .
@@ -75,7 +75,7 @@ _:node1cjov1p83x13 <http://schema.org/name> "Begin to Knit Classes" <http://bob.
_:node1cjov1p83x13 <http://schema.org/description> "Learn to knit at Kaleidoscope Fibers - Cambridge's speciality yarn,..." <http://bob.example.com/> .
_:node1cjov1p83x14 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> .
_:node1cjov1p83x15 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> .
-_:node1cjov1p83x15 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> .
+_:node1cjov1p83x15 <http://schema.org/streetAddress> "" <http://bob.example.com/> .
_:node1cjov1p83x14 <http://schema.org/address> _:node1cjov1p83x15 <http://bob.example.com/> .
_:node1cjov1p83x14 <http://schema.org/name> "Kaleidoscope Fibers (131 W. Main Street" <http://bob.example.com/> .
_:node1cjov1p83x13 <http://schema.org/location> _:node1cjov1p83x14 <http://bob.example.com/> .