You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/07/31 21:52:51 UTC

any23 git commit: ANY23-377 don't replace empty strings with 'Null'

Repository: any23
Updated Branches:
  refs/heads/master 6173637bb -> a07d1f058


ANY23-377 don't replace empty strings with 'Null'


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/a07d1f05
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/a07d1f05
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/a07d1f05

Branch: refs/heads/master
Commit: a07d1f058fcdc2d994dcd220759310737fe68965
Parents: 6173637
Author: Hans <fi...@gmail.com>
Authored: Tue Jul 31 16:37:25 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Tue Jul 31 16:46:41 2018 -0500

----------------------------------------------------------------------
 .../extractor/microdata/ItemPropValue.java      | 51 +++++++++-----------
 .../microdata-bad-properties-expected.nquads    |  6 +--
 2 files changed, 25 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/a07d1f05/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
index f32b468..b4710de 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
@@ -22,6 +22,8 @@ import java.net.URL;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.Date;
+import java.util.Objects;
+
 import org.apache.any23.util.StringUtils;
 
 /**
@@ -47,10 +49,24 @@ public class ItemPropValue {
      * Supported types.
      */
     public enum Type {
-        Plain,
-        Link,
-        Date,
-        Nested
+        Plain(String.class),
+        Link(String.class),
+        Date(Date.class),
+        Nested(ItemScope.class);
+
+        Type(Class<?> contentClass) {
+            this.contentClass = contentClass;
+        }
+
+        private final Class<?> contentClass;
+
+        private Object checkClass(Object content) {
+            Objects.requireNonNull(content, "content cannot be null");
+            if (!contentClass.isInstance(content)) {
+                throw new IllegalArgumentException("content must be a " + contentClass.getName() + " when type is " + this);
+            }
+            return content;
+        }
     }
 
     public static Date parseDateTime(String dateStr) throws ParseException {
@@ -77,31 +93,8 @@ public class ItemPropValue {
      * @param type content type.
      */
     public ItemPropValue(Object content, Type type) {
-        if(content == null) {
-            throw new NullPointerException("content cannot be null.");
-        }
-        if(type == null) {
-            throw new NullPointerException("type cannot be null.");
-        }
-        if(type == Type.Nested && ! (content instanceof ItemScope) ) {
-            throw new IllegalArgumentException(
-                    "content must be an " + ItemScope.class + " when type is " + Type.Nested
-            );
-        }
-        if(type == Type.Date && !(content instanceof Date) ) {
-            throw new IllegalArgumentException(
-                    "content must be a " + Date.class.getName() + " whe type is " + Type.Date
-            );
-        }
-        if(content instanceof String && ((String) content).trim().length() == 0) {
-            // ANY23-115 Empty spans seem to break ANY23
-            // instead of throwing the exception and in effect failing the entire
-            // parse job we wish to be lenient on web content publishers and add
-            // Null (String) as content.
-            content = "Null";
-        }
-        this.content = content;
-        this.type = type;
+        this.type = Objects.requireNonNull(type, "type cannot be null");
+        this.content = type.checkClass(content);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/any23/blob/a07d1f05/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads b/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads
index e5b6f29..b759d1b 100644
--- a/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads
+++ b/test-resources/src/test/resources/microdata/microdata-bad-properties-expected.nquads
@@ -49,7 +49,7 @@ _:node1cjov1p83x7 <http://schema.org/name> "Begin to Knit Classes" <http://bob.e
 _:node1cjov1p83x7 <http://schema.org/description> "Learn to knit at Kaleidoscope Fibers - Cambridge's speciality yarn,..." <http://bob.example.com/> .
 _:node1cjov1p83x8 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> .
 _:node1cjov1p83x9 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> .
-_:node1cjov1p83x9 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> .
+_:node1cjov1p83x9 <http://schema.org/streetAddress> "" <http://bob.example.com/> .
 _:node1cjov1p83x8 <http://schema.org/address> _:node1cjov1p83x9 <http://bob.example.com/> .
 _:node1cjov1p83x8 <http://schema.org/name> "Kaleidoscope Fibers (131 W. Main Street" <http://bob.example.com/> .
 _:node1cjov1p83x7 <http://schema.org/location> _:node1cjov1p83x8 <http://bob.example.com/> .
@@ -62,7 +62,7 @@ _:node1cjov1p83x10 <http://schema.org/name> "Cambridge Historic School Museum To
 _:node1cjov1p83x10 <http://schema.org/description> "Built in 1906, the Cambridge Historic School - listed on the..." <http://bob.example.com/> .
 _:node1cjov1p83x11 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> .
 _:node1cjov1p83x12 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> .
-_:node1cjov1p83x12 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> .
+_:node1cjov1p83x12 <http://schema.org/streetAddress> "" <http://bob.example.com/> .
 _:node1cjov1p83x11 <http://schema.org/address> _:node1cjov1p83x12 <http://bob.example.com/> .
 _:node1cjov1p83x11 <http://schema.org/name> "Cambridge Historic School" <http://bob.example.com/> .
 _:node1cjov1p83x10 <http://schema.org/location> _:node1cjov1p83x11 <http://bob.example.com/> .
@@ -75,7 +75,7 @@ _:node1cjov1p83x13 <http://schema.org/name> "Begin to Knit Classes" <http://bob.
 _:node1cjov1p83x13 <http://schema.org/description> "Learn to knit at Kaleidoscope Fibers - Cambridge's speciality yarn,..." <http://bob.example.com/> .
 _:node1cjov1p83x14 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Place> <http://bob.example.com/> .
 _:node1cjov1p83x15 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/PostalAddress> <http://bob.example.com/> .
-_:node1cjov1p83x15 <http://schema.org/streetAddress> "Null" <http://bob.example.com/> .
+_:node1cjov1p83x15 <http://schema.org/streetAddress> "" <http://bob.example.com/> .
 _:node1cjov1p83x14 <http://schema.org/address> _:node1cjov1p83x15 <http://bob.example.com/> .
 _:node1cjov1p83x14 <http://schema.org/name> "Kaleidoscope Fibers (131 W. Main Street" <http://bob.example.com/> .
 _:node1cjov1p83x13 <http://schema.org/location> _:node1cjov1p83x14 <http://bob.example.com/> .