You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2018/01/08 13:14:17 UTC

[1/6] any23 git commit: ANY23-320 Address @Ignore tests in Any23 and ANY23-131 Nested Microdata are not extracted

Repository: any23
Updated Branches:
  refs/heads/master 97e364ae4 -> 6d0606f9b


http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads
index 53899fa..8409a61 100644
--- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads
+++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads
@@ -16,12 +16,14 @@
 #
 
 <http://books.example.com/works/45U8QJGZSQKDH8N> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/vocab/frbr/core#Work> <http://bob.example.com/> .
-<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/elements/1.1/creator> "Wil Wheaton" <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/BOOK> <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/EBOOK> <http://bob.example.com/> .
 <http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/terms/title> "Just a Geek" <http://bob.example.com/> .
-<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/vocab/frbr/core#realization> <http://books.example.com/products/9780596007683.BOOK> <http://bob.example.com/> .
-<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/vocab/frbr/core#realization> <http://books.example.com/products/9780596802189.EBOOK> <http://bob.example.com/> .
 <http://books.example.com/products/9780596007683.BOOK> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/vocab/frbr/core#Expression> <http://bob.example.com/> .
 <http://books.example.com/products/9780596007683.BOOK> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/BOOK> <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/vocab/frbr/core#realization> <http://books.example.com/products/9780596007683.BOOK> <http://bob.example.com/> .
 <http://books.example.com/products/9780596802189.EBOOK> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/vocab/frbr/core#Expression> <http://bob.example.com/> .
 <http://books.example.com/products/9780596802189.EBOOK> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/EBOOK> <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/vocab/frbr/core#realization> <http://books.example.com/products/9780596802189.EBOOK> <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/elements/1.1/creator> "Wil\n      Wheaton" <http://bob.example.com/> .
 <http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> <http://books.example.com/works/45U8QJGZSQKDH8N> <http://bob.example.com/> .
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html
index e313ccd..da6148f 100644
--- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html
+++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html
@@ -14,26 +14,30 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 -->
-<dl itemscope
-    itemtype="http://purl.org/vocab/frbr/core#Work"
-    itemid="http://books.example.com/works/45U8QJGZSQKDH8N">
-    <dt>Title</dt>
-    <dd><cite itemprop="http://purl.org/dc/terms/title">Just a Geek</cite></dd>
-    <dt>By</dt>
-    <dd><span itemprop="http://purl.org/dc/elements/1.1/creator">Wil Wheaton</span></dd>
-    <dt>Format</dt>
-    <dd itemprop="http://purl.org/vocab/frbr/core#realization"
-        itemscope
-        itemtype="http://purl.org/vocab/frbr/core#Expression"
-        itemid="http://books.example.com/products/9780596007683.BOOK">
-        <link itemprop="http://purl.org/dc/terms/type" href="http://books.example.com/product-types/BOOK">
-        Print
-    </dd>
-    <dd itemprop="http://purl.org/vocab/frbr/core#realization"
-        itemscope
-        itemtype="http://purl.org/vocab/frbr/core#Expression"
-        itemid="http://books.example.com/products/9780596802189.EBOOK">
-        <link itemprop="http://purl.org/dc/terms/type" href="http://books.example.com/product-types/EBOOK">
-        Ebook
-    </dd>
+<dl itemscope itemtype="http://purl.org/vocab/frbr/core#Work"
+  itemid="http://books.example.com/works/45U8QJGZSQKDH8N">
+  <dt>Title</dt>
+  <dd>
+    <cite itemprop="http://purl.org/dc/terms/title">Just a Geek</cite>
+  </dd>
+  <dt>By</dt>
+  <dd>
+    <span itemprop="http://purl.org/dc/elements/1.1/creator">Wil
+      Wheaton</span>
+  </dd>
+  <dt>Format</dt>
+  <dd itemprop="http://purl.org/vocab/frbr/core#realization" itemscope
+    itemtype="http://purl.org/vocab/frbr/core#Expression"
+    itemid="http://books.example.com/products/9780596007683.BOOK">
+    <link itemprop="http://purl.org/dc/terms/type"
+      href="http://books.example.com/product-types/BOOK">
+    Print
+  </dd>
+  <dd itemprop="http://purl.org/vocab/frbr/core#realization" itemscope
+    itemtype="http://purl.org/vocab/frbr/core#Expression"
+    itemid="http://books.example.com/products/9780596802189.EBOOK">
+    <link itemprop="http://purl.org/dc/terms/type"
+      href="http://books.example.com/product-types/EBOOK">
+    Ebook
+  </dd>
 </dl>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
index eb6dcd9..2632f7a 100644
--- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
+++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
@@ -15,18 +15,21 @@
 #  limitations under the License.
 #
 
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node161nd8236x293102 <http://bob.example.com/> .
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node161nd8236x293103 <http://bob.example.com/> .
-_:node161nd8236x293102 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> .
-_:node161nd8236x293102 <http://microformats.org/profile/hcard/fn> "Princeton" <http://bob.example.com/> .
-_:node161nd8236x293102 <http://microformats.org/profile/hcard/n> _:node161nd8236x293104 <http://bob.example.com/> .
-_:node161nd8236x293102 <http://microformats.org/profile/hcard/adr> _:node161nd8236x293105 <http://bob.example.com/> .
-_:node161nd8236x293104 <http://microformats.org/profile/hcard/given-name> "Princeton" <http://bob.example.com/> .
-_:node161nd8236x293103 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> .
-_:node161nd8236x293103 <http://microformats.org/profile/hcard/fn> "Trekkie" <http://bob.example.com/> .
-_:node161nd8236x293103 <http://microformats.org/profile/hcard/n> _:node161nd8236x293106 <http://bob.example.com/> .
-_:node161nd8236x293103 <http://microformats.org/profile/hcard/adr> _:node161nd8236x293105 <http://bob.example.com/> .
-_:node161nd8236x293106 <http://microformats.org/profile/hcard/given-name> "Trekkie" <http://bob.example.com/> .
-_:node161nd8236x293105 <http://microformats.org/profile/hcard/street-address> "Avenue Q" <http://bob.example.com/> .
-<http://bob.example.com/> <http://vocab.sindice.net/date> "2011-06-08T12:56:39+01:00" <http://bob.example.com/> .
-<http://bob.example.com/> <http://vocab.sindice.net/size> "15"^^<http://www.w3.org/2001/XMLSchema#int> <http://bob.example.com/> .
\ No newline at end of file
+_:nodebdb2c525cf8095abb6954b51432e6 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/street-address> "Avenue Q" <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/fn> "Princeton" <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/given-name> "Princeton" <http://bob.example.com/> .
+_:node5194c3bb9d7f53e4759c6f393d95f88 <http://schema.org/given-name> "Princeton" <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/n> _:node5194c3bb9d7f53e4759c6f393d95f88 <http://bob.example.com/> .
+_:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://schema.org/street-address> "Avenue Q" <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/adr> _:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://bob.example.com/> .
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:nodebdb2c525cf8095abb6954b51432e6 <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/street-address> "Avenue Q" <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/fn> "Trekkie" <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/given-name> "Trekkie" <http://bob.example.com/> .
+_:node45173ea18b736c2e9c3136e52ed3727e <http://schema.org/given-name> "Trekkie" <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/n> _:node45173ea18b736c2e9c3136e52ed3727e <http://bob.example.com/> .
+_:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://schema.org/street-address> "Avenue Q" <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/adr> _:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://bob.example.com/> .
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node7a12e48e321d29211c8b7c2ce396854 <http://bob.example.com/> .
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html
index 7234b21..e38d2a8 100644
--- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html
+++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html
@@ -15,12 +15,12 @@
   limitations under the License.
 -->
 <p>
-    Both
- <span itemscope itemtype="http://microformats.org/profile/hcard" itemref="home"><span itemprop="fn"
-         ><span itemprop="n" itemscope><span itemprop="given-name">Princeton</span></span></span></span>
-    and
- <span itemscope itemtype="http://microformats.org/profile/hcard" itemref="home"><span itemprop="fn"
-         ><span itemprop="n" itemscope><span itemprop="given-name">Trekkie</span></span></span></span>
-    live at
-    <span id="home" itemprop="adr" itemscope><span itemprop="street-address">Avenue Q</span>.</span>
+  Both <span itemscope itemtype="http://microformats.org/profile/hcard"
+    itemref="home"><span itemprop="fn"><span
+      itemprop="n" itemscope><span itemprop="given-name">Princeton</span></span></span></span>
+  and <span itemscope itemtype="http://microformats.org/profile/hcard"
+    itemref="home"><span itemprop="fn"><span
+      itemprop="n" itemscope><span itemprop="given-name">Trekkie</span></span></span></span>
+  live at <span id="home" itemprop="adr" itemscope><span
+    itemprop="street-address">Avenue Q</span>.</span>
 </p>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-basic-expected.properties
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-basic-expected.properties b/test-resources/src/test/resources/microdata/microdata-basic-expected.properties
index a020a4b..ee57018 100644
--- a/test-resources/src/test/resources/microdata/microdata-basic-expected.properties
+++ b/test-resources/src/test/resources/microdata/microdata-basic-expected.properties
@@ -17,13 +17,13 @@
 
 result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Elizabeth", "type" : "Plain" } } ] }
 result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[2]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Daniel", "type" : "Plain" } } ] }
-result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[3]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "band", "value" : { "content" : "Four Parts Water", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[3]/SPAN[1]", "name" : "nationality", "value" : { "content" : "British", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Neil", "type" : "Plain" } } ] }
+result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[3]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[3]/SPAN[1]", "name" : "nationality", "value" : { "content" : "British", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Neil", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "band", "value" : { "content" : "Four Parts Water", "type" : "Plain" } } ] }
 result3={ "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/IMG[1]", "name" : "image", "value" : { "content" : "google-logo.png", "type" : "Link" } } ] }
 result4={ "xpath" : "/HTML[1]/BODY[1]/DIV[5]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[5]/TIME[1]", "name" : "birthday", "value" : { "content" : "2009-05-10", "type" : "Date" } } ] }
 result5={ "xpath" : "/HTML[1]/BODY[1]/DIV[6]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[6]/UL[1]/LI[1]", "name" : "flavor", "value" : { "content" : "Lemon sorbet", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[6]/UL[1]/LI[2]", "name" : "flavor", "value" : { "content" : "Apricot sorbet", "type" : "Plain" } } ] }
 result6={ "xpath" : "/HTML[1]/BODY[1]/DIV[7]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[7]/SPAN[1]", "name" : "favorite-fruit", "value" : { "content" : "orange", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[7]/SPAN[1]", "name" : "favorite-color", "value" : { "content" : "orange", "type" : "Plain" } } ] }
 result7={ "xpath" : "/HTML[1]/BODY[1]/FIGURE[1]/FIGCAPTION[1]/SPAN[1]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/FIGURE[1]/FIGCAPTION[1]/SPAN[1]/SPAN[1]", "name" : "name", "value" : { "content" : "The Castle", "type" : "Plain" } } ] }
 result8={ "xpath" : "/HTML[1]/BODY[1]/SPAN[1]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SPAN[1]/META[1]", "name" : "name", "value" : { "content" : "The Castle", "type" : "Plain" } } ] }
-result9={ "xpath" : "/HTML[1]/BODY[1]/SECTION[1]", "id" : null, "refs" : [], "type" : "http://example.org/animals#cat", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/P[1]", "name" : "desc", "value" : { "content" : "Hedral is a male american domestic shorthair,\\n     with a fluffy black fur with white paws and belly.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/H1[1]", "name" : "name", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/IMG[1]", "name" : "img", "value" : { "content" : "hedral.jpeg", "type" : "Link" } } ] }
+result9={ "xpath" : "/HTML[1]/BODY[1]/SECTION[1]", "id" : null, "refs" : [], "type" : "http://example.org/animals#cat", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/IMG[1]", "name" : "img", "value" : { "content" : "hedral.jpeg", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/H1[1]", "name" : "name", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/P[1]", "name" : "desc", "value" : { "content" : "Hedral is a male american domestic shorthair, with a fluffy black fur with white paws and belly.", "type" : "Plain" } } ] }
 result10={ "xpath" : "/HTML[1]/BODY[1]/DL[1]", "id" : null, "refs" : [], "type" : "http://vocab.example.net/book", "itemid" : "urn:isbn:0-330-34032-8", "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DL[1]/DD[2]", "name" : "author", "value" : { "content" : "Peter F. Hamilton\\n    ", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DL[1]/DD[1]", "name" : "title", "value" : { "content" : "The Reality Dysfunction\\n    ", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DL[1]/DD[3]/TIME[1]", "name" : "pubdate", "value" : { "content" : "1996-01-26", "type" : "Date" } } ] }
-result11={ "xpath" : "/HTML[1]/BODY[1]/SECTION[2]", "id" : null, "refs" : [], "type" : "http://example.org/animals#cat", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]", "name" : "desc", "value" : { "content" : "Hedral is a male american domestic shorthair, with a fluffy\\n        black fur with\\n        white paws and belly.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/H1[1]", "name" : "name", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/IMG[1]", "name" : "img", "value" : { "content" : "hedral.jpeg", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/H1[1]", "name" : "http://example.com/fn", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]/SPAN[1]", "name" : "http://example.com/color", "value" : { "content" : "black", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]/SPAN[2]", "name" : "http://example.com/color", "value" : { "content" : "white", "type" : "Plain" } } ] }
\ No newline at end of file
+result11={ "xpath" : "/HTML[1]/BODY[1]/SECTION[2]", "id" : null, "refs" : [], "type" : "http://example.org/animals#cat", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]/SPAN[1]", "name" : "http://example.com/color", "value" : { "content" : "black", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]/SPAN[2]", "name" : "http://example.com/color", "value" : { "content" : "white", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/IMG[1]", "name" : "img", "value" : { "content" : "hedral.jpeg", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/H1[1]", "name" : "http://example.com/fn", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/H1[1]", "name" : "name", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]", "name" : "desc", "value" : { "content" : "Hedral is a male american domestic shorthair, with a fluffy black fur with white paws and belly.", "type" : "Plain" } } ] }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-basic.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-basic.html b/test-resources/src/test/resources/microdata/microdata-basic.html
index e7d4dba..695d126 100644
--- a/test-resources/src/test/resources/microdata/microdata-basic.html
+++ b/test-resources/src/test/resources/microdata/microdata-basic.html
@@ -75,10 +75,8 @@
   <figure>
     <img src="castle.jpeg">
     <figcaption>
-      <span itemscope>
-        <span itemprop="name">The Castle</span>
-      </span>
-      (1986)
+      <span itemscope> <span itemprop="name">The Castle</span>
+      </span> (1986)
     </figcaption>
   </figure>
 
@@ -92,8 +90,7 @@
   <!--  result9 -->
   <section itemscope itemtype="http://example.org/animals#cat">
     <h1 itemprop="name">Hedral</h1>
-    <p itemprop="desc">Hedral is a male american domestic shorthair,
-      with a fluffy black fur with white paws and belly.</p>
+    <p itemprop="desc">Hedral is a male american domestic shorthair, with a fluffy black fur with white paws and belly.</p>
     <img itemprop="img" src="hedral.jpeg" alt=""
       title="Hedral, age 18 months">
   </section>
@@ -114,11 +111,9 @@
   <!--  result11 -->
   <section itemscope itemtype="http://example.org/animals#cat">
     <h1 itemprop="name http://example.com/fn">Hedral</h1>
-    <p itemprop="desc">
-      Hedral is a male american domestic shorthair, with a fluffy <span
+    <p itemprop="desc">Hedral is a male american domestic shorthair, with a fluffy <span
         itemprop="http://example.com/color">black</span> fur with <span
-        itemprop="http://example.com/color">white</span> paws and belly.
-    </p>
+        itemprop="http://example.com/color">white</span> paws and belly.</p>
     <img itemprop="img" src="hedral.jpeg" alt=""
       title="Hedral, age 18 months">
   </section>

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties b/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties
index 8b60e54..843e957 100644
--- a/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties
+++ b/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties
@@ -15,11 +15,15 @@
 #  limitations under the License.
 #
 
-result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]", "id" : "is1", "refs" : [], "type" : "http://type/IScopeType1", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[2]", "name" : "p4", "value" : { "content" : "http://www.domain.org/path/2", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[2]", "name" : "p3", "value" : { "content" : "Some Text 2", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[1]", "name" : "p2", "value" : { "content" : "Some Text 1", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[1]", "name" : "p1", "value" : { "content" : "http://www.domain.org/path/1", "type" : "Link" } } ] }
-result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "id" : "is2", "refs" : ["ip5", "ip4", "ip3", "unexisting"], "type" : "http://type/IScopeType2", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/P[1]", "name" : "p6", "value" : { "content" : "Some Text 3", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/IMG[1]", "name" : "p5", "value" : { "content" : "http://source/dom/path", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[2]", "name" : "p4", "value" : { "content" : "http://www.domain.org/path/2", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[2]", "name" : "p3", "value" : { "content" : "Some Text 2", "type" : "Plain" } } ] }
-result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]", "id" : "loops", "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "id" : "loop4", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } } ] }, "type" : "Nested" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "name" : "self", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "id" : "loop0", "refs" : ["loop0"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "name" : "head", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "id" : "loop1", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } } ] }, "type" : "Nested" } } ] }, "type" : "Nested" } } ] }
-result3={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]", "id" : "idItem", "refs" : ["insideOut"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]/DIV[1]", "name" : "prop", "value" : { "content" : "Included via tree.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[1]", "name" : "prop", "value" : { "content" : "Included via parent, before.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[3]", "name" : "prop", "value" : { "content" : "Included via parent, after.", "type" : "Plain" } } ] }
-error0={ "message" : "Unknown itemProp id 'unexisting'", "path" : "null", "begin_row" : -1, "begin_col" : -1, "end_row" : -1, "end_col" : -1 }
-error1={ "message" : "Duplicated deferred itemProp 'p5'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "begin_row" : 26, "begin_col" : 5, "end_row" : 26, "end_col" : 97 }
-error2={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop0' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "begin_row" : 33, "begin_col" : 9, "end_row" : 33, "end_col" : 67 }
-error3={ "message" : "Loop detected with depth 3 while dereferencing itemProp 'loop2' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "begin_row" : 37, "begin_col" : 9, "end_row" : 37, "end_col" : 67 }
\ No newline at end of file
+result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]", "id" : "is1", "refs" : [], "type" : "http://type/IScopeType1", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[1]", "name" : "p1", "value" : { "content" : "http://www.domain.org/path/1", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[1]", "name" : "p2", "value" : { "content" : "Some Text 1", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[2]", "name" : "p3", "value" : { "content" : "Some Text 2", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[2]", "name" : "p4", "value" : { "content" : "http://www.domain.org/path/2", "type" : "Link" } } ] }
+result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "id" : "is2", "refs" : ["ip5", "ip4", "ip3", "unexisting"], "type" : "http://type/IScopeType2", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/IMG[1]", "name" : "p5", "value" : { "content" : "http://source/dom/path", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/P[1]", "name" : "p6", "value" : { "content" : "Some Text 3", "type" : "Plain" } } ] }
+result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]", "id" : "loops", "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "name" : "head", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "id" : "loop1", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "id" : "loop4", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "name" : "self", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "id" : "loop0", "refs" : ["loop0"], "type" : null, "itemid" : null, "properties" : [  ] }, "type" : "Nested" } } ] }
+result3={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]", "id" : "idItem", "refs" : ["insideOut"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]/DIV[1]", "name" : "prop", "value" : { "content" : "Included via tree.", "type" : "Plain" } } ] }
+error0={ "message" : "Duplicated deferred itemProp 'p5'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "begin_row" : 28, "begin_col" : 5, "end_row" : 29, "end_col" : 40 }
+error1={ "message" : "Duplicated deferred itemProp 'p6'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "begin_row" : 28, "begin_col" : 5, "end_row" : 29, "end_col" : 40 }
+error2={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop0' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "begin_row" : 36, "begin_col" : 7, "end_row" : 36, "end_col" : 65 }
+error3={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop2' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "begin_row" : 37, "begin_col" : 7, "end_row" : 37, "end_col" : 65 }
+error4={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop3' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "begin_row" : 39, "begin_col" : 7, "end_row" : 39, "end_col" : 65 }
+error5={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop4' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "begin_row" : 40, "begin_col" : 7, "end_row" : 40, "end_col" : 65 }
+error6={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop2' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "begin_row" : 41, "begin_col" : 7, "end_row" : 41, "end_col" : 65 }
+error7={ "message" : "Duplicated deferred itemProp 'prop'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]", "begin_row" : 48, "begin_col" : 7, "end_row" : 48, "end_col" : 54 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-itemref.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-itemref.html b/test-resources/src/test/resources/microdata/microdata-itemref.html
index d133330..f8ff990 100644
--- a/test-resources/src/test/resources/microdata/microdata-itemref.html
+++ b/test-resources/src/test/resources/microdata/microdata-itemref.html
@@ -14,35 +14,43 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 -->
-<div id="data">
+<html>
+<body>
+  <div id="data">
     <div id="is1" itemscope itemtype="http://type/IScopeType1">
-        <a id="ip1" itemprop="p1" href="http://www.domain.org/path/1"></a>
-        <p id="ip2" itemprop="p2">Some Text 1</p>
-        <p id="ip3" itemprop="p3">Some Text 2</p>
-        <a id="ip4" itemprop="p4" href="http://www.domain.org/path/2"></a>
+      <a id="ip1" itemprop="p1" href="http://www.domain.org/path/1"></a>
+      <p id="ip2" itemprop="p2">Some Text 1</p>
+      <p id="ip3" itemprop="p3">Some Text 2</p>
+      <a id="ip4" itemprop="p4" href="http://www.domain.org/path/2"></a>
     </div>
 
     <!-- Duplicate and unexisting itemrefs. -->
-    <div id="is2" itemscope itemtype="http://type/IScopeType2" itemref="ip5 ip4 ip3 unexisting">
-        <img id="ip5" itemprop="p5" src="http://source/dom/path">
-        <p   id="ip6" itemprop="p6">Some Text 3</p>
+    <div id="is2" itemscope itemtype="http://type/IScopeType2"
+      itemref="ip5 ip4 ip3 unexisting">
+      <img id="ip5" itemprop="p5" src="http://source/dom/path">
+      <p id="ip6" itemprop="p6">Some Text 3</p>
     </div>
 
     <!-- Loops. -->
     <div id="loops" itemscope>
-        <div id="loop0" itemprop="self" itemscope itemref="loop0"></div>
-        <div id="loop1" itemprop="head" itemscope itemref="loop2">not in the loop</div>
-        <div id="loop2" itemprop="next" itemscope itemref="loop3"></div>
-        <div id="loop3" itemprop="next" itemscope itemref="loop4"></div>
-        <div id="loop4" itemprop="next" itemscope itemref="loop2"></div>
+      <div id="loop0" itemprop="self" itemscope itemref="loop0"></div>
+      <div id="loop1" itemprop="head" itemscope itemref="loop2">not
+        in the loop</div>
+      <div id="loop2" itemprop="next" itemscope itemref="loop3"></div>
+      <div id="loop3" itemprop="next" itemscope itemref="loop4"></div>
+      <div id="loop4" itemprop="next" itemscope itemref="loop2"></div>
     </div>
 
     <!-- Inside - Out Nesting. -->
     <div id="insideOut">
-        <div id="idBefore" itemprop="prop">Included via parent, before.</div>
-        <div id="idItem" itemscope itemref="insideOut">
-            <div id="ioChild" itemprop="prop">Included via tree.</div>
-        </div>
-        <div id="idAfter" itemprop="prop">Included via parent, after.</div>
+      <div id="idBefore" itemprop="prop">Included via parent,
+        before.</div>
+      <div id="idItem" itemscope itemref="insideOut">
+        <div id="ioChild" itemprop="prop">Included via tree.</div>
+      </div>
+      <div id="idAfter" itemprop="prop">Included via parent,
+        after.</div>
     </div>
-</div>
+  </div>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-json-serialization.json
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-json-serialization.json b/test-resources/src/test/resources/microdata/microdata-json-serialization.json
index 27692dc..6a3beec 100644
--- a/test-resources/src/test/resources/microdata/microdata-json-serialization.json
+++ b/test-resources/src/test/resources/microdata/microdata-json-serialization.json
@@ -1 +1 @@
-{ "result" : [{ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "name" : "band", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "id" : "b", "refs" : ["c"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Jazz Band", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "size", "value" : { "content" : "12", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Amanda", "type" : "Plain" } } ] }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath" 
 : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } } ] }] }
\ No newline at end of file
+{ "result" : [{ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [  ] }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }] }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads b/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads
index fa6686d..dbf6d4a 100644
--- a/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads
+++ b/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads
@@ -15,14 +15,11 @@
 #  limitations under the License.
 #
 
-_:node3e103839c5eaa86c975e26a96157bf2 <http://schema.org/name> "Jazz Band" <http://bob.example.com/> .
-_:node3e103839c5eaa86c975e26a96157bf2 <http://schema.org/size> "12" <http://bob.example.com/> .
-_:node70664686c06639b5211a24a9cf34f99 <http://schema.org/band> _:node3e103839c5eaa86c975e26a96157bf2 <http://bob.example.com/> .
-_:node70664686c06639b5211a24a9cf34f99 <http://schema.org/name> "Amanda" <http://bob.example.com/> .
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node70664686c06639b5211a24a9cf34f99 <http://bob.example.com/> .
-_:nodea2c65a1b5a43d4ac4bd93b3216f5f7d <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Movie> <http://bob.example.com/> .
-_:nodea2c65a1b5a43d4ac4bd93b3216f5f7d <http://schema.org/Movie/name> "Avatar" <http://bob.example.com/> .
-_:noded2bffd25e1a8448b159c4c45afd3283 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://bob.example.com/> .
-_:noded2bffd25e1a8448b159c4c45afd3283 <http://schema.org/Person/name> "James Cameron" <http://bob.example.com/> .
-_:nodea2c65a1b5a43d4ac4bd93b3216f5f7d <http://schema.org/Movie/director> _:noded2bffd25e1a8448b159c4c45afd3283 <http://bob.example.com/> .
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:nodea2c65a1b5a43d4ac4bd93b3216f5f7d <http://bob.example.com/> .
\ No newline at end of file
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node295195eb5d5124e03da26bafc7313bc <http://bob.example.com/> .
+_:node3ecb85b37ebfd65a5d57ab82374a5 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Movie> <http://bob.example.com/> .
+_:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://bob.example.com/> .
+_:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://schema.org/Person/name> "James Cameron" <http://bob.example.com/> .
+_:node3ecb85b37ebfd65a5d57ab82374a5 <http://schema.org/Movie/director> _:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://bob.example.com/> .
+_:node3ecb85b37ebfd65a5d57ab82374a5 <http://schema.org/Movie/name> "Avatar" <http://bob.example.com/> .
+_:node3ecb85b37ebfd65a5d57ab82374a5 <http://schema.org/Movie/name> "James Cameron" <http://bob.example.com/> .
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node3ecb85b37ebfd65a5d57ab82374a5 <http://bob.example.com/> .
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-nested-expected.properties
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-nested-expected.properties b/test-resources/src/test/resources/microdata/microdata-nested-expected.properties
index 2f65ec4..ca05f33 100644
--- a/test-resources/src/test/resources/microdata/microdata-nested-expected.properties
+++ b/test-resources/src/test/resources/microdata/microdata-nested-expected.properties
@@ -15,5 +15,5 @@
 #  limitations under the License.
 #
 
-result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "name" : "band", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "id" : "b", "refs" : ["c"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Jazz Band", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "size", "value" : { "content" : "12", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Amanda", "type" : "Plain" } } ] }
-result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } } ] }
\ No newline at end of file
+result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [  ] }
+result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-nested.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-nested.html b/test-resources/src/test/resources/microdata/microdata-nested.html
index aaf29d6..bc81642 100644
--- a/test-resources/src/test/resources/microdata/microdata-nested.html
+++ b/test-resources/src/test/resources/microdata/microdata-nested.html
@@ -18,22 +18,29 @@
 <head>
 <body>
 
-<!--  result0 -->
-<div itemscope id="amanda" itemref="a b"></div>
-<p id="a">Name: <span itemprop="name">Amanda</span></p>
-<div id="b" itemprop="band" itemscope itemref="c"></div>
-<div id="c">
- <p>Band: <span itemprop="name">Jazz Band</span></p>
- <p>Size: <span itemprop="size">12</span> players</p>
-</div>
+  <!--  result0 -->
+  <div itemscope id="amanda" itemref="a b"></div>
+  <p id="a">
+    Name: <span itemprop="name">Amanda</span>
+  </p>
+  <div id="b" itemprop="band" itemscope itemref="c"></div>
+  <div id="c">
+    <p>
+      Band: <span itemprop="name">Jazz Band</span>
+    </p>
+    <p>
+      Size: <span itemprop="size">12</span> players
+    </p>
+  </div>
 
-<!-- result1 -->
-<div itemscope itemtype="http://schema.org/Movie">
+  <!-- result1 -->
+  <div itemscope itemtype="http://schema.org/Movie">
     <h1 itemprop="name">Avatar</h1>
-    <div itemprop="director" itemscope itemtype="http://schema.org/Person">
-        Director: <span itemprop="name">James Cameron</span>
+    <div itemprop="director" itemscope
+      itemtype="http://schema.org/Person">
+      Director: <span itemprop="name">James Cameron</span>
     </div>
-</div>
+  </div>
 
 </body>
 </head>

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads b/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads
index 9ea9752..f59e6a0 100644
--- a/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads
+++ b/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads
@@ -15,15 +15,18 @@
 #  limitations under the License.
 #
 
-_:nodee6ae2861b16a51a6519079d82c13763 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data-vocabulary.org/Person> <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/title> "graduate research assistant" <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/nickname> "Johnny" <http://bob.example.com/> .
-_:node14cf7264b949d7a9e18598d198e5f65 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data-vocabulary.org/Address> <http://bob.example.com/> .
-_:node14cf7264b949d7a9e18598d198e5f65 <http://data-vocabulary.org/Address/region> "Georgia" <http://bob.example.com/> .
-_:node14cf7264b949d7a9e18598d198e5f65 <http://data-vocabulary.org/Address/locality> "Warner Robins" <http://bob.example.com/> .
-_:node14cf7264b949d7a9e18598d198e5f65 <http://data-vocabulary.org/Address/street-address> "1234 Peach Drive" <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/address> _:node14cf7264b949d7a9e18598d198e5f65 <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/name> "John Doe" <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/affiliation> "University of Dreams" <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/url> <http://www.JohnnyD.com> <http://bob.example.com/> .
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:nodee6ae2861b16a51a6519079d82c13763 <http://bob.example.com/> .
\ No newline at end of file
+_:node9423934b5f186fd49d90edd31b5625ba <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data-vocabulary.org/Person> <http://bob.example.com/> .
+_:nodee94f8737ad89876c85bd87156a1eb585 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data-vocabulary.org/Address> <http://bob.example.com/> .
+_:nodee94f8737ad89876c85bd87156a1eb585 <http://data-vocabulary.org/Address/street-address> "1234 Peach Drive" <http://bob.example.com/> .
+_:nodee94f8737ad89876c85bd87156a1eb585 <http://data-vocabulary.org/Address/locality> "Warner Robins" <http://bob.example.com/> .
+_:nodee94f8737ad89876c85bd87156a1eb585 <http://data-vocabulary.org/Address/region> "Georgia" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/address> _:nodee94f8737ad89876c85bd87156a1eb585 <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/affiliation> "University of Dreams" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/street-address> "1234 Peach Drive" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/name> "John Doe" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/nickname> "Johnny" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/locality> "Warner Robins" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/title> "graduate research assistant" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/region> "Georgia" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/url> <http://www.JohnnyD.com> <http://bob.example.com/> .
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node9423934b5f186fd49d90edd31b5625ba <http://bob.example.com/> .
\ No newline at end of file


[2/6] any23 git commit: ANY23-320 Address @Ignore tests in Any23 and ANY23-131 Nested Microdata are not extracted

Posted by le...@apache.org.
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
index c505bb8..1bffcee 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
@@ -17,13 +17,15 @@
 
 package org.apache.any23.extractor.html;
 
-import junit.framework.Assert;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractorFactory;
 import org.apache.any23.extractor.IssueReport;
 import org.apache.any23.rdf.RDFUtils;
 import org.apache.any23.vocab.VCard;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.eclipse.rdf4j.model.Resource;
 import org.eclipse.rdf4j.model.Statement;
@@ -40,929 +42,927 @@ import java.util.List;
  */
 public class HCardExtractorTest extends AbstractExtractorTestCase {
 
-	private static final VCard vVCARD = VCard.getInstance();
-
-	protected ExtractorFactory<?> getExtractorFactory() {
-		return new HCardExtractorFactory();
-	}
-
-	@Test
-	public void testEMailNotUriReal() throws Exception {
-		assertExtract("/microformats/hcard/17-email-not-uri.html");
-		assertDefaultVCard();
-		assertJohn();
-		assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
-	}
-
-	@Test
-	public void testTel() throws Exception {
-		assertExtract("/microformats/hcard/21-tel.html");
-		assertDefaultVCard();
-		String[] tels = { "+1.415.555.1231", "+1.415.555.1235",
-				"+1.415.555.1236", "+1.415.555.1237", "+1.415.555.1238",
-				"+1.415.555.1239", "+1.415.555.1240", "+1.415.555.1241",
-				"+1.415.555.1242", "+1.415.555.1243" };
-		for (String tel : tels) {
-			assertContains(vVCARD.tel, RDFUtils.iri("tel:" + tel));
-		}
-		Resource telResource = RDFUtils.iri("tel:+14155551233");
-		assertContains(vVCARD.fax, telResource);
-		assertContains(vVCARD.workTel, telResource);
-		assertContains(vVCARD.homeTel, telResource);
-		assertJohn();
-	}
-
-	@Test
-	public void testAbbrTitleEverything() throws Exception {
-		assertExtract("/microformats/hcard/23-abbr-title-everything.html");
-		assertDefaultVCard();
-
-		assertContains(vVCARD.fn, "John Doe");
-		assertContains(vVCARD.nickname, "JJ");
-
-		assertContains(vVCARD.given_name, "Jonathan");
-		assertContains(vVCARD.additional_name, "John");
-		assertContains(vVCARD.family_name, "Doe-Smith");
-		assertContains(vVCARD.honorific_suffix, "Medical Doctor");
-
-		assertContains(vVCARD.title, "President");
-		assertContains(vVCARD.role, "Chief");
-		assertContains(vVCARD.tz, "-0700");
-		assertContains(vVCARD.bday, "2006-04-04");
-		assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
-		assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
-		assertContains(vVCARD.class_, "public");
-		assertContains(vVCARD.note, "this is a note");
-		assertContains(vVCARD.organization_name, "Intellicorp");
-		assertContains(vVCARD.organization_unit, "Intelligence");
-
-		// We define the property in this extractor _but_ we do not parse it.
-		assertContains(vVCARD.geo, (Resource) null);
-		// Thus we do not cointain these.
-		// The interaction is in @link RDFMergerTest.java
-		assertNotContains(RDF.TYPE, vVCARD.Location);
-		assertNotContains(null, vVCARD.latitude, "37.77");
-		assertNotContains(null, vVCARD.longitude, "-122.41");
-
-		// see above
-		assertContains(vVCARD.adr, (Resource) null);
-		assertNotContains(RDF.TYPE, vVCARD.Address);
-		assertNotContains(null, vVCARD.post_office_box, "Box 1234");
-		assertNotContains(null, vVCARD.extended_address, "Suite 100");
-		assertNotContains(null, vVCARD.street_address, "123 Fake Street");
-		assertNotContains(null, vVCARD.locality, "San Francisco");
-		assertNotContains(null, vVCARD.region, "California");
-		assertNotContains(null, vVCARD.postal_code, "12345-6789");
-		assertNotContains(null, vVCARD.country_name, "United States of America");
-		assertNotContains(null, vVCARD.addressType, "work");
-	}
-
-	@Test
-	public void testGeoAbbr() throws Exception {
-		assertExtract("/microformats/hcard/25-geo-abbr.html");
-		assertModelNotEmpty();
-		assertContains(vVCARD.fn, "Paradise");
-		assertContains(RDF.TYPE, vVCARD.Organization);
-		assertContains(vVCARD.organization_name, "Paradise");
-		// See above: geo property yes, gteo blank node no.
-		assertContains(vVCARD.geo, (Resource) null);
-		assertNotContains(RDF.TYPE, vVCARD.Location);
-		assertNotContains(null, vVCARD.latitude, "30.267991");
-		assertNotContains(null, vVCARD.longitude, "-97.739568");
-	}
-
-	@Test
-	public void testAncestors() throws Exception {
-		assertExtract("/microformats/hcard/26-ancestors.html");
-		assertModelNotEmpty();
-
-		assertContains(vVCARD.fn, "John Doe");
-		assertNotContains(null, vVCARD.fn,
-				"Mister Jonathan John Doe-Smith Medical Doctor");
-		assertContains(vVCARD.nickname, "JJ");
-		assertNotContains(RDF.TYPE, vVCARD.Address);
-		assertContains(vVCARD.tz, "-0700");
-		assertContains(vVCARD.title, "President");
-		assertContains(vVCARD.role, "Chief");
-		assertContains(vVCARD.organization_name, "Intellicorp");
-		assertContains(vVCARD.organization_unit, "Intelligence");
-
-		assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
-		assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
-		assertContains(vVCARD.note, "this is a note");
-		assertContains(vVCARD.class_, "public");
-
-		assertNotContains(RDF.TYPE, vVCARD.Location);
-		assertContains(vVCARD.geo, (Resource) null);
-		assertNotContains(null, vVCARD.latitude, "37.77");
-		assertNotContains(null, vVCARD.longitude, "-122.41");
-
-		assertContains(RDF.TYPE, vVCARD.Name);
-		assertContains(vVCARD.additional_name, "John");
-		assertContains(vVCARD.given_name, "Jonathan");
-		assertContains(vVCARD.family_name, "Doe-Smith");
-		assertContains(vVCARD.honorific_prefix, "Mister");
-		assertContains(vVCARD.honorific_suffix, "Medical Doctor");
-
-		assertNotContains(null, vVCARD.post_office_box, "Box 1234");
-		assertNotContains(null, vVCARD.extended_address, "Suite 100");
-		assertNotContains(null, vVCARD.street_address, "123 Fake Street");
-		assertNotContains(null, vVCARD.locality, "San Francisco");
-		assertNotContains(null, vVCARD.region, "California");
-		assertNotContains(null, vVCARD.postal_code, "12345-6789");
-		assertNotContains(null, vVCARD.country_name, "United States of America");
-		assertNotContains(null, vVCARD.addressType, "work");
-	}
-
-	@Test
-	public void testfnOrg() throws Exception {
-		assertExtract("/microformats/hcard/30-fn-org.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
-		RepositoryResult<Statement> repositoryResult = getStatements(null,
-				RDF.TYPE, vVCARD.VCard);
-		try {
-			while (repositoryResult.hasNext()) {
-				Resource card = repositoryResult.next().getSubject();
-				Assert.assertNotNull(findObject(card, vVCARD.fn));
-				String name = findObjectAsLiteral(card, vVCARD.fn);
-
-				Assert.assertNotNull(findObject(card, vVCARD.org));
-				Resource org = findObjectAsResource(card, vVCARD.org);
-				Assert.assertNotNull(findObject(org, vVCARD.organization_name));
-
-				if (name.equals("Dan Connolly")) {
-					Assert.assertNotNull(findObject(card, vVCARD.n));
-					Assert.assertFalse(name.equals(org.stringValue()));
-				}
-			}
-		} finally {
-			repositoryResult.close();
-		}
-	}
-
-	@Test
-	public void testInclude() throws Exception {
-		assertExtract("/microformats/hcard/31-include.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
-		assertStatementsSize(vVCARD.email, (Value) null, 3);
-
-		RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
-				vVCARD.VCard);
-		try {
-			while (statements.hasNext()) {
-				Resource vcard = statements.next().getSubject();
-
-				Assert.assertNotNull(findObject(vcard, vVCARD.fn));
-				Assert.assertEquals("Brian Suda",
-						findObjectAsLiteral(vcard, vVCARD.fn));
-
-				Assert.assertNotNull(findObject(vcard, vVCARD.url));
-				String url = findObjectAsResource(vcard, vVCARD.url)
-						.stringValue();
-				Assert.assertEquals("http://suda.co.uk/", url);
-
-				Resource name = findObjectAsResource(vcard, vVCARD.n);
-				Assert.assertEquals("Brian",
-						findObjectAsLiteral(name, vVCARD.given_name));
-				Assert.assertEquals("Suda",
-						findObjectAsLiteral(name, vVCARD.family_name));
-
-				// Included data.
-				Assert.assertNotNull(findObject(vcard, vVCARD.email));
-				String mail = findObjectAsLiteral(vcard, vVCARD.email);
-				Assert.assertEquals("mailto:correct@example.com", mail);
-			}
-		} finally {
-			statements.close();
-		}
-	}
-
-	@Test
-	public void testHeader() throws Exception {
-		assertExtract("/microformats/hcard/32-header.html");
-		assertModelNotEmpty();
-		// check fn, name, family, nick.
-		assertJohn();
-
-		RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
-				vVCARD.VCard);
-		try {
-			Resource example = RDFUtils.iri("http://example.org/");
-			while (statements.hasNext()) {
-				Resource card = statements.next().getSubject();
-				Assert.assertNotNull(findObject(card, vVCARD.fn));
-
-				String fn = findObjectAsLiteral(card, vVCARD.fn);
-				if ("Jane Doe".equals(fn)) {
-					assertNotFound(card, vVCARD.org);
-				} else {
-					Assert.assertTrue("John Doe".equals(fn)
-							|| "Brian Suda".equals(fn));
-
-					Assert.assertNotNull(findObject(card, vVCARD.url));
-					Assert.assertEquals(example,
-							findObjectAsResource(card, vVCARD.url));
-
-					Assert.assertNotNull(findObject(card, vVCARD.org));
-					Resource org = findObjectAsResource(card, vVCARD.org);
-					assertContains(org, RDF.TYPE, vVCARD.Organization);
-					Assert.assertNotNull(org);
-					Assert.assertNotNull(findObject(card, vVCARD.org));
-					Assert.assertNotNull(findObject(org,
-							vVCARD.organization_name));
-					Assert.assertEquals("example.org",
-							findObjectAsLiteral(org, vVCARD.organization_name));
-				}
-			}
-			// Just to be sure there are no spurious statements.
-			// assertStatementsSize(VCARD.org, null, 2);
-			assertStatementsSize(vVCARD.url, example, 2);
-		} finally {
-			statements.close();
-		}
-	}
-
-	@Test
-	public void testAreaFull() throws Exception {
-		assertExtract("/microformats/hcard/33-area.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
-
-		RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
-				vVCARD.VCard);
-		while (statements.hasNext()) {
-			Resource vcard = statements.next().getSubject();
-			final Value fnValue = findObject(vcard, vVCARD.fn);
-			Assert.assertNotNull(fnValue);
-			String fn = fnValue.stringValue();
-			final Value vcardValue = findObject(vcard, vVCARD.url);
-			Assert.assertNotNull(vcardValue);
-			String url = vcardValue.stringValue();
-			final Value emailValue = findObject(vcard, vVCARD.email);
-			Assert.assertNotNull(emailValue);
-			String mail = emailValue.stringValue();
-			Assert.assertEquals("Joe Public", fn);
-			Assert.assertEquals("http://example.com/", url);
-			Assert.assertEquals("mailto:joe@example.com", mail);
-		}
-	}
-
-	@Test
-	public void testCategories() throws Exception {
-		assertExtract("/microformats/hcard/36-categories.html");
-		assertModelNotEmpty();
-		assertContains(vVCARD.given_name, "Joe");
-		assertContains(vVCARD.given_name, "john");
-		assertContains(vVCARD.family_name, "doe");
-		assertContains(vVCARD.family_name, "User");
-		assertContains(vVCARD.fn, "john doe");
-		assertContains(vVCARD.fn, "Joe User");
-
-		assertContains(vVCARD.category, "C1");
-		assertContains(vVCARD.category, "C2a");
-		assertContains(vVCARD.category, "C4");
-		assertContains(vVCARD.category, "User");
-		String[] cats = { "C3", "C5", "C6", "C7", "C9", "luser", "D1", "D2",
-				"D3" };
-		for (String cat : cats)
-			assertContains(vVCARD.category, "http://example.com/tag/" + cat);
-
-		assertNotContains(null, vVCARD.category, "D4");
-	}
-
-	@Test
-	public void testSingleton() throws Exception {
-		// this tests probably tests that e just get the first fn and so on
-		assertExtract("/microformats/hcard/37-singleton.html");
-		assertModelNotEmpty();
-		assertStatementsSize(vVCARD.fn, (Value) null, 1);
-		assertContains(vVCARD.fn, "john doe 1");
-
-		assertStatementsSize(RDF.TYPE, vVCARD.Name, 1);
-		assertStatementsSize(vVCARD.given_name, (Value) null, 1);
-		assertContains(vVCARD.given_name, "john");
-		assertStatementsSize(vVCARD.family_name, (Value) null, 1);
-		assertContains(vVCARD.family_name, "doe");
-		assertStatementsSize(vVCARD.sort_string, (Value) null, 1);
-		assertContains(vVCARD.sort_string, "d");
-
-		assertStatementsSize(vVCARD.bday, (Value) null, 1);
-		assertContains(vVCARD.bday, "20060707");
-		assertStatementsSize(vVCARD.rev, (Value) null, 1);
-		assertContains(vVCARD.rev, "20060707");
-		assertStatementsSize(vVCARD.class_, (Value) null, 1);
-		assertContains(vVCARD.class_, "public");
-		assertStatementsSize(vVCARD.tz, (Value) null, 1);
-		assertContains(vVCARD.tz, "+0600");
-
-		// Why 0? because the extractor does not look at geo uF!
-		assertStatementsSize(RDF.TYPE, vVCARD.Location, 0);
-		assertStatementsSize(vVCARD.geo, (Value) null, 2);
-
-		assertNotContains(null, vVCARD.latitude, "123.45");
-		assertNotContains(null, vVCARD.longitude, "67.89");
-
-		assertStatementsSize(vVCARD.uid, (Value) null, 1);
-		assertContains(vVCARD.uid, "unique-id-1");
-	}
-
-	@Test
-	public void testUidFull() throws Exception {
-		assertExtract("/microformats/hcard/38-uid.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
-		RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
-				vVCARD.VCard);
-
-		try {
-			while (statements.hasNext()) {
-				Resource vcard = statements.next().getSubject();
-				Assert.assertNotNull(findObject(vcard, vVCARD.fn));
-				String fn = findObjectAsLiteral(vcard, vVCARD.fn);
-				Assert.assertEquals("Ryan King", fn);
-
-				Assert.assertNotNull(findObject(vcard, vVCARD.n));
-				Resource n = findObjectAsResource(vcard, vVCARD.n);
-				Assert.assertNotNull(n);
-				Assert.assertNotNull(findObject(n, vVCARD.given_name));
-				Assert.assertEquals("Ryan",
-						findObjectAsLiteral(n, vVCARD.given_name));
-				Assert.assertNotNull(findObject(n, vVCARD.family_name));
-				Assert.assertEquals("King",
-						findObjectAsLiteral(n, vVCARD.family_name));
-
-				Assert.assertNotNull(findObject(vcard, vVCARD.url));
-				Resource url = findObjectAsResource(vcard, vVCARD.url);
-
-				Assert.assertNotNull(findObject(vcard, vVCARD.uid));
-				String uid = findObjectAsLiteral(vcard, vVCARD.uid);
-
-				Assert.assertEquals("http://theryanking.com/contact/",
-						url.stringValue());
-				Assert.assertEquals("http://theryanking.com/contact/", uid);
-			}
-		} finally {
-			statements.close();
-		}
-	}
-
-	@Test
-	public void testRomanianWikipedia() throws Exception {
-		assertExtract("/microformats/hcard/40-fn-inside-adr.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
-		RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
-				vVCARD.VCard);
-
-		try {
-			while (statements.hasNext()) {
-				Resource card = statements.next().getSubject();
-				Assert.assertNotNull(findObject(card, vVCARD.fn));
-				String fn = findObjectAsLiteral(card, vVCARD.fn);
-				Assert.assertEquals("Berlin", fn);
-
-				Assert.assertNotNull(findObject(card, vVCARD.org));
-				Resource org = findObjectAsResource(card, vVCARD.org);
-				assertContains(org, RDF.TYPE, vVCARD.Organization);
-				Assert.assertNotNull(org);
-				Assert.assertNotNull(findObject(card, vVCARD.org));
-				Assert.assertNotNull(findObject(org, vVCARD.organization_name));
-				Assert.assertEquals("Berlin",
-						findObjectAsLiteral(org, vVCARD.organization_name));
-
-			}
-		} finally {
-			statements.close();
-		}
-	}
-
-	@Test
-	public void testNoMicroformats() throws Exception, IOException,
-			ExtractionException {
-		extract("/html/html-without-uf.html");
-		assertModelEmpty();
-	}
-
-	@Test
-	public void testBasic() throws Exception {
-		assertExtract("/microformats/hcard/01-tantek-basic.html");
-		assertModelNotEmpty();
-		assertContains(RDF.TYPE, vVCARD.VCard);
-		// assertContains(RDF.TYPE, vVCARD.Organization);
-		assertContains(RDF.TYPE, vVCARD.Name);
-		// assertContains(vVCARD.organization_name, "Technorati");
-		Resource person = findExactlyOneBlankSubject(vVCARD.fn,
-				RDFUtils.literal("Tantek Celik"));
-		Assert.assertNotNull(person);
-		Resource org = findExactlyOneBlankSubject(vVCARD.organization_name,
-				RDFUtils.literal("Technorati"));
-		Assert.assertNotNull(org);
-		assertContains(person, vVCARD.url, RDFUtils.iri("http://tantek.com/"));
-		assertContains(person, vVCARD.n, (Resource) null);
-		assertContains(person, vVCARD.org, (Resource) null);
-	}
-
-	@Test
-	public void testMultipleclassNamesOnVCard() throws Exception {
-		assertExtract("/microformats/hcard/02-multiple-class-names-on-vcard.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
-		Resource name;
-		RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
-				vVCARD.VCard);
-		while (statements.hasNext()) {
-			name = statements.next().getSubject();
-			assertContains(name, vVCARD.fn, "Ryan King");
-		}
-	}
-
-	@Test
-	public void testImpliedNames() throws Exception {
-		String[] ns = { "Ryan King", "King", "Ryan",
-
-		"Ryan King", "King", "Ryan",
-
-		"Ryan King", "King", "Ryan",
-
-		"Brian Suda", "Suda", "Brian",
-
-		"King, Ryan", "King", "Ryan",
-
-		"King, R", "King", "R",
-
-		"King R", "R", "King",
-
-		"R King", "King", "R",
-
-		"King R.", "R.", "King",
-
-		"Jesse James Garrett", "Garrett", "Jesse",
-
-		"Thomas Vander Wall", "Wall", "Thomas" };
-		List<String> NAMES = Arrays.asList(ns);
-		assertExtract("/microformats/hcard/03-implied-n.html");
-		assertModelNotEmpty();
-
-		RepositoryResult<Statement> statements = getStatements(null, vVCARD.fn,
-				null);
-		Resource vcard;
-		int count = 0;
-		try {
-			while (statements.hasNext()) {
-				vcard = statements.next().getSubject();
-				assertContains(vcard, RDF.TYPE, vVCARD.VCard);
-				Resource name = findObjectAsResource(vcard, vVCARD.n);
-
-				final String objLiteral = findObjectAsLiteral(vcard, vVCARD.fn);
-				int idx = NAMES.indexOf(objLiteral);
-				Assert.assertTrue(
-						String.format("not in names: '%s'", objLiteral),
-						idx >= 0);
-				Assert.assertEquals(NAMES.get(idx + 1),
-						findObjectAsLiteral(name, vVCARD.family_name));
-				Assert.assertEquals(NAMES.get(idx + 2),
-						findObjectAsLiteral(name, vVCARD.given_name));
-				count++;
-			}
-		} finally {
-			statements.close();
-		}
-		Assert.assertEquals(10, count);
-	}
-
-	@Test
-	public void testIgnoreUnknowns() throws Exception {
-		assertExtract("/microformats/hcard/04-ignore-unknowns.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.fn, "Ryan King");
-		assertContains(vVCARD.n, (Resource) null);
-		assertContains(null, "Ryan");
-		assertContains(vVCARD.given_name, "Ryan");
-		assertContains(vVCARD.family_name, "King");
-	}
-
-	@Test
-	public void testMailto1() throws Exception {
-		assertExtract("/microformats/hcard/05-mailto-1.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.fn, "Ryan King");
-		assertContains(RDF.TYPE, vVCARD.Name);
-
-		assertContains(vVCARD.email, RDFUtils.iri("mailto:ryan@technorati.com"));
-
-		assertContains(vVCARD.given_name, "Ryan");
-		assertContains(vVCARD.family_name, "King");
-	}
-
-	@Test
-	public void testMailto2() throws Exception {
-		assertExtract("/microformats/hcard/06-mailto-2.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.fn, "Brian Suda");
-
-		assertContains(vVCARD.email, RDFUtils.iri("mailto:brian@example.com"));
-		assertContains(vVCARD.given_name, "Brian");
-		assertContains(vVCARD.family_name, "Suda");
-	}
-
-	@Test
-	public void testRelativeUrl() throws Exception {
-		assertExtract("/microformats/hcard/07-relative-url.html");
-		assertDefaultVCard();
-		assertJohn();
-		assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
-	}
-
-	@Test
-	public void testRelativeUrlBase() throws Exception {
-		assertExtract("/microformats/hcard/08-relative-url-base.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
-		assertJohn();
-	}
-
-	@Test
-	public void testRelativeUrlXmlBase1() throws Exception {
-		assertExtract("/microformats/hcard/09-relative-url-xmlbase-1.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
-		assertJohn();
-	}
-
-	@Test
-	public void testRelativeUrlXmlBase2() throws Exception {
-		assertExtract("/microformats/hcard/10-relative-url-xmlbase-2.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
-		assertJohn();
-	}
-
-	@Test
-	public void testMultipleUrls() throws Exception {
-		assertExtract("/microformats/hcard/11-multiple-urls.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/foo")));
-		assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/bar")));
-
-		assertJohn();
-	}
-
-	@Test
-	public void testImageSrc() throws Exception {
-		assertExtract("/microformats/hcard/12-img-src-url.html");
-		assertDefaultVCard();
-		assertJohn();
-	}
-
-	@Test
-	public void testPhotoLogo() throws Exception {
-		assertExtract("/microformats/hcard/13-photo-logo.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.photo,
-				RDFUtils.iri(("http://example.org/picture1.png")));
-		assertContains(vVCARD.photo,
-				RDFUtils.iri(("http://example.org/picture2.png")));
-		assertContains(vVCARD.logo,
-				RDFUtils.iri(("http://example.org/picture1.png")));
-		assertContains(vVCARD.logo,
-				RDFUtils.iri(("http://example.org/picture2.png")));
-		assertJohn();
-	}
-
-	@Test
-	public void testImgSrcDataUrl() throws Exception {
-		assertExtract("/microformats/hcard/14-img-src-data-url.html");
-		assertDefaultVCard();
-		Resource data = RDFUtils.iri("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAMAAAAp4XiDAAAABGdBTUEAAK/"
-		+ "INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAASUExURf///8zMzJmZmWZmZ"
-		+ "jMzMwAAAPOPemkAAAM1SURBVHjaYmBgYGBkYQUBFkYWFiCPCchixQAMCCZAACF0MAMVM4K4TFh0IGsBCC"
-		+ "AkOxhYmBnAAKaHhZkZmxaAAGJgYIbpYGBihGgBWsTMzMwE4jIhaWGAYoAAYmCECDExYAcwGxkg5oNIgAB"
-		+ "igDqLARdgZmGB2wICrKwAAcSA3xKgIxlZ0PwCEEAMBCxhgHoWSQtAADFAAxgfYEJ1GEAAQbQw4tUCsocB"
-		+ "YQVAADEgu4uRkREeUCwszEwwLhOKLQABhNDCBA4aSDgwwhIAJKqYUPwCEEAMUK/AUwnc9aywJMCI7DAgA"
-		+ "AggBohZ8JTBhGIJzCoWZL8ABBCYidAB8RUjWppkYUG2BSCAGMDqEMZiswUtXgACiAHsFYixTMywGGLGpgU"
-		+ "WYgABxAA2mQkWCMyMqFoYmdD8ACQAAogBHJHMrCxg1cyIiICmCkYWDFsAAgiihYmZCewFFpR0BfI3LLch+"
-		+ "QUggBiQ0iQjEyMDmh54qCBlUIAAYsCRJsElADQvgWKTlRGeKwECiAF3XgGmMEYQYADZzcoA9z5AAMG9RQC"
-		+ "AtEC9DxBADFiyFyMjVi0wABBAWLQwQdIiuhYGWJIACCBg+KKUJ9BoBRdS2LQALQMIIGDQIEmwAO1kYcVWH"
-		+ "CDZAhBAqFqYmOAxj2YNtAwDAYAAYmDEiBYWzHKKkRERYiwAAYSphZEZwxZGZiZQVEJTJkAAMTCyokc7M5o"
-		+ "ORlC5wcoEjxeAAAJqQXU0UB6W5WFmABMtEzMi1wEEEFAbE0YyAUuzMMEsYQalMkQSBQggUDmNPU3C9IA4L"
-		+ "CxI+QUggEBiKOU8yExgqccCL3chnkPKlQABhGo6ejHBDKmdUHMlQAAhhQvQaGZGkBIkjcAMywLmI+VKgAB"
-		+ "CSowsTJhZkhlWXiBpAQggYBqBZl9GVOdBcz0LZqEEEEAMqLULMBLg1THWog9IAwQQA0qiZcRW5aPbAhBAD"
-		+ "Cg1El4tMAAQQAxoiZYZXnTh1AIQQAzo2QlYpDDjcBgrxGEAAcSAJTthswmiBUwDBBC2GpkZJTaRvQ+mAQK"
-		+ "IAUuuxdZWQvILQABBmSxMjBj5EpcWgACCMoFOYYSpZyHQHgMIMACt2hmoVEikCQAAAABJRU5ErkJggg==");
-
-		assertContains(vVCARD.photo, data);
-		assertContains(vVCARD.logo, data);
-		assertJohn();
-	}
-
-	@Test
-	public void testHonorificAdditionalSingle() throws Exception {
-		assertExtract("/microformats/hcard/15-honorific-additional-single.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.fn, "Mr. John Maurice Doe, Ph.D.");
-
-		assertContains(vVCARD.honorific_prefix, "Mr.");
-		assertContains(vVCARD.honorific_suffix, "Ph.D.");
-
-		assertContains(vVCARD.given_name, "John");
-		assertContains(vVCARD.additional_name, "Maurice");
-		assertContains(vVCARD.family_name, "Doe");
-	}
-
-	@Test
-	public void testHonorificAdditionalMultiple() throws Exception {
-		assertExtract("/microformats/hcard/16-honorific-additional-multiple.html");
-		assertDefaultVCard();
-		assertContains(vVCARD.honorific_prefix, "Mr.");
-		assertContains(vVCARD.honorific_prefix, "Dr.");
-
-		assertContains(vVCARD.honorific_suffix, "Ph.D.");
-		assertContains(vVCARD.honorific_suffix, "J.D.");
-
-		assertContains(vVCARD.given_name, "John");
-		assertContains(vVCARD.additional_name, "Maurice");
-		assertContains(vVCARD.additional_name, "Benjamin");
-		assertContains(vVCARD.family_name, "Doe");
-
-		assertContains(vVCARD.fn,
-				"Mr. Dr. John Maurice Benjamin Doe Ph.D., J.D.");
-	}
-
-	@Test
-	public void testEMailNotUri() throws Exception {
-		assertExtract("/microformats/hcard/17-email-not-uri.html");
-		assertDefaultVCard();
-		assertJohn();
-		assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
-	}
-
-	@Test
-	public void testObjectDataHttpUri() throws Exception {
-		assertExtract("/microformats/hcard/18-object-data-http-uri.html");
-		assertDefaultVCard();
-		assertJohn();
-	}
-
-	@Ignore
-	@Test
-	public void testObjectDataDataUri() throws Exception {
-		assertExtract("/microformats/hcard/19-object-data-data-uri.html");
-		assertDefaultVCard();
-		assertJohn();
-
-		assertContains(vVCARD.photo, (Resource) null);
-		assertContains(vVCARD.logo, (Resource) null);
-	}
-
-	@Test
-	public void testImgAlt() throws Exception {
-		assertExtract("/microformats/hcard/20-image-alt.html");
-		assertDefaultVCard();
-		Resource uri = RDFUtils.iri("http://example.com/foo.png");
-		assertContains(vVCARD.photo, uri);
-		assertContains(vVCARD.logo, uri);
-		assertJohn();
-	}
-
-	@Test
-	public void testAdr() throws Exception {
-		assertExtract("/microformats/hcard/22-adr.html");
-		assertDefaultVCard();
-		assertJohn();
-		assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
-	}
-
-	@Test
-	public void testBirthDayDate() throws Exception {
-		assertExtract("/microformats/hcard/27-bday-date.html");
-		assertModelNotEmpty();
-		assertContains(vVCARD.fn, "john doe");
-		assertContains(vVCARD.given_name, "john");
-		assertContains(vVCARD.family_name, "doe");
-		assertContains(vVCARD.bday, "2000-01-01");
-	}
-
-	@Test
-	public void testBirthDayDateTime() throws Exception {
-		assertExtract("/microformats/hcard/28-bday-datetime.html");
-		assertModelNotEmpty();
-		assertContains(vVCARD.fn, "john doe");
-		assertContains(vVCARD.given_name, "john");
-		assertContains(vVCARD.family_name, "doe");
-		assertContains(vVCARD.bday, "2000-01-01T00:00:00");
-	}
-
-	@Test
-	public void testBirthDayDateTimeTimeZone() throws Exception {
-		assertExtract("/microformats/hcard/29-bday-datetime-timezone.html");
-		assertModelNotEmpty();
-		assertContains(vVCARD.fn, "john doe");
-		assertContains(vVCARD.given_name, "john");
-		assertContains(vVCARD.family_name, "doe");
-		assertContains(vVCARD.bday, "2000-01-01T00:00:00-0800");
-	}
-
-	@Test
-	public void testArea() throws Exception {
-		assertExtract("/microformats/hcard/33-area.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
-		RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
-				vVCARD.VCard);
-		try {
-			while (statements.hasNext()) {
-				Resource vcard = statements.next().getSubject();
-
-				Assert.assertNotNull(findObject(vcard, vVCARD.fn));
-				Assert.assertEquals("Joe Public",
-						findObjectAsLiteral(vcard, vVCARD.fn));
-				Assert.assertNotNull(findObject(vcard, vVCARD.url));
-				String url = findObjectAsLiteral(vcard, vVCARD.url);
-				Assert.assertNotNull(findObject(vcard, vVCARD.email));
-				String mail = findObjectAsLiteral(vcard, vVCARD.email);
-				Assert.assertEquals("http://example.com/", url);
-				Assert.assertEquals("mailto:joe@example.com", mail);
-			}
-		} finally {
-			statements.close();
-		}
-
-		// Check that there are 4 organizations.
-		assertStatementsSize(RDF.TYPE, vVCARD.Organization, 4);
-		statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
-		try {
-			while (statements.hasNext()) {
-				Resource org = statements.next().getSubject();
-				assertContains(null, vVCARD.org, org);
-				Assert.assertNotNull(findObject(org, vVCARD.organization_name));
-				Assert.assertEquals("Joe Public",
-						findObjectAsLiteral(org, vVCARD.organization_name));
-			}
-		} finally {
-			statements.close();
-		}
-	}
-
-	@Test
-	public void testNotes() throws Exception {
-		final String[] NOTES = { "Note 1", "Note 3",
-				"Note 4 with a ; and a , to be escaped" };
-
-		assertExtract("/microformats/hcard/34-notes.html");
-		assertModelNotEmpty();
-		RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
-				vVCARD.VCard);
-		try {
-			while (statements.hasNext()) {
-				Resource vcard = statements.next().getSubject();
-				String fn = findObjectAsLiteral(vcard, vVCARD.fn);
-				String mail = findObjectAsLiteral(vcard, vVCARD.email);
-				Assert.assertEquals("Joe Public", fn);
-				Assert.assertEquals("mailto:joe@example.com", mail);
-			}
-		} finally {
-			statements.close();
-		}
-		for (String note : NOTES) {
-			assertContains(vVCARD.note, note);
-		}
-	}
-
-	@Test
-	public void testIncludePattern() throws Exception {
-		assertExtract("/microformats/hcard/35-include-pattern.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
-
-		RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
-				vVCARD.Name);
-		try {
-			while (statements.hasNext()) {
-				Resource name = statements.next().getSubject();
-				Assert.assertNotNull(findObject(name, vVCARD.given_name));
-				String gn = findObjectAsLiteral(name, vVCARD.given_name);
-				Assert.assertEquals("James", gn);
-				Assert.assertNotNull(findObject(name, vVCARD.family_name));
-				String fn = findObjectAsLiteral(name, vVCARD.family_name);
-				Assert.assertEquals("Levine", fn);
-			}
-		} finally {
-			statements.close();
-		}
-
-		assertStatementsSize(RDF.TYPE, vVCARD.Organization, 2);
-		statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
-		try {
-			while (statements.hasNext()) {
-				Resource org = statements.next().getSubject();
-				Assert.assertNotNull(findObject(org, vVCARD.organization_name));
-				Assert.assertEquals("SimplyHired",
-						findObjectAsLiteral(org, vVCARD.organization_name));
-
-				RepositoryResult<Statement> statements2 = getStatements(null,
-						vVCARD.org, org);
-				try {
-					while (statements2.hasNext()) {
-						Resource vcard = statements2.next().getSubject();
-						Assert.assertNotNull(findObject(vcard, vVCARD.title));
-						Assert.assertEquals("Microformat Brainstormer",
-								findObjectAsLiteral(vcard, vVCARD.title));
-					}
-				} finally {
-					statements2.close();
-				}
-			}
-		} finally {
-			statements.close();
-		}
-	}
-
-	@Test
-	public void testUid() throws Exception {
-		assertExtract("/microformats/hcard/38-uid.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
-		RepositoryResult<Statement> iter = getStatements(null, RDF.TYPE,
-				vVCARD.VCard);
-		while (iter.hasNext()) {
-			Resource vcard = iter.next().getSubject();
-			Assert.assertNotNull(findObject(vcard, vVCARD.fn));
-			String fn = findObjectAsLiteral(vcard, vVCARD.fn);
-			Assert.assertNotNull(findObject(vcard, vVCARD.url));
-			String url = findObjectAsLiteral(vcard, vVCARD.url);
-			Assert.assertNotNull(findObject(vcard, vVCARD.uid));
-			String uid = findObjectAsLiteral(vcard, vVCARD.uid);
-			Assert.assertEquals("Ryan King", fn);
-			Assert.assertEquals("http://theryanking.com/contact/", url);
-			Assert.assertEquals("http://theryanking.com/contact/", uid);
-
-		}
-	}
-
-	@Test
-	public void testIgnoreChildren() throws Exception {
-		assertExtract("/microformats/hcard/41-ignore-children.html");
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
-		assertContains(vVCARD.fn, "Melanie Kl\u00f6\u00df");
-		assertContains(vVCARD.email, RDFUtils.iri("mailto:mkloes@gmail.com"));
-		assertContains(vVCARD.adr, (Resource) null);
-		assertNotContains(null, vVCARD.postal_code, "53127");
-		assertNotContains(null, vVCARD.locality, "Bonn");
-		assertNotContains(null, vVCARD.street_address, "Ippendorfer Weg. 24");
-		assertNotContains(null, vVCARD.country_name, "Germany");
-	}
-
-	/**
-	 * Tests that the HCardName data is not cumulative and is cleaned up at each
-	 * extraction.
-	 *
-	 * @throws Exception
-	 */
-	@Test
-	public void testCumulativeHNames() throws Exception {
-		assertExtract("/microformats/hcard/linkedin-michelemostarda.html");
-		assertModelNotEmpty();
-		assertStatementsSize(vVCARD.given_name, "Michele", 7);
-		assertStatementsSize(vVCARD.family_name, "Mostarda", 7);
-	}
-
-	/**
-	 * Tests the detection and prevention of the inclusion of an ancestor by a
-	 * sibling node. This test is related to issue <a
-	 * href="https://issues.apache.org/jira/browse/ANY23-58">ANY23-58</a>.
-	 *
-	 * @throws IOException
-	 * @throws ExtractionException
-	 */
-	@Test
-	public void testInfiniteLoop() throws IOException, ExtractionException {
-		assertExtract("/microformats/hcard/infinite-loop.html", false);
-		assertIssue(IssueReport.IssueLevel.WARNING,
-				".*Current node tries to include an ancestor node.*");
-	}
-
-	/**
-	 * Tests extractor performances. This test is related to issue <a
-	 * href="https://issues.apache.org/jira/browse/ANY23-76">ANY23-76</a>.
-	 */
-	@Ignore
-	@Test(timeout = 30 * 1000)
-	public void testExtractionPerformance() {
-		assertExtract("/microformats/hcard/performance.html");
-	}
-
-	private void assertDefaultVCard() throws Exception {
-		assertModelNotEmpty();
-		assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
-	}
-
-	private void assertJohn() throws Exception {
-		assertContains(vVCARD.fn, "John Doe");
-		assertContains(vVCARD.given_name, "John");
-		assertContains(vVCARD.family_name, "Doe");
-	}
+  private static final VCard vVCARD = VCard.getInstance();
+
+  protected ExtractorFactory<?> getExtractorFactory() {
+    return new HCardExtractorFactory();
+  }
+
+  @Test
+  public void testEMailNotUriReal() throws Exception {
+    assertExtract("/microformats/hcard/17-email-not-uri.html");
+    assertDefaultVCard();
+    assertJohn();
+    assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
+  }
+
+  @Test
+  public void testTel() throws Exception {
+    assertExtract("/microformats/hcard/21-tel.html");
+    assertDefaultVCard();
+    String[] tels = { "+1.415.555.1231", "+1.415.555.1235",
+            "+1.415.555.1236", "+1.415.555.1237", "+1.415.555.1238",
+            "+1.415.555.1239", "+1.415.555.1240", "+1.415.555.1241",
+            "+1.415.555.1242", "+1.415.555.1243" };
+    for (String tel : tels) {
+      assertContains(vVCARD.tel, RDFUtils.iri("tel:" + tel));
+    }
+    Resource telResource = RDFUtils.iri("tel:+14155551233");
+    assertContains(vVCARD.fax, telResource);
+    assertContains(vVCARD.workTel, telResource);
+    assertContains(vVCARD.homeTel, telResource);
+    assertJohn();
+  }
+
+  @Test
+  public void testAbbrTitleEverything() throws Exception {
+    assertExtract("/microformats/hcard/23-abbr-title-everything.html");
+    assertDefaultVCard();
+
+    assertContains(vVCARD.fn, "John Doe");
+    assertContains(vVCARD.nickname, "JJ");
+
+    assertContains(vVCARD.given_name, "Jonathan");
+    assertContains(vVCARD.additional_name, "John");
+    assertContains(vVCARD.family_name, "Doe-Smith");
+    assertContains(vVCARD.honorific_suffix, "Medical Doctor");
+
+    assertContains(vVCARD.title, "President");
+    assertContains(vVCARD.role, "Chief");
+    assertContains(vVCARD.tz, "-0700");
+    assertContains(vVCARD.bday, "2006-04-04");
+    assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
+    assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
+    assertContains(vVCARD.class_, "public");
+    assertContains(vVCARD.note, "this is a note");
+    assertContains(vVCARD.organization_name, "Intellicorp");
+    assertContains(vVCARD.organization_unit, "Intelligence");
+
+    // We define the property in this extractor _but_ we do not parse it.
+    assertContains(vVCARD.geo, (Resource) null);
+    // Thus we do not contain these.
+    // The interaction is in @link RDFMergerTest.java
+    assertNotContains(RDF.TYPE, vVCARD.Location);
+    assertNotContains(null, vVCARD.latitude, "37.77");
+    assertNotContains(null, vVCARD.longitude, "-122.41");
+
+    // see above
+    assertContains(vVCARD.adr, (Resource) null);
+    assertNotContains(RDF.TYPE, vVCARD.Address);
+    assertNotContains(null, vVCARD.post_office_box, "Box 1234");
+    assertNotContains(null, vVCARD.extended_address, "Suite 100");
+    assertNotContains(null, vVCARD.street_address, "123 Fake Street");
+    assertNotContains(null, vVCARD.locality, "San Francisco");
+    assertNotContains(null, vVCARD.region, "California");
+    assertNotContains(null, vVCARD.postal_code, "12345-6789");
+    assertNotContains(null, vVCARD.country_name, "United States of America");
+    assertNotContains(null, vVCARD.addressType, "work");
+  }
+
+  @Test
+  public void testGeoAbbr() throws Exception {
+    assertExtract("/microformats/hcard/25-geo-abbr.html");
+    assertModelNotEmpty();
+    assertContains(vVCARD.fn, "Paradise");
+    assertContains(RDF.TYPE, vVCARD.Organization);
+    assertContains(vVCARD.organization_name, "Paradise");
+    // See above: geo property yes, geo blank node no.
+    assertContains(vVCARD.geo, (Resource) null);
+    assertNotContains(RDF.TYPE, vVCARD.Location);
+    assertNotContains(null, vVCARD.latitude, "30.267991");
+    assertNotContains(null, vVCARD.longitude, "-97.739568");
+  }
+
+  @Test
+  public void testAncestors() throws Exception {
+    assertExtract("/microformats/hcard/26-ancestors.html");
+    assertModelNotEmpty();
+
+    assertContains(vVCARD.fn, "John Doe");
+    assertNotContains(null, vVCARD.fn,
+            "Mister Jonathan John Doe-Smith Medical Doctor");
+    assertContains(vVCARD.nickname, "JJ");
+    assertNotContains(RDF.TYPE, vVCARD.Address);
+    assertContains(vVCARD.tz, "-0700");
+    assertContains(vVCARD.title, "President");
+    assertContains(vVCARD.role, "Chief");
+    assertContains(vVCARD.organization_name, "Intellicorp");
+    assertContains(vVCARD.organization_unit, "Intelligence");
+
+    assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
+    assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
+    assertContains(vVCARD.note, "this is a note");
+    assertContains(vVCARD.class_, "public");
+
+    assertNotContains(RDF.TYPE, vVCARD.Location);
+    assertContains(vVCARD.geo, (Resource) null);
+    assertNotContains(null, vVCARD.latitude, "37.77");
+    assertNotContains(null, vVCARD.longitude, "-122.41");
+
+    assertContains(RDF.TYPE, vVCARD.Name);
+    assertContains(vVCARD.additional_name, "John");
+    assertContains(vVCARD.given_name, "Jonathan");
+    assertContains(vVCARD.family_name, "Doe-Smith");
+    assertContains(vVCARD.honorific_prefix, "Mister");
+    assertContains(vVCARD.honorific_suffix, "Medical Doctor");
+
+    assertNotContains(null, vVCARD.post_office_box, "Box 1234");
+    assertNotContains(null, vVCARD.extended_address, "Suite 100");
+    assertNotContains(null, vVCARD.street_address, "123 Fake Street");
+    assertNotContains(null, vVCARD.locality, "San Francisco");
+    assertNotContains(null, vVCARD.region, "California");
+    assertNotContains(null, vVCARD.postal_code, "12345-6789");
+    assertNotContains(null, vVCARD.country_name, "United States of America");
+    assertNotContains(null, vVCARD.addressType, "work");
+  }
+
+  @Test
+  public void testfnOrg() throws Exception {
+    assertExtract("/microformats/hcard/30-fn-org.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
+    RepositoryResult<Statement> repositoryResult = getStatements(null,
+            RDF.TYPE, vVCARD.VCard);
+    try {
+      while (repositoryResult.hasNext()) {
+        Resource card = repositoryResult.next().getSubject();
+        assertNotNull(findObject(card, vVCARD.fn));
+        String name = findObjectAsLiteral(card, vVCARD.fn);
+
+        assertNotNull(findObject(card, vVCARD.org));
+        Resource org = findObjectAsResource(card, vVCARD.org);
+        assertNotNull(findObject(org, vVCARD.organization_name));
+
+        if (name.equals("Dan Connolly")) {
+          assertNotNull(findObject(card, vVCARD.n));
+          assertFalse(name.equals(org.stringValue()));
+        }
+      }
+    } finally {
+      repositoryResult.close();
+    }
+  }
+
+  @Test
+  public void testInclude() throws Exception {
+    assertExtract("/microformats/hcard/31-include.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
+    assertStatementsSize(vVCARD.email, (Value) null, 3);
+
+    RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+            vVCARD.VCard);
+    try {
+      while (statements.hasNext()) {
+        Resource vcard = statements.next().getSubject();
+
+        assertNotNull(findObject(vcard, vVCARD.fn));
+        assertEquals("Brian Suda",
+                findObjectAsLiteral(vcard, vVCARD.fn));
+
+        assertNotNull(findObject(vcard, vVCARD.url));
+        String url = findObjectAsResource(vcard, vVCARD.url)
+                .stringValue();
+        assertEquals("http://suda.co.uk/", url);
+
+        Resource name = findObjectAsResource(vcard, vVCARD.n);
+        assertEquals("Brian",
+                findObjectAsLiteral(name, vVCARD.given_name));
+        assertEquals("Suda",
+                findObjectAsLiteral(name, vVCARD.family_name));
+
+        // Included data.
+        assertNotNull(findObject(vcard, vVCARD.email));
+        String mail = findObjectAsLiteral(vcard, vVCARD.email);
+        assertEquals("mailto:correct@example.com", mail);
+      }
+    } finally {
+      statements.close();
+    }
+  }
+
+  @Test
+  public void testHeader() throws Exception {
+    assertExtract("/microformats/hcard/32-header.html");
+    assertModelNotEmpty();
+    // check fn, given name and family name.
+    assertJohn();
+
+    RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+            vVCARD.VCard);
+    try {
+      Resource example = RDFUtils.iri("http://example.org/");
+      while (statements.hasNext()) {
+        Resource card = statements.next().getSubject();
+        assertNotNull(findObject(card, vVCARD.fn));
+
+        String fn = findObjectAsLiteral(card, vVCARD.fn);
+        if ("Jane Doe".equals(fn)) {
+          assertNotFound(card, vVCARD.org);
+        } else {
+          assertTrue("John Doe".equals(fn)
+                  || "Brian Suda".equals(fn));
+
+          assertNotNull(findObject(card, vVCARD.url));
+          assertEquals(example,
+                  findObjectAsResource(card, vVCARD.url));
+
+          assertNotNull(findObject(card, vVCARD.org));
+          Resource org = findObjectAsResource(card, vVCARD.org);
+          assertContains(org, RDF.TYPE, vVCARD.Organization);
+          assertNotNull(org);
+          assertNotNull(findObject(card, vVCARD.org));
+          assertNotNull(findObject(org,
+                  vVCARD.organization_name));
+          assertEquals("example.org",
+                  findObjectAsLiteral(org, vVCARD.organization_name));
+        }
+      }
+      // Just to be sure there are no spurious statements.
+      // assertStatementsSize(VCARD.org, null, 2);
+      assertStatementsSize(vVCARD.url, example, 2);
+    } finally {
+      statements.close();
+    }
+  }
+
+  @Test
+  public void testAreaFull() throws Exception {
+    assertExtract("/microformats/hcard/33-area.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
+
+    RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+            vVCARD.VCard);
+    while (statements.hasNext()) {
+      Resource vcard = statements.next().getSubject();
+      final Value fnValue = findObject(vcard, vVCARD.fn);
+      assertNotNull(fnValue);
+      String fn = fnValue.stringValue();
+      final Value vcardValue = findObject(vcard, vVCARD.url);
+      assertNotNull(vcardValue);
+      String url = vcardValue.stringValue();
+      final Value emailValue = findObject(vcard, vVCARD.email);
+      assertNotNull(emailValue);
+      String mail = emailValue.stringValue();
+      assertEquals("Joe Public", fn);
+      assertEquals("http://example.com/", url);
+      assertEquals("mailto:joe@example.com", mail);
+    }
+  }
+
+  @Test
+  public void testCategories() throws Exception {
+    assertExtract("/microformats/hcard/36-categories.html");
+    assertModelNotEmpty();
+    assertContains(vVCARD.given_name, "Joe");
+    assertContains(vVCARD.given_name, "john");
+    assertContains(vVCARD.family_name, "doe");
+    assertContains(vVCARD.family_name, "User");
+    assertContains(vVCARD.fn, "john doe");
+    assertContains(vVCARD.fn, "Joe User");
+
+    assertContains(vVCARD.category, "C1");
+    assertContains(vVCARD.category, "C2a");
+    assertContains(vVCARD.category, "C4");
+    assertContains(vVCARD.category, "User");
+    String[] cats = { "C3", "C5", "C6", "C7", "C9", "luser", "D1", "D2",
+    "D3" };
+    for (String cat : cats)
+      assertContains(vVCARD.category, "http://example.com/tag/" + cat);
+
+    assertNotContains(null, vVCARD.category, "D4");
+  }
+
+  @Test
+  public void testSingleton() throws Exception {
+    // this test probably checks that we just get the first fn and so on
+    assertExtract("/microformats/hcard/37-singleton.html");
+    assertModelNotEmpty();
+    assertStatementsSize(vVCARD.fn, (Value) null, 1);
+    assertContains(vVCARD.fn, "john doe 1");
+
+    assertStatementsSize(RDF.TYPE, vVCARD.Name, 1);
+    assertStatementsSize(vVCARD.given_name, (Value) null, 1);
+    assertContains(vVCARD.given_name, "john");
+    assertStatementsSize(vVCARD.family_name, (Value) null, 1);
+    assertContains(vVCARD.family_name, "doe");
+    assertStatementsSize(vVCARD.sort_string, (Value) null, 1);
+    assertContains(vVCARD.sort_string, "d");
+
+    assertStatementsSize(vVCARD.bday, (Value) null, 1);
+    assertContains(vVCARD.bday, "20060707");
+    assertStatementsSize(vVCARD.rev, (Value) null, 1);
+    assertContains(vVCARD.rev, "20060707");
+    assertStatementsSize(vVCARD.class_, (Value) null, 1);
+    assertContains(vVCARD.class_, "public");
+    assertStatementsSize(vVCARD.tz, (Value) null, 1);
+    assertContains(vVCARD.tz, "+0600");
+
+    // Why 0? Because the extractor does not look at the geo microformat (uF).
+    assertStatementsSize(RDF.TYPE, vVCARD.Location, 0);
+    assertStatementsSize(vVCARD.geo, (Value) null, 2);
+
+    assertNotContains(null, vVCARD.latitude, "123.45");
+    assertNotContains(null, vVCARD.longitude, "67.89");
+
+    assertStatementsSize(vVCARD.uid, (Value) null, 1);
+    assertContains(vVCARD.uid, "unique-id-1");
+  }
+
+  @Test
+  public void testUidFull() throws Exception {
+    assertExtract("/microformats/hcard/38-uid.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
+    RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+            vVCARD.VCard);
+
+    try {
+      while (statements.hasNext()) {
+        Resource vcard = statements.next().getSubject();
+        assertNotNull(findObject(vcard, vVCARD.fn));
+        String fn = findObjectAsLiteral(vcard, vVCARD.fn);
+        assertEquals("Ryan King", fn);
+
+        assertNotNull(findObject(vcard, vVCARD.n));
+        Resource n = findObjectAsResource(vcard, vVCARD.n);
+        assertNotNull(n);
+        assertNotNull(findObject(n, vVCARD.given_name));
+        assertEquals("Ryan",
+                findObjectAsLiteral(n, vVCARD.given_name));
+        assertNotNull(findObject(n, vVCARD.family_name));
+        assertEquals("King",
+                findObjectAsLiteral(n, vVCARD.family_name));
+
+        assertNotNull(findObject(vcard, vVCARD.url));
+        Resource url = findObjectAsResource(vcard, vVCARD.url);
+
+        assertNotNull(findObject(vcard, vVCARD.uid));
+        String uid = findObjectAsLiteral(vcard, vVCARD.uid);
+
+        assertEquals("http://theryanking.com/contact/",
+                url.stringValue());
+        assertEquals("http://theryanking.com/contact/", uid);
+      }
+    } finally {
+      statements.close();
+    }
+  }
+
+  @Test
+  public void testRomanianWikipedia() throws Exception {
+    assertExtract("/microformats/hcard/40-fn-inside-adr.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
+    RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+            vVCARD.VCard);
+
+    try {
+      while (statements.hasNext()) {
+        Resource card = statements.next().getSubject();
+        assertNotNull(findObject(card, vVCARD.fn));
+        String fn = findObjectAsLiteral(card, vVCARD.fn);
+        assertEquals("Berlin", fn);
+
+        assertNotNull(findObject(card, vVCARD.org));
+        Resource org = findObjectAsResource(card, vVCARD.org);
+        assertContains(org, RDF.TYPE, vVCARD.Organization);
+        assertNotNull(org);
+        assertNotNull(findObject(card, vVCARD.org));
+        assertNotNull(findObject(org, vVCARD.organization_name));
+        assertEquals("Berlin",
+                findObjectAsLiteral(org, vVCARD.organization_name));
+
+      }
+    } finally {
+      statements.close();
+    }
+  }
+
+  @Test
+  public void testNoMicroformats() throws Exception, IOException,
+  ExtractionException {
+    extract("/html/html-without-uf.html");
+    assertModelEmpty();
+  }
+
+  @Test
+  public void testBasic() throws Exception {
+    assertExtract("/microformats/hcard/01-tantek-basic.html");
+    assertModelNotEmpty();
+    assertContains(RDF.TYPE, vVCARD.VCard);
+    // assertContains(RDF.TYPE, vVCARD.Organization);
+    assertContains(RDF.TYPE, vVCARD.Name);
+    // assertContains(vVCARD.organization_name, "Technorati");
+    Resource person = findExactlyOneBlankSubject(vVCARD.fn,
+            RDFUtils.literal("Tantek Celik"));
+    assertNotNull(person);
+    Resource org = findExactlyOneBlankSubject(vVCARD.organization_name,
+            RDFUtils.literal("Technorati"));
+    assertNotNull(org);
+    assertContains(person, vVCARD.url, RDFUtils.iri("http://tantek.com/"));
+    assertContains(person, vVCARD.n, (Resource) null);
+    assertContains(person, vVCARD.org, (Resource) null);
+  }
+
+  @Test
+  public void testMultipleclassNamesOnVCard() throws Exception {
+    assertExtract("/microformats/hcard/02-multiple-class-names-on-vcard.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
+    Resource name;
+    RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+            vVCARD.VCard);
+    while (statements.hasNext()) {
+      name = statements.next().getSubject();
+      assertContains(name, vVCARD.fn, "Ryan King");
+    }
+  }
+
+  @Test
+  public void testImpliedNames() throws Exception {
+    String[] ns = { "Ryan King", "King", "Ryan",
+
+            "Ryan King", "King", "Ryan",
+
+            "Ryan King", "King", "Ryan",
+
+            "Brian Suda", "Suda", "Brian",
+
+            "King, Ryan", "King", "Ryan",
+
+            "King, R", "King", "R",
+
+            "King R", "R", "King",
+
+            "R King", "King", "R",
+
+            "King R.", "R.", "King",
+
+            "Jesse James Garrett", "Garrett", "Jesse",
+
+            "Thomas Vander Wall", "Wall", "Thomas" };
+    List<String> NAMES = Arrays.asList(ns);
+    assertExtract("/microformats/hcard/03-implied-n.html");
+    assertModelNotEmpty();
+
+    RepositoryResult<Statement> statements = getStatements(null, vVCARD.fn,
+            null);
+    Resource vcard;
+    int count = 0;
+    try {
+      while (statements.hasNext()) {
+        vcard = statements.next().getSubject();
+        assertContains(vcard, RDF.TYPE, vVCARD.VCard);
+        Resource name = findObjectAsResource(vcard, vVCARD.n);
+
+        final String objLiteral = findObjectAsLiteral(vcard, vVCARD.fn);
+        int idx = NAMES.indexOf(objLiteral);
+        assertTrue(
+                String.format("not in names: '%s'", objLiteral),
+                idx >= 0);
+        assertEquals(NAMES.get(idx + 1),
+                findObjectAsLiteral(name, vVCARD.family_name));
+        assertEquals(NAMES.get(idx + 2),
+                findObjectAsLiteral(name, vVCARD.given_name));
+        count++;
+      }
+    } finally {
+      statements.close();
+    }
+    assertEquals(10, count);
+  }
+
+  @Test
+  public void testIgnoreUnknowns() throws Exception {
+    assertExtract("/microformats/hcard/04-ignore-unknowns.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.fn, "Ryan King");
+    assertContains(vVCARD.n, (Resource) null);
+    assertContains(null, "Ryan");
+    assertContains(vVCARD.given_name, "Ryan");
+    assertContains(vVCARD.family_name, "King");
+  }
+
+  @Test
+  public void testMailto1() throws Exception {
+    assertExtract("/microformats/hcard/05-mailto-1.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.fn, "Ryan King");
+    assertContains(RDF.TYPE, vVCARD.Name);
+
+    assertContains(vVCARD.email, RDFUtils.iri("mailto:ryan@technorati.com"));
+
+    assertContains(vVCARD.given_name, "Ryan");
+    assertContains(vVCARD.family_name, "King");
+  }
+
+  @Test
+  public void testMailto2() throws Exception {
+    assertExtract("/microformats/hcard/06-mailto-2.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.fn, "Brian Suda");
+
+    assertContains(vVCARD.email, RDFUtils.iri("mailto:brian@example.com"));
+    assertContains(vVCARD.given_name, "Brian");
+    assertContains(vVCARD.family_name, "Suda");
+  }
+
+  @Test
+  public void testRelativeUrl() throws Exception {
+    assertExtract("/microformats/hcard/07-relative-url.html");
+    assertDefaultVCard();
+    assertJohn();
+    assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
+  }
+
+  @Test
+  public void testRelativeUrlBase() throws Exception {
+    assertExtract("/microformats/hcard/08-relative-url-base.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
+    assertJohn();
+  }
+
+  @Test
+  public void testRelativeUrlXmlBase1() throws Exception {
+    assertExtract("/microformats/hcard/09-relative-url-xmlbase-1.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
+    assertJohn();
+  }
+
+  @Test
+  public void testRelativeUrlXmlBase2() throws Exception {
+    assertExtract("/microformats/hcard/10-relative-url-xmlbase-2.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
+    assertJohn();
+  }
+
+  @Test
+  public void testMultipleUrls() throws Exception {
+    assertExtract("/microformats/hcard/11-multiple-urls.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/foo")));
+    assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/bar")));
+
+    assertJohn();
+  }
+
+  @Test
+  public void testImageSrc() throws Exception {
+    assertExtract("/microformats/hcard/12-img-src-url.html");
+    assertDefaultVCard();
+    assertJohn();
+  }
+
+  @Test
+  public void testPhotoLogo() throws Exception {
+    assertExtract("/microformats/hcard/13-photo-logo.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.photo,
+            RDFUtils.iri(("http://example.org/picture1.png")));
+    assertContains(vVCARD.photo,
+            RDFUtils.iri(("http://example.org/picture2.png")));
+    assertContains(vVCARD.logo,
+            RDFUtils.iri(("http://example.org/picture1.png")));
+    assertContains(vVCARD.logo,
+            RDFUtils.iri(("http://example.org/picture2.png")));
+    assertJohn();
+  }
+
+  @Test
+  public void testImgSrcDataUrl() throws Exception {
+    assertExtract("/microformats/hcard/14-img-src-data-url.html");
+    assertDefaultVCard();
+    Resource data = RDFUtils.iri("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAMAAAAp4XiDAAAABGdBTUEAAK/"
+            + "INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAASUExURf///8zMzJmZmWZmZ"
+            + "jMzMwAAAPOPemkAAAM1SURBVHjaYmBgYGBkYQUBFkYWFiCPCchixQAMCCZAACF0MAMVM4K4TFh0IGsBCC"
+            + "AkOxhYmBnAAKaHhZkZmxaAAGJgYIbpYGBihGgBWsTMzMwE4jIhaWGAYoAAYmCECDExYAcwGxkg5oNIgAB"
+            + "igDqLARdgZmGB2wICrKwAAcSA3xKgIxlZ0PwCEEAMBCxhgHoWSQtAADFAAxgfYEJ1GEAAQbQw4tUCsocB"
+            + "YQVAADEgu4uRkREeUCwszEwwLhOKLQABhNDCBA4aSDgwwhIAJKqYUPwCEEAMUK/AUwnc9aywJMCI7DAgA"
+            + "AggBohZ8JTBhGIJzCoWZL8ABBCYidAB8RUjWppkYUG2BSCAGMDqEMZiswUtXgACiAHsFYixTMywGGLGpgU"
+            + "WYgABxAA2mQkWCMyMqFoYmdD8ACQAAogBHJHMrCxg1cyIiICmCkYWDFsAAgiihYmZCewFFpR0BfI3LLch+"
+            + "QUggBiQ0iQjEyMDmh54qCBlUIAAYsCRJsElADQvgWKTlRGeKwECiAF3XgGmMEYQYADZzcoA9z5AAMG9RQC"
+            + "AtEC9DxBADFiyFyMjVi0wABBAWLQwQdIiuhYGWJIACCBg+KKUJ9BoBRdS2LQALQMIIGDQIEmwAO1kYcVWH"
+            + "CDZAhBAqFqYmOAxj2YNtAwDAYAAYmDEiBYWzHKKkRERYiwAAYSphZEZwxZGZiZQVEJTJkAAMTCyokc7M5o"
+            + "ORlC5wcoEjxeAAAJqQXU0UB6W5WFmABMtEzMi1wEEEFAbE0YyAUuzMMEsYQalMkQSBQggUDmNPU3C9IA4L"
+            + "CxI+QUggEBiKOU8yExgqccCL3chnkPKlQABhGo6ejHBDKmdUHMlQAAhhQvQaGZGkBIkjcAMywLmI+VKgAB"
+            + "CSowsTJhZkhlWXiBpAQggYBqBZl9GVOdBcz0LZqEEEEAMqLULMBLg1THWog9IAwQQA0qiZcRW5aPbAhBAD"
+            + "Cg1El4tMAAQQAxoiZYZXnTh1AIQQAzo2QlYpDDjcBgrxGEAAcSAJTthswmiBUwDBBC2GpkZJTaRvQ+mAQK"
+            + "IAUuuxdZWQvILQABBmSxMjBj5EpcWgACCMoFOYYSpZyHQHgMIMACt2hmoVEikCQAAAABJRU5ErkJggg==");
+
+    assertContains(vVCARD.photo, data);
+    assertContains(vVCARD.logo, data);
+    assertJohn();
+  }
+
+  @Test
+  public void testHonorificAdditionalSingle() throws Exception {
+    assertExtract("/microformats/hcard/15-honorific-additional-single.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.fn, "Mr. John Maurice Doe, Ph.D.");
+
+    assertContains(vVCARD.honorific_prefix, "Mr.");
+    assertContains(vVCARD.honorific_suffix, "Ph.D.");
+
+    assertContains(vVCARD.given_name, "John");
+    assertContains(vVCARD.additional_name, "Maurice");
+    assertContains(vVCARD.family_name, "Doe");
+  }
+
+  @Test
+  public void testHonorificAdditionalMultiple() throws Exception {
+    assertExtract("/microformats/hcard/16-honorific-additional-multiple.html");
+    assertDefaultVCard();
+    assertContains(vVCARD.honorific_prefix, "Mr.");
+    assertContains(vVCARD.honorific_prefix, "Dr.");
+
+    assertContains(vVCARD.honorific_suffix, "Ph.D.");
+    assertContains(vVCARD.honorific_suffix, "J.D.");
+
+    assertContains(vVCARD.given_name, "John");
+    assertContains(vVCARD.additional_name, "Maurice");
+    assertContains(vVCARD.additional_name, "Benjamin");
+    assertContains(vVCARD.family_name, "Doe");
+
+    assertContains(vVCARD.fn,
+            "Mr. Dr. John Maurice Benjamin Doe Ph.D., J.D.");
+  }
+
+  @Test
+  public void testEMailNotUri() throws Exception {
+    assertExtract("/microformats/hcard/17-email-not-uri.html");
+    assertDefaultVCard();
+    assertJohn();
+    assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
+  }
+
+  @Test
+  public void testObjectDataHttpUri() throws Exception {
+    assertExtract("/microformats/hcard/18-object-data-http-uri.html");
+    assertDefaultVCard();
+    assertJohn();
+  }
+
+  @Test
+  public void testObjectDataDataUri() throws Exception {
+    assertExtract("/microformats/hcard/19-object-data-data-uri.html");
+    assertDefaultVCard();
+    assertJohn();
+
+    assertContains(vVCARD.photo, (Resource) null);
+    assertContains(vVCARD.logo, (Resource) null);
+  }
+
+  @Test
+  public void testImgAlt() throws Exception {
+    assertExtract("/microformats/hcard/20-image-alt.html");
+    assertDefaultVCard();
+    Resource uri = RDFUtils.iri("http://example.com/foo.png");
+    assertContains(vVCARD.photo, uri);
+    assertContains(vVCARD.logo, uri);
+    assertJohn();
+  }
+
+  @Test
+  public void testAdr() throws Exception {
+    assertExtract("/microformats/hcard/22-adr.html");
+    assertDefaultVCard();
+    assertJohn();
+    assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
+  }
+
+  @Test
+  public void testBirthDayDate() throws Exception {
+    assertExtract("/microformats/hcard/27-bday-date.html");
+    assertModelNotEmpty();
+    assertContains(vVCARD.fn, "john doe");
+    assertContains(vVCARD.given_name, "john");
+    assertContains(vVCARD.family_name, "doe");
+    assertContains(vVCARD.bday, "2000-01-01");
+  }
+
+  @Test
+  public void testBirthDayDateTime() throws Exception {
+    assertExtract("/microformats/hcard/28-bday-datetime.html");
+    assertModelNotEmpty();
+    assertContains(vVCARD.fn, "john doe");
+    assertContains(vVCARD.given_name, "john");
+    assertContains(vVCARD.family_name, "doe");
+    assertContains(vVCARD.bday, "2000-01-01T00:00:00");
+  }
+
+  @Test
+  public void testBirthDayDateTimeTimeZone() throws Exception {
+    assertExtract("/microformats/hcard/29-bday-datetime-timezone.html");
+    assertModelNotEmpty();
+    assertContains(vVCARD.fn, "john doe");
+    assertContains(vVCARD.given_name, "john");
+    assertContains(vVCARD.family_name, "doe");
+    assertContains(vVCARD.bday, "2000-01-01T00:00:00-0800");
+  }
+
+  @Test
+  public void testArea() throws Exception {
+    assertExtract("/microformats/hcard/33-area.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
+    RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+            vVCARD.VCard);
+    try {
+      while (statements.hasNext()) {
+        Resource vcard = statements.next().getSubject();
+
+        assertNotNull(findObject(vcard, vVCARD.fn));
+        assertEquals("Joe Public",
+                findObjectAsLiteral(vcard, vVCARD.fn));
+        assertNotNull(findObject(vcard, vVCARD.url));
+        String url = findObjectAsLiteral(vcard, vVCARD.url);
+        assertNotNull(findObject(vcard, vVCARD.email));
+        String mail = findObjectAsLiteral(vcard, vVCARD.email);
+        assertEquals("http://example.com/", url);
+        assertEquals("mailto:joe@example.com", mail);
+      }
+    } finally {
+      statements.close();
+    }
+
+    // Check that there are 4 organizations.
+    assertStatementsSize(RDF.TYPE, vVCARD.Organization, 4);
+    statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
+    try {
+      while (statements.hasNext()) {
+        Resource org = statements.next().getSubject();
+        assertContains(null, vVCARD.org, org);
+        assertNotNull(findObject(org, vVCARD.organization_name));
+        assertEquals("Joe Public",
+                findObjectAsLiteral(org, vVCARD.organization_name));
+      }
+    } finally {
+      statements.close();
+    }
+  }
+
+  @Test
+  public void testNotes() throws Exception {
+    final String[] NOTES = { "Note 1", "Note 3",
+    "Note 4 with a ; and a , to be escaped" };
+
+    assertExtract("/microformats/hcard/34-notes.html");
+    assertModelNotEmpty();
+    RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+            vVCARD.VCard);
+    try {
+      while (statements.hasNext()) {
+        Resource vcard = statements.next().getSubject();
+        String fn = findObjectAsLiteral(vcard, vVCARD.fn);
+        String mail = findObjectAsLiteral(vcard, vVCARD.email);
+        assertEquals("Joe Public", fn);
+        assertEquals("mailto:joe@example.com", mail);
+      }
+    } finally {
+      statements.close();
+    }
+    for (String note : NOTES) {
+      assertContains(vVCARD.note, note);
+    }
+  }
+
+  @Test
+  public void testIncludePattern() throws Exception {
+    assertExtract("/microformats/hcard/35-include-pattern.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
+
+    RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+            vVCARD.Name);
+    try {
+      while (statements.hasNext()) {
+        Resource name = statements.next().getSubject();
+        assertNotNull(findObject(name, vVCARD.given_name));
+        String gn = findObjectAsLiteral(name, vVCARD.given_name);
+        assertEquals("James", gn);
+        assertNotNull(findObject(name, vVCARD.family_name));
+        String fn = findObjectAsLiteral(name, vVCARD.family_name);
+        assertEquals("Levine", fn);
+      }
+    } finally {
+      statements.close();
+    }
+
+    assertStatementsSize(RDF.TYPE, vVCARD.Organization, 2);
+    statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
+    try {
+      while (statements.hasNext()) {
+        Resource org = statements.next().getSubject();
+        assertNotNull(findObject(org, vVCARD.organization_name));
+        assertEquals("SimplyHired",
+                findObjectAsLiteral(org, vVCARD.organization_name));
+
+        RepositoryResult<Statement> statements2 = getStatements(null,
+                vVCARD.org, org);
+        try {
+          while (statements2.hasNext()) {
+            Resource vcard = statements2.next().getSubject();
+            assertNotNull(findObject(vcard, vVCARD.title));
+            assertEquals("Microformat Brainstormer",
+                    findObjectAsLiteral(vcard, vVCARD.title));
+          }
+        } finally {
+          statements2.close();
+        }
+      }
+    } finally {
+      statements.close();
+    }
+  }
+
+  @Test
+  public void testUid() throws Exception {
+    assertExtract("/microformats/hcard/38-uid.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
+    RepositoryResult<Statement> iter = getStatements(null, RDF.TYPE,
+            vVCARD.VCard);
+    while (iter.hasNext()) {
+      Resource vcard = iter.next().getSubject();
+      assertNotNull(findObject(vcard, vVCARD.fn));
+      String fn = findObjectAsLiteral(vcard, vVCARD.fn);
+      assertNotNull(findObject(vcard, vVCARD.url));
+      String url = findObjectAsLiteral(vcard, vVCARD.url);
+      assertNotNull(findObject(vcard, vVCARD.uid));
+      String uid = findObjectAsLiteral(vcard, vVCARD.uid);
+      assertEquals("Ryan King", fn);
+      assertEquals("http://theryanking.com/contact/", url);
+      assertEquals("http://theryanking.com/contact/", uid);
+
+    }
+  }
+
+  @Test
+  public void testIgnoreChildren() throws Exception {
+    assertExtract("/microformats/hcard/41-ignore-children.html");
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
+    assertContains(vVCARD.fn, "Melanie Kl\u00f6\u00df");
+    assertContains(vVCARD.email, RDFUtils.iri("mailto:mkloes@gmail.com"));
+    assertContains(vVCARD.adr, (Resource) null);
+    assertNotContains(null, vVCARD.postal_code, "53127");
+    assertNotContains(null, vVCARD.locality, "Bonn");
+    assertNotContains(null, vVCARD.street_address, "Ippendorfer Weg. 24");
+    assertNotContains(null, vVCARD.country_name, "Germany");
+  }
+
+  /**
+   * Tests that the HCardName data is not cumulative and is cleaned up at each
+   * extraction.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testCumulativeHNames() throws Exception {
+    assertExtract("/microformats/hcard/linkedin-michelemostarda.html");
+    assertModelNotEmpty();
+    assertStatementsSize(vVCARD.given_name, "Michele", 7);
+    assertStatementsSize(vVCARD.family_name, "Mostarda", 7);
+  }
+
+  /**
+   * Tests the detection and prevention of the inclusion of an ancestor by a
+   * sibling node. This test is related to issue <a
+   * href="https://issues.apache.org/jira/browse/ANY23-58">ANY23-58</a>.
+   *
+   * @throws IOException
+   * @throws ExtractionException
+   */
+  @Test
+  public void testInfiniteLoop() throws IOException, ExtractionException {
+    assertExtract("/microformats/hcard/infinite-loop.html", false);
+    assertIssue(IssueReport.IssueLevel.WARNING,
+            ".*Current node tries to include an ancestor node.*");
+  }
+
+  /**
+   * Tests extractor performance. This test is related to issue <a
+   * href="https://issues.apache.org/jira/browse/ANY23-76">ANY23-76</a>.
+   */
+  @Test(timeout = 30 * 1000)
+  public void testExtractionPerformance() {
+    assertExtract("/microformats/hcard/performance.html");
+  }
+
+  private void assertDefaultVCard() throws Exception {
+    assertModelNotEmpty();
+    assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
+  }
+
+  private void assertJohn() throws Exception {
+    assertContains(vVCARD.fn, "John Doe");
+    assertContains(vVCARD.given_name, "John");
+    assertContains(vVCARD.family_name, "Doe");
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
index 815d220..5f9ee7d 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
@@ -23,7 +23,6 @@ import org.apache.any23.vocab.FOAF;
 import org.apache.any23.vocab.HListing;
 import org.apache.any23.vocab.SINDICE;
 import org.junit.Test;
-import org.junit.Ignore;
 import org.eclipse.rdf4j.model.Resource;
 import org.eclipse.rdf4j.model.vocabulary.RDF;
 import org.slf4j.Logger;
@@ -151,7 +150,6 @@ public class HListingExtractorTest extends AbstractExtractorTestCase {
 				RDFUtils.iri(baseIRI.stringValue() + "pic.jpg"));
 	}
 
-	@Ignore("ANY23-159: Error with nodes and markup extracted from HListingExtractorTest.testKelkoo & testKelkooFull")
 	@Test
 	public void testKelkoo() throws Exception {
 		assertExtract("/microformats/hlisting/kelkoo.html");
@@ -187,7 +185,6 @@ public class HListingExtractorTest extends AbstractExtractorTestCase {
 		assertContains(vHLISTING.price, "\u00A3480.17");
 	}
 
-	@Ignore("ANY23-159: Error with nodes and markup extracted from HListingExtractorTest.testKelkoo & testKelkooFull")
 	@Test
 	public void testKelkooFull() throws Exception {
 		assertExtract("/microformats/hlisting/kelkoo-full.html");

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
index ffd4e26..4fa237e 100644
--- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
@@ -37,7 +37,6 @@ import org.apache.any23.extractor.html.TagSoupParser;
 import org.apache.any23.util.StreamUtils;
 import org.apache.commons.io.IOUtils;
 import org.junit.Assert;
-import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.Timeout;
@@ -54,12 +53,11 @@ import static org.junit.Assert.assertFalse;
  */
 public class MicrodataParserTest {
 
-	@Rule
-	public final Timeout timeout = new Timeout(100, TimeUnit.SECONDS);
-	
+//    @Rule
+//    public final Timeout timeout = new Timeout(100, TimeUnit.SECONDS);
+
     private static final Logger logger = LoggerFactory.getLogger(MicrodataParserTest.class);
 
-    @Ignore("TODO: Determine the cause of this")
     @Test
     public void testBasicFeatures() throws IOException {
         extractItemsAndVerifyJSONSerialization(
@@ -68,7 +66,6 @@ public class MicrodataParserTest {
         );
     }
 
-    @Ignore("TODO: Determine the cause of this")
     @Test
     public void testNestedMicrodata() throws IOException {
         extractItemsAndVerifyJSONSerialization(
@@ -77,7 +74,6 @@ public class MicrodataParserTest {
         );
     }
 
-    @Ignore("TODO: Determine the cause of this")
     @Test
     public void testAdvancedItemrefManagement() throws IOException {
         extractItemsAndVerifyJSONSerialization(
@@ -86,7 +82,6 @@ public class MicrodataParserTest {
         );
     }
 
-    @Ignore("TODO: Determine the cause of this")
     @Test
     public void testMicrodataJSONSerialization() throws IOException {
         final Document document = getMicrodataDom("microdata-nested");
@@ -97,6 +92,7 @@ public class MicrodataParserTest {
         final String expected = StreamUtils.asString(
                 this.getClass().getResourceAsStream("/microdata/microdata-json-serialization.json")
         );
+
         Assert.assertEquals("Unexpected serialization for Microdata file.", expected, baos.toString());
     }
 
@@ -122,29 +118,29 @@ public class MicrodataParserTest {
         final AtomicBoolean foundFailure = new AtomicBoolean(false);
         for (int i = 0; i < threadCount; i++) {
             threads.add(new Thread("Test-thread-" + i) {
-					@Override
-					public void run() {
-						try {
-							beforeLatch.await();
-							int counter = 0;
-							while (counter++ < attemptCount && !foundFailure.get()) {
-								final Document document = getDom(content);
-								final MicrodataParserReport report = MicrodataParser.getMicrodata(document);
-								final ItemScope target = report.getDetectedItemScopes()[4];
-								Date actualDate = target.getProperties().get("birthday").get(0).getValue().getAsDate();
-								if (!expectedDate.equals(actualDate)) {
-									foundFailure.set(true);
-								}
-							}
-						}
-						catch (Exception ex) {
-							ex.printStackTrace();
-							foundFailure.set(true);
-						}
-						finally {
-							afterLatch.countDown();
-						}
-					}
+              @Override
+              public void run() {
+                try {
+                  beforeLatch.await();
+                  int counter = 0;
+                  while (counter++ < attemptCount && !foundFailure.get()) {
+                    final Document document = getDom(content);
+                    final MicrodataParserReport report = MicrodataParser.getMicrodata(document);
+                    final ItemScope target = report.getDetectedItemScopes()[4];
+                    Date actualDate = target.getProperties().get("birthday").get(0).getValue().getAsDate();
+                    if (!expectedDate.equals(actualDate)) {
+                      foundFailure.set(true);
+                    }
+                  }
+                }
+                catch (Exception ex) {
+                  ex.printStackTrace();
+                  foundFailure.set(true);
+                }
+                finally {
+                  afterLatch.countDown();
+                }
+              }
             });
         }
         for (Thread thread : threads) {
@@ -167,8 +163,8 @@ public class MicrodataParserTest {
     public void testDeferProperties() throws IOException, MicrodataParserException {
         final Document document = getMicrodataDom("microdata-itemref");
         final MicrodataParser parser = new MicrodataParser(document);
-        final ItemProp[] deferred = parser.deferProperties("ip5", "ip4", "ip3", "unexisting");
-        Assert.assertEquals(3, deferred.length);
+        final ItemProp[] deferred = parser.deferProperties(document.getElementById("is2"), "ip5", "ip4", "ip3", "unexisting");
+        Assert.assertEquals(2, deferred.length);
     }
 
     /**
@@ -181,8 +177,8 @@ public class MicrodataParserTest {
     public void testDeferPropertiesLoopDetection1() throws IOException, MicrodataParserException {
         final Document document = getMicrodataDom("microdata-itemref");
         final MicrodataParser parser = new MicrodataParser(document);
-        parser.setErrorMode(MicrodataParser.ErrorMode.StopAtFirstError);
-        parser.deferProperties("loop0");
+        parser.setErrorMode(MicrodataParser.ErrorMode.STOP_AT_FIRST_ERROR);
+        parser.deferProperties(null, "loop0");
     }
 
     /**
@@ -195,8 +191,8 @@ public class MicrodataParserTest {
     public void testDeferPropertiesLoopDetection2() throws IOException, MicrodataParserException {
         final Document document = getMicrodataDom("microdata-itemref");
         final MicrodataParser parser = new MicrodataParser(document);
-        parser.setErrorMode(MicrodataParser.ErrorMode.StopAtFirstError);
-        parser.deferProperties("loop2");
+        parser.setErrorMode(MicrodataParser.ErrorMode.STOP_AT_FIRST_ERROR);
+        parser.deferProperties(null, "loop2");
     }
 
     /**
@@ -210,9 +206,10 @@ public class MicrodataParserTest {
     public void testDeferPropertiesStateManagement() throws IOException, MicrodataParserException {
         final Document document = getMicrodataDom("microdata-itemref");
         final MicrodataParser parser = new MicrodataParser(document);
-        Assert.assertEquals(1, parser.deferProperties("ip1").length);
-        Assert.assertEquals(1, parser.deferProperties("ip1").length);
-        Assert.assertEquals(1, parser.deferProperties("ip1").length);
+        String ip1 = "ip1";
+        Assert.assertEquals(1, parser.deferProperties(document.getElementById(ip1), ip1).length);
+        Assert.assertEquals(1, parser.deferProperties(document.getElementById(ip1), ip1).length);
+        Assert.assertEquals(1, parser.deferProperties(document.getElementById(ip1), ip1).length);
     }
 
     private Document getDom(String document) throws IOException {
@@ -247,7 +244,7 @@ public class MicrodataParserTest {
     private void extractItemsAndVerifyJSONSerialization(String htmlFile, String expectedResult)
     throws IOException {
         final MicrodataParserReport report = extractItems(htmlFile);
-        final ItemScope[]                items  = report.getDetectedItemScopes();
+        final ItemScope[] items = report.getDetectedItemScopes();
         final MicrodataParserException[] errors = report.getErrors();
 
         logger.debug("begin itemScopes");
@@ -267,7 +264,7 @@ public class MicrodataParserTest {
         final int expectedResults = getExpectedResultCount(resultContent);
         final int expectedErrors  = getExpectedErrorsCount(resultContent);
         Assert.assertEquals("Unexpected number of detect items.", expectedResults, items.length);
-        Assert.assertEquals("Unexpected number of errors."      , expectedErrors, errors.length);
+        Assert.assertEquals("Unexpected number of errors.", expectedErrors, errors.length);
 
         for (int i = 0; i < items.length; i++) {
             Assert.assertEquals(

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
index f52cd59..680fe47 100644
--- a/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
@@ -19,7 +19,6 @@ package org.apache.any23.extractor.rdfa;
 
 import org.apache.any23.extractor.ExtractorFactory;
 import org.junit.Assert;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.eclipse.rdf4j.model.Statement;
 import org.eclipse.rdf4j.repository.RepositoryException;
@@ -65,7 +64,6 @@ public class RDFaExtractorTest extends AbstractRDFaExtractorTestCase {
         }
     }
 
-    @Ignore("RDFa1 parser not able to parse RDFa11 CURIES in this case")
 	@Test
 	public void testRDFa11CURIEs() throws Exception {
 	}

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java b/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java
index f1f2d88..85f33b5 100644
--- a/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java
@@ -75,15 +75,15 @@ public class TemplateXPathExtractorRuleImplTest {
     @Test
     public void testAddRemoveTemplates() {
         final QuadTemplate template1 = new QuadTemplate(
-                new TemplateSubject(TemplateSubject.Type.uri, "http://sub1", false),
+                new TemplateSubject(TemplateSubject.Type.URI, "http://sub1", false),
                 new TemplatePredicate("http://pred1", false),
-                new TemplateObject(TemplateObject.Type.uri, "http://obj1", false),
+                new TemplateObject(TemplateObject.Type.URI, "http://obj1", false),
                 new TemplateGraph("http://graph1", false)
         );
         final QuadTemplate template2 = new QuadTemplate(
-                new TemplateSubject(TemplateSubject.Type.uri, "http://sub2", false),
+                new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
                 new TemplatePredicate("http://pred2", false),
-                new TemplateObject(TemplateObject.Type.uri, "http://obj2", false),
+                new TemplateObject(TemplateObject.Type.URI, "http://obj2", false),
                 new TemplateGraph("http://graph2", false)
         );
 
@@ -94,9 +94,9 @@ public class TemplateXPathExtractorRuleImplTest {
 
         xPathExtractionRule.add(new Variable("v1", "//"));
         final QuadTemplate template3 = new QuadTemplate(
-                new TemplateSubject(TemplateSubject.Type.uri, "http://sub2", false),
+                new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
                 new TemplatePredicate("http://pred2", false),
-                new TemplateObject(TemplateObject.Type.uri, "v1", true),
+                new TemplateObject(TemplateObject.Type.URI, "v1", true),
                 new TemplateGraph("http://graph2", false)
         );
         xPathExtractionRule.add(template3);
@@ -106,9 +106,9 @@ public class TemplateXPathExtractorRuleImplTest {
     public void testAddTemplateWithNoDeclaredVarCheck() {
         xPathExtractionRule.add(
                 new QuadTemplate(
-                        new TemplateSubject(TemplateSubject.Type.uri, "http://sub2", false),
+                        new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
                         new TemplatePredicate("http://pred2", false),
-                        new TemplateObject(TemplateObject.Type.uri, "v1", true),
+                        new TemplateObject(TemplateObject.Type.URI, "v1", true),
                         new TemplateGraph("http://graph2", false)
                 )
         );
@@ -127,15 +127,15 @@ public class TemplateXPathExtractorRuleImplTest {
     @Test
     public void testProcess() throws IOException {
         final QuadTemplate template1 = new QuadTemplate(
-                new TemplateSubject(TemplateSubject.Type.uri, "http://sub1", false),
+                new TemplateSubject(TemplateSubject.Type.URI, "http://sub1", false),
                 new TemplatePredicate("http://pred1", false),
-                new TemplateObject(TemplateObject.Type.literal, "v1", true),
+                new TemplateObject(TemplateObject.Type.LITERAL, "v1", true),
                 new TemplateGraph("http://graph1", false)
         );
         final QuadTemplate template2 = new QuadTemplate(
-                new TemplateSubject(TemplateSubject.Type.uri, "http://sub2", false),
+                new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
                 new TemplatePredicate("v2", true),
-                new TemplateObject(TemplateObject.Type.uri, "http://obj2", false),
+                new TemplateObject(TemplateObject.Type.URI, "http://obj2", false),
                 new TemplateGraph("http://graph2", false)
         );
 

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java b/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java
index ce5b1d6..de6d980 100644
--- a/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java
+++ b/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java
@@ -28,7 +28,7 @@ import org.eclipse.rdf4j.model.Value;
 import org.eclipse.rdf4j.model.ValueFactory;
 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 
-import static org.mockito.Matchers.any;
+import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.times;

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java b/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java
index ebe6ba3..92ae30f 100644
--- a/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java
+++ b/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java
@@ -20,11 +20,7 @@ package org.apache.any23.writer;
 import org.junit.Assert;
 import org.junit.Test;
 import org.eclipse.rdf4j.model.IRI;
-import org.eclipse.rdf4j.model.impl.BNodeImpl;
-import org.eclipse.rdf4j.model.impl.LiteralImpl;
 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
-import org.eclipse.rdf4j.model.vocabulary.RDF;
-
 import java.io.ByteArrayOutputStream;
 
 /**

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/service/src/test/java/org/apache/any23/servlet/ServletTest.java
----------------------------------------------------------------------
diff --git a/service/src/test/java/org/apache/any23/servlet/ServletTest.java b/service/src/test/java/org/apache/any23/servlet/ServletTest.java
index bb168a1..bc87737 100644
--- a/service/src/test/java/org/apache/any23/servlet/ServletTest.java
+++ b/service/src/test/java/org/apache/any23/servlet/ServletTest.java
@@ -25,7 +25,6 @@ import org.apache.any23.util.StringUtils;
 import org.junit.Assert;
 import org.junit.After;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.mortbay.jetty.testing.HttpTester;
 import org.mortbay.jetty.testing.ServletTester;
@@ -229,7 +228,6 @@ public class ServletTest {
      * @throws Exception
      */
     @Test
-    @Ignore
     public void testGETwithURLEncoding() throws Exception {
         content = null;
         HttpTester response = doGetRequest("/best/http://semanticweb.org/wiki/Knud_M%C3%B6ller");
@@ -241,7 +239,6 @@ public class ServletTest {
      * @throws Exception
      */
     @Test
-    @Ignore
     public void testGETwithURLEncodingWithQuery() throws Exception {
         content = null;
         HttpTester response = doGetRequest("/best/http://semanticweb.org/wiki/Knud_M%C3%B6ller?appo=xxx");
@@ -253,7 +250,6 @@ public class ServletTest {
      * @throws Exception
      */
     @Test
-    @Ignore
     public void testGETwithURLEncodingWithFragment() throws Exception {
         content = null;
         HttpTester response = doGetRequest("/best/http://semanticweb.org/wiki/Knud_M%C3%B6ller#abcde");


[6/6] any23 git commit: Merge branch 'ANY23-320'

Posted by le...@apache.org.
Merge branch 'ANY23-320'


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6d0606f9
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6d0606f9
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6d0606f9

Branch: refs/heads/master
Commit: 6d0606f9b2ff3fa05d26b454e9a8fb24d3bfa24d
Parents: 97e364a 4640860
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Jan 8 08:16:00 2018 -0500
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Jan 8 08:16:00 2018 -0500

----------------------------------------------------------------------
 .../any23/cli/ExtractorDocumentationTest.java   |    2 -
 .../java/org/apache/any23/cli/RoverTest.java    |    2 -
 .../org/apache/any23/cli/SimpleRoverTest.java   |    2 -
 .../any23/extractor/csv/CSVExtractor.java       |   23 +-
 .../extractor/html/EmbeddedJSONLDExtractor.java |  363 ++--
 .../any23/extractor/html/HTMLMetaExtractor.java |   58 +-
 .../apache/any23/extractor/microdata/Item.java  |   10 +-
 .../extractor/microdata/ItemPropValue.java      |   31 +-
 .../any23/extractor/microdata/ItemScope.java    |   29 +-
 .../extractor/microdata/MicrodataExtractor.java |   35 +-
 .../extractor/microdata/MicrodataParser.java    |  136 +-
 .../any23/extractor/xpath/QuadTemplate.java     |    1 +
 .../any23/extractor/xpath/TemplateObject.java   |   39 +-
 .../any23/extractor/xpath/TemplateSubject.java  |   13 +-
 .../any23/extractor/yaml/ElementsProcessor.java |   24 +-
 .../any23/rdf/Any23ValueFactoryWrapper.java     |   61 +-
 .../java/org/apache/any23/rdf/RDFUtils.java     |   92 +-
 .../XMLValidationReportSerializer.java          |   21 +-
 .../any23/validator/rule/AboutNotURIRule.java   |    1 +
 .../validator/rule/MetaNameMisuseRule.java      |    1 +
 .../org/apache/any23/vocab/RDFSchemaUtils.java  |   24 +-
 .../any23/extractor/csv/CSVExtractorTest.java   |  178 +-
 .../html/AbstractExtractorTestCase.java         | 1592 ++++++++-------
 .../extractor/html/HCardExtractorTest.java      | 1852 +++++++++---------
 .../extractor/html/HListingExtractorTest.java   |    3 -
 .../microdata/MicrodataParserTest.java          |   81 +-
 .../any23/extractor/rdfa/RDFaExtractorTest.java |    2 -
 .../TemplateXPathExtractorRuleImplTest.java     |   24 +-
 .../any23/filter/IgnoreAccidentalRDFaTest.java  |    2 +-
 .../org/apache/any23/writer/JSONWriterTest.java |    4 -
 .../org/apache/any23/servlet/ServletTest.java   |    4 -
 ....2.1-non-normative-example-1-expected.nquads |    8 +-
 .../5.2.1-non-normative-example-1.html          |   48 +-
 ....2.1-non-normative-example-2-expected.nquads |   33 +-
 .../5.2.1-non-normative-example-2.html          |   16 +-
 .../microdata-basic-expected.properties         |    6 +-
 .../resources/microdata/microdata-basic.html    |   15 +-
 .../microdata-itemref-expected.properties       |   20 +-
 .../resources/microdata/microdata-itemref.html  |   46 +-
 .../microdata/microdata-json-serialization.json |    2 +-
 .../microdata/microdata-nested-expected.nquads  |   19 +-
 .../microdata-nested-expected.properties        |    4 +-
 .../resources/microdata/microdata-nested.html   |   33 +-
 .../microdata-richsnippet-expected.nquads       |   27 +-
 44 files changed, 2468 insertions(+), 2519 deletions(-)
----------------------------------------------------------------------



[3/6] any23 git commit: ANY23-320 Address @Ignore tests in Any23 and ANY23-131 Nested Microdata are not extracted

Posted by le...@apache.org.
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
index 8886e31..ed300af 100644
--- a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
@@ -21,11 +21,9 @@ import org.apache.any23.extractor.ExtractorFactory;
 import org.apache.any23.extractor.html.AbstractExtractorTestCase;
 import org.apache.any23.vocab.CSV;
 import org.junit.Test;
-import org.eclipse.rdf4j.model.impl.LiteralImpl;
 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 import org.eclipse.rdf4j.model.vocabulary.RDF;
 import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
-import org.eclipse.rdf4j.repository.RepositoryException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -36,93 +34,93 @@ import org.slf4j.LoggerFactory;
  */
 public class CSVExtractorTest extends AbstractExtractorTestCase {
 
-	private static final Logger logger = LoggerFactory
-			.getLogger(CSVExtractorTest.class);
-
-	@Override
-	protected ExtractorFactory<?> getExtractorFactory() {
-		return new CSVExtractorFactory();
-	}
-
-	@Test
-	public void testExtractionCommaSeparated() throws Exception {
-		CSV csv = CSV.getInstance();
-		assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
-		logger.debug(dumpModelToRDFXML());
-
-		assertModelNotEmpty();
-		assertStatementsSize(null, null, null, 28);
-		assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
-		assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
-				XMLSchema.INTEGER));
-		assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
-				XMLSchema.INTEGER));
-	}
-
-	@Test
-	public void testExtractionSemicolonSeparated() throws Exception {
-		CSV csv = CSV.getInstance();
-		assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
-		logger.debug(dumpModelToRDFXML());
-
-		assertModelNotEmpty();
-		assertStatementsSize(null, null, null, 28);
-		assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
-		assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
-				XMLSchema.INTEGER));
-		assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
-				XMLSchema.INTEGER));
-	}
-
-	@Test
-	public void testExtractionTabSeparated() throws Exception {
-		CSV csv = CSV.getInstance();
-		assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
-		logger.debug(dumpModelToRDFXML());
-
-		assertModelNotEmpty();
-		assertStatementsSize(null, null, null, 28);
-		assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
-		assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
-				XMLSchema.INTEGER));
-		assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
-				XMLSchema.INTEGER));
-	}
-
-	@Test
-	public void testTypeManagement() throws Exception {
-		CSV csv = CSV.getInstance();
-		assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
-		logger.debug(dumpModelToRDFXML());
-
-		assertModelNotEmpty();
-		assertStatementsSize(null, null, null, 21);
-		assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
-		assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2",
-				XMLSchema.INTEGER));
-		assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
-				XMLSchema.INTEGER));
-		assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XMLSchema.FLOAT));
-		assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XMLSchema.FLOAT));
-		assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XMLSchema.INTEGER));
-	}
-
-	@Test
-	public void testExtractionEmptyValue() throws Exception {
-		CSV csv = CSV.getInstance();
-		assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
-		logger.debug(dumpModelToRDFXML());
-
-		assertModelNotEmpty();
-		assertStatementsSize(null, null, null, 25);
-		assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
-		assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
-				XMLSchema.INTEGER));
-		assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
-				XMLSchema.INTEGER));
-		assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XMLSchema.STRING));
-		assertContains(null, null,
-				SimpleValueFactory.getInstance().createLiteral("Giovanni", XMLSchema.STRING));
-	}
+  private static final Logger logger = LoggerFactory
+          .getLogger(CSVExtractorTest.class);
+
+  @Override
+  protected ExtractorFactory<?> getExtractorFactory() {
+    return new CSVExtractorFactory();
+  }
+
+  @Test
+  public void testExtractionCommaSeparated() throws Exception {
+    CSV csv = CSV.getInstance();
+    assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
+    logger.debug(dumpModelToRDFXML());
+
+    assertModelNotEmpty();
+    assertStatementsSize(null, null, null, 28);
+    assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+    assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+            XMLSchema.INTEGER));
+    assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+            XMLSchema.INTEGER));
+  }
+
+  @Test
+  public void testExtractionSemicolonSeparated() throws Exception {
+    CSV csv = CSV.getInstance();
+    assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
+    logger.debug(dumpModelToRDFXML());
+
+    assertModelNotEmpty();
+    assertStatementsSize(null, null, null, 28);
+    assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+    assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+            XMLSchema.INTEGER));
+    assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+            XMLSchema.INTEGER));
+  }
+
+  @Test
+  public void testExtractionTabSeparated() throws Exception {
+    CSV csv = CSV.getInstance();
+    assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
+    logger.debug(dumpModelToRDFXML());
+
+    assertModelNotEmpty();
+    assertStatementsSize(null, null, null, 28);
+    assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+    assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+            XMLSchema.INTEGER));
+    assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+            XMLSchema.INTEGER));
+  }
+
+  @Test
+  public void testTypeManagement() throws Exception {
+    CSV csv = CSV.getInstance();
+    assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
+    logger.debug(dumpModelToRDFXML());
+
+    assertModelNotEmpty();
+    assertStatementsSize(null, null, null, 21);
+    assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+    assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2",
+            XMLSchema.INTEGER));
+    assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+            XMLSchema.INTEGER));
+    assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XMLSchema.FLOAT));
+    assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XMLSchema.FLOAT));
+    assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XMLSchema.INTEGER));
+  }
+
+  @Test
+  public void testExtractionEmptyValue() throws Exception {
+    CSV csv = CSV.getInstance();
+    assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
+    logger.debug(dumpModelToRDFXML());
+
+    assertModelNotEmpty();
+    assertStatementsSize(null, null, null, 25);
+    assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+    assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+            XMLSchema.INTEGER));
+    assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+            XMLSchema.INTEGER));
+    assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XMLSchema.STRING));
+    assertContains(null, null,
+            SimpleValueFactory.getInstance().createLiteral("Giovanni", XMLSchema.STRING));
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
index 855a88c..5354924 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
@@ -31,6 +31,7 @@ import org.apache.any23.writer.RepositoryWriter;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
+import org.eclipse.rdf4j.common.iteration.Iterations;
 import org.eclipse.rdf4j.model.BNode;
 import org.eclipse.rdf4j.model.Literal;
 import org.eclipse.rdf4j.model.Resource;
@@ -62,802 +63,799 @@ import java.util.Map;
  */
 public abstract class AbstractExtractorTestCase extends AbstractAny23TestBase {
 
-	/**
-	 * Base test document.
-	 */
-	protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/"); // TODO:
-																			// change
-																			// base
-																			// IRI
-																			// string.
-
-	/**
-	 * Internal connection used to collect extraction results.
-	 */
-	protected RepositoryConnection conn;
-
-	/**
-	 * The latest generated report.
-	 */
-	private SingleDocumentExtractionReport report;
-
-	private Sail store;
-
-	private SailRepository repository;
-
-	/**
-	 * Constructor.
-	 */
-	public AbstractExtractorTestCase() {
-		super();
-	}
-
-	/**
-	 * @return the factory of the extractor to be tested.
-	 */
-	protected abstract ExtractorFactory<?> getExtractorFactory();
-
-	/**
-	 * Test case initialization.
-	 * 
-	 * @throws Exception
-	 */
-	@Before
-	public void setUp() throws Exception {
-		super.setUp();
-		store = new MemoryStore();
-		repository = new SailRepository(store);
-		repository.initialize();
-		conn = repository.getConnection();
-	}
-
-	/**
-	 * Test case resources release.
-	 *
-	 * @throws RepositoryException
-	 */
-	@After
-	public void tearDown() throws RepositoryException {
-		try {
-			conn.close();
-		} finally {
-			repository.shutDown();
-		}
-		conn = null;
-		report = null;
-		store = null;
-		repository = null;
-	}
-
-	/**
-	 * @return the connection to the memory repository.
-	 */
-	protected RepositoryConnection getConnection() {
-		return conn;
-	}
-
-	/**
-	 * @return the last generated report.
-	 */
-	protected SingleDocumentExtractionReport getReport() {
-		return report;
-	}
-
-	/**
-	 * Returns the list of issues raised by a given extractor.
-	 *
-	 * @param extractorName
-	 *            name of the extractor.
-	 * @return collection of issues.
-	 */
-	protected Collection<IssueReport.Issue> getIssues(String extractorName) {
-		for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report
-				.getExtractorToIssues().entrySet()) {
-			if (issueEntry.getKey().equals(extractorName)) {
-				return issueEntry.getValue();
-			}
-		}
-		return Collections.emptyList();
-	}
-
-	/**
-	 * Returns the list of issues raised by the extractor under testing.
-	 *
-	 * @return collection of issues.
-	 */
-	protected Collection<IssueReport.Issue> getIssues() {
-		return getIssues(getExtractorFactory().getExtractorName());
-	}
-
-	/**
-	 * Applies the extractor provided by the {@link #getExtractorFactory()} to
-	 * the specified resource.
-	 *
-	 * @param resource
-	 *            resource name.
-	 * @throws org.apache.any23.extractor.ExtractionException
-	 * @throws IOException
-	 */
-	// TODO: MimeType detector to null forces the execution of all extractors,
-	// but extraction
-	// tests should be based on mimetype detection.
-	protected void extract(String resource) throws ExtractionException,
-			IOException {
-		SingleDocumentExtraction ex = new SingleDocumentExtraction(
-				new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI
-						.toString()), getExtractorFactory(),
-				new RepositoryWriter(conn));
-		ex.setMIMETypeDetector(null);
-		report = ex.run();
-	}
-
-	/**
-	 * Performs data extraction over the content of a resource and assert that
-	 * the extraction was fine.
-	 *
-	 * @param resource
-	 *            resource name.
-	 * @param assertNoIssues
-	 *            if <code>true</code>invokes {@link #assertNoIssues()} after
-	 *            the extraction.
-	 */
-	protected void assertExtract(String resource, boolean assertNoIssues) {
-		try {
-			extract(resource);
-			if (assertNoIssues)
-				assertNoIssues();
-		} catch (ExtractionException ex) {
-			throw new RuntimeException(ex);
-		} catch (IOException ex) {
-			throw new RuntimeException(ex);
-		}
-	}
-
-	/**
-	 * Performs data extraction over the content of a resource and assert that
-	 * the extraction was fine and raised no issues.
-	 *
-	 * @param resource
-	 */
-	protected void assertExtract(String resource) {
-		assertExtract(resource, true);
-	}
-
-	/**
-	 * Asserts that the extracted triples contain the pattern
-	 * <code>(_ p o)</code>.
-	 *
-	 * @param p
-	 *            predicate
-	 * @param o
-	 *            object.
-	 * @throws RepositoryException
-	 */
-	protected void assertContains(IRI p, Resource o) throws RepositoryException {
-		assertContains(null, p, o);
-	}
-
-	/**
-	 * Asserts that the extracted triples contain the pattern
-	 * <code>(_ p o)</code>.
-	 *
-	 * @param p
-	 *            predicate
-	 * @param o
-	 *            object.
-	 * @throws RepositoryException
-	 */
-	protected void assertContains(IRI p, String o) throws RepositoryException {
-		assertContains(null, p, RDFUtils.literal(o));
-	}
-
-	/**
-	 * Asserts that the extracted triples contain the pattern
-	 * <code>(_ p o)</code>.
-	 *
-	 * @param p
-	 *            predicate
-	 * @param o
-	 *            object.
-	 * @throws RepositoryException
-	 */
-	protected void assertNotContains(IRI p, Resource o)
-			throws RepositoryException {
-		assertNotContains(null, p, o);
-	}
-
-	/**
-	 * Asserts that the extracted triples contain the pattern
-	 * <code>(s p o)</code>.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @throws RepositoryException
-	 */
-	protected void assertContains(Resource s, IRI p, Value o)
-			throws RepositoryException {
-		Assert.assertTrue(
-				getFailedExtractionMessage()
-						+ String.format("Cannot find triple (%s %s %s)", s, p,
-								o), conn.hasStatement(s, p, o, false));
-	}
-
-	/**
-	 * Asserts that the extracted triples contain the pattern
-	 * <code>(s p o)</code>.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @throws RepositoryException
-	 */
-	protected void assertNotContains(Resource s, IRI p, String o)
-			throws RepositoryException {
-		Assert.assertFalse(getFailedExtractionMessage(),
-				conn.hasStatement(s, p, RDFUtils.literal(o), false));
-	}
-
-	/**
-	 * Asserts that the extracted triples contain the pattern
-	 * <code>(s p o)</code>.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @throws RepositoryException
-	 */
-	protected void assertNotContains(Resource s, IRI p, Resource o)
-			throws RepositoryException {
-		Assert.assertFalse(getFailedExtractionMessage(),
-				conn.hasStatement(s, p, o, false));
-	}
-
-	/**
-	 * Asserts that the model contains at least a statement.
-	 *
-	 * @throws RepositoryException
-	 */
-	protected void assertModelNotEmpty() throws RepositoryException {
-		Assert.assertFalse("The model is expected to not be empty."
-				+ getFailedExtractionMessage(), conn.isEmpty());
-	}
-
-	/**
-	 * Asserts that the model doesn't contain the pattern <code>(s p o)</code>
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @throws RepositoryException
-	 */
-	protected void assertNotContains(Resource s, IRI p, Literal o)
-			throws RepositoryException {
-		Assert.assertFalse(getFailedExtractionMessage(),
-				conn.hasStatement(s, p, o, false));
-	}
-
-	/**
-	 * Asserts that the model is expected to contains no statements.
-	 *
-	 * @throws RepositoryException
-	 */
-	protected void assertModelEmpty() throws RepositoryException {
-		Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty());
-	}
-
-	/**
-	 * Asserts that the extraction generated no issues.
-	 */
-	protected void assertNoIssues() {
-		for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report
-				.getExtractorToIssues().entrySet()) {
-			if (entry.getValue().size() > 0) {
-				System.out.println("Unexpected issue for extractor " + entry.getKey()
-						+ " : " + entry.getValue());
-			}
-			for(Issue nextIssue : entry.getValue()) {
-				if(nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) {
-					Assert.fail("Unexpected issue for extractor " + entry.getKey()
-						+ " : " + entry.getValue());
-				}
-			}
-		}
-	}
-
-	/**
-	 * Asserts that an issue has been produced by the processed
-	 * {@link org.apache.any23.extractor.Extractor}.
-	 *
-	 * @param level
-	 *            expected issue level
-	 * @param issueRegex
-	 *            regex matching the expected human readable issue message.
-	 */
-	protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
-		final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory()
-				.getExtractorName());
-		boolean found = false;
-		for (IssueReport.Issue issue : issues) {
-			if (issue.getLevel() == level
-					&& issue.getMessage().matches(issueRegex)) {
-				found = true;
-				break;
-			}
-		}
-		Assert.assertTrue(String.format(
-				"Cannot find issue with level %s matching expression '%s'",
-				level, issueRegex), found);
-	}
-
-	/**
-	 * Verifies that the current model contains all the given statements.
-	 *
-	 * @param statements
-	 *            list of statements to be verified.
-	 * @throws RepositoryException
-	 */
-	public void assertContainsModel(Statement[] statements)
-			throws RepositoryException {
-		for (Statement statement : statements) {
-			assertContains(statement);
-		}
-	}
-
-	/**
-	 * Verifies that the current model contains all the statements declared in
-	 * the specified <code>modelFile</code>.
-	 *
-	 * @param modelResource
-	 *            the resource containing the model.
-	 * @throws RDFHandlerException
-	 * @throws IOException
-	 * @throws RDFParseException
-	 * @throws RepositoryException
-	 */
-	public void assertContainsModel(String modelResource)
-			throws RDFHandlerException, IOException, RDFParseException,
-			RepositoryException {
-		getConnection().remove(null, SINDICE.getInstance().date, (Value) null,
-				(Resource) null);
-		getConnection().remove(null, SINDICE.getInstance().size, (Value) null,
-				(Resource) null);
-		assertContainsModel(RDFUtils.parseRDF(modelResource));
-	}
-
-	/**
-	 * Asserts that the given pattern <code>(s p o)</code> satisfies the
-	 * expected number of statements.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @param expected
-	 *            expected matches.
-	 * @throws RepositoryException
-	 */
-	protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
-			throws RDFHandlerException, RepositoryException {
-		int statementsSize = getStatementsSize(s, p, o);
-		if (statementsSize != expected) {
-			getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, System.out));
-		}
-
-		Assert.assertEquals("Unexpected number of matching statements.",
-				expected, statementsSize);
-	}
-
-	/**
-	 * Asserts that the given pattern <code>(_ p o)</code> satisfies the
-	 * expected number of statements.
-	 *
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @param expected
-	 *            expected matches.
-	 * @throws RepositoryException
-	 */
-	protected void assertStatementsSize(IRI p, Value o, int expected)
-			throws RDFHandlerException, RepositoryException {
-		assertStatementsSize(null, p, o, expected);
-	}
-
-	/**
-	 * Asserts that the given pattern <code>(_ p o)</code> satisfies the
-	 * expected number of statements.
-	 *
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @param expected
-	 *            expected matches.
-	 * @throws RepositoryException
-	 */
-	protected void assertStatementsSize(IRI p, String o, int expected)
-			throws RDFHandlerException, RepositoryException {
-		assertStatementsSize(p, o == null ? null : RDFUtils.literal(o),
-				expected);
-	}
-
-	/**
-	 * Asserts that the given pattern <code>(s p _)</code> is not present.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @throws RepositoryException
-	 */
-	protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
-		RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
-				true);
-		try {
-			Assert.assertFalse("Expected no statements.", statements.hasNext());
-		} finally {
-			statements.close();
-		}
-	}
-
-	/**
-	 * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it
-	 * is expected to exists and be just one.
-	 *
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @return the matching blank subject.
-	 * @throws RepositoryException
-	 */
-	protected Resource findExactlyOneBlankSubject(IRI p, Value o)
-			throws RepositoryException {
-		RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
-		try {
-			Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
-			Statement stmt = it.next();
-			Resource result = stmt.getSubject();
-			Assert.assertTrue(getFailedExtractionMessage(),
-					result instanceof BNode);
-			Assert.assertFalse(getFailedExtractionMessage(), it.hasNext());
-			return result;
-		} finally {
-			it.close();
-		}
-	}
-
-	/**
-	 * Returns the object matching the pattern <code>(s p o)</code>, it is
-	 * expected to exists and be just one.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @return the matching object.
-	 * @throws RepositoryException
-	 */
-	protected Value findExactlyOneObject(Resource s, IRI p)
-			throws RepositoryException {
-		RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
-		try {
-			Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
-			return it.next().getObject();
-		} finally {
-			it.close();
-		}
-	}
-
-	/**
-	 * Returns all the subjects matching the pattern <code>(s? p o)</code>.
-	 *
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @return list of matching subjects.
-	 * @throws RepositoryException
-	 */
-	protected List<Resource> findSubjects(IRI p, Value o)
-			throws RepositoryException {
-		RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
-		List<Resource> subjects = new ArrayList<Resource>();
-		try {
-			Statement statement;
-			while (it.hasNext()) {
-				statement = it.next();
-				subjects.add(statement.getSubject());
-			}
-		} finally {
-			it.close();
-		}
-		return subjects;
-	}
-
-	/**
-	 * Returns all the objects matching the pattern <code>(s p _)</code>.
-	 *
-	 * @param s
-	 *            predicate.
-	 * @param p
-	 *            predicate.
-	 * @return list of matching objects.
-	 * @throws RepositoryException
-	 */
-	protected List<Value> findObjects(Resource s, IRI p)
-			throws RepositoryException {
-		RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
-		List<Value> objects = new ArrayList<Value>();
-		try {
-			Statement statement;
-			while (it.hasNext()) {
-				statement = it.next();
-				objects.add(statement.getObject());
-			}
-		} finally {
-			it.close();
-		}
-		return objects;
-	}
-
-	/**
-	 * Finds the object matching the pattern <code>(s p _)</code>, asserts to
-	 * find exactly one result.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate
-	 * @return matching object.
-	 * @throws org.openrdf.repository.RepositoryException
-	 */
-	protected Value findObject(Resource s, IRI p) throws RepositoryException {
-		RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
-				true);
-		try {
-			Assert.assertTrue("Expected at least a statement.",
-					statements.hasNext());
-			return (statements.next().getObject());
-		} finally {
-			statements.close();
-		}
-	}
-
-	/**
-	 * Finds the resource object matching the pattern <code>(s p _)</code>,
-	 * asserts to find exactly one result.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @return matching object.
-	 * @throws RepositoryException
-	 */
-	protected Resource findObjectAsResource(Resource s, IRI p)
-			throws RepositoryException {
-		final Value v = findObject(s, p);
-		try {
-			return (Resource) v;
-		} catch (ClassCastException cce) {
-			Assert.fail("Expected resource object, found: "
-					+ v.getClass().getSimpleName());
-			throw new IllegalStateException();
-		}
-	}
-
-	/**
-	 * Finds the literal object matching the pattern <code>(s p _)</code>,
-	 * asserts to find exactly one result.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @return matching object.
-	 * @throws RepositoryException
-	 */
-	protected String findObjectAsLiteral(Resource s, IRI p)
-			throws RepositoryException {
-		return findObject(s, p).stringValue();
-	}
-
-	/**
-	 * Dumps the extracted model in <i>Turtle</i> format.
-	 *
-	 * @return a string containing the model in Turtle.
-	 * @throws RepositoryException
-	 */
-	protected String dumpModelToTurtle() throws RepositoryException {
-		StringWriter w = new StringWriter();
-		try {
-			conn.export(Rio.createWriter(RDFFormat.TURTLE, w));
-			return w.toString();
-		} catch (RDFHandlerException ex) {
-			throw new RuntimeException(ex);
-		}
-	}
-
-	/**
-	 * Dumps the extracted model in <i>NQuads</i> format.
-	 *
-	 * @return a string containing the model in NQuads.
-	 * @throws RepositoryException
-	 */
-	protected String dumpModelToNQuads() throws RepositoryException {
-		StringWriter w = new StringWriter();
-		try {
-			conn.export(Rio.createWriter(RDFFormat.NQUADS, w));
-			return w.toString();
-		} catch (RDFHandlerException ex) {
-			throw new RuntimeException(ex);
-		}
-	}
-
-	/**
-	 * Dumps the extracted model in <i>RDFXML</i> format.
-	 *
-	 * @return a string containing the model in RDFXML.
-	 * @throws RepositoryException
-	 */
-	protected String dumpModelToRDFXML() throws RepositoryException {
-		StringWriter w = new StringWriter();
-		try {
-			conn.export(Rio.createWriter(RDFFormat.RDFXML, w));
-			return w.toString();
-		} catch (RDFHandlerException ex) {
-			throw new RuntimeException(ex);
-		}
-	}
-
-	/**
-	 * Dumps the list of statements contained in the extracted model.
-	 *
-	 * @return list of extracted statements.
-	 * @throws RepositoryException
-	 */
-	protected List<Statement> dumpAsListOfStatements()
-			throws RepositoryException {
-		return conn.getStatements(null, null, null, false).asList();
-	}
-
-	/**
-	 * @return string containing human readable statements.
-	 * @throws RepositoryException
-	 */
-	protected String dumpHumanReadableTriples() throws RepositoryException {
-		StringBuilder sb = new StringBuilder();
-		RepositoryResult<Statement> result = conn.getStatements(null, null,
-				null, false);
-		while (result.hasNext()) {
-			Statement statement = result.next();
-			sb.append(String.format("%s %s %s %s\n", statement.getSubject(),
-					statement.getPredicate(), statement.getObject(),
-					statement.getContext()));
-
-		}
-		return sb.toString();
-	}
-
-	/**
-	 * Checks that a statement is contained in the extracted model. If the
-	 * statement declares bnodes, they are replaced with <code>_</code>
-	 * patterns.
-	 *
-	 * @param statement
-	 * @throws RepositoryException
-	 */
-	// TODO: bnode check is too weak, introduce graph omomorphism check.
-	protected void assertContains(Statement statement)
-			throws RepositoryException {
-		Assert.assertTrue("Cannot find statement " + statement + " in model.",
-				conn.hasStatement(
-						statement.getSubject() instanceof BNode ? null
-								: statement.getSubject(), statement
-								.getPredicate(),
-						statement.getObject() instanceof BNode ? null
-								: statement.getObject(), false));
-	}
-
-	/**
-	 * Assert that the model contains the statement <code>(s p l)</code> where
-	 * <code>l</code> is a literal.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @param l
-	 *            literal content.
-	 * @throws RepositoryException
-	 */
-	protected void assertContains(Resource s, IRI p, String l)
-			throws RepositoryException {
-		assertContains(s, p, RDFUtils.literal(l));
-	}
-
-	/**
-	 * Assert that the model contains the statement <code>(s p l)</code> where
-	 * <code>l</code> is a language literal.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @param l
-	 *            literal content.
-	 * @param lang
-	 *            literal language.
-	 * @throws RepositoryException
-	 */
-	protected void assertContains(Resource s, IRI p, String l, String lang)
-			throws RepositoryException {
-		assertContains(s, p, RDFUtils.literal(l, lang));
-	}
-
-	/**
-	 * Returns all statements matching the pattern <code>(s p o)</code>.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @return list of statements.
-	 * @throws RepositoryException
-	 */
-	protected RepositoryResult<Statement> getStatements(Resource s, IRI p,
-			Value o) throws RepositoryException {
-		return conn.getStatements(s, p, o, false);
-	}
-
-	/**
-	 * Counts all statements matching the pattern <code>(s p o)</code>.
-	 *
-	 * @param s
-	 *            subject.
-	 * @param p
-	 *            predicate.
-	 * @param o
-	 *            object.
-	 * @return number of matches.
-	 * @throws RepositoryException
-	 */
-	protected int getStatementsSize(Resource s, IRI p, Value o)
-			throws RepositoryException {
-		RepositoryResult<Statement> result = getStatements(s, p, o);
-		int count = 0;
-		try {
-			while (result.hasNext()) {
-				result.next();
-				count++;
-			}
-		} finally {
-			result.close();
-		}
-		return count;
-	}
-
-	private String getFailedExtractionMessage() throws RepositoryException {
-		return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads();
-	}
+  /**
+   * Base test document.
+   */
+  //TODO: change base IRI string.
+  protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/"); 
+
+  /**
+   * Internal connection used to collect extraction results.
+   */
+  protected RepositoryConnection conn;
+
+  /**
+   * The latest generated report.
+   */
+  private SingleDocumentExtractionReport report;
+
+  private Sail store;
+
+  private SailRepository repository;
+
+  /**
+   * Constructor.
+   */
+  public AbstractExtractorTestCase() {
+    super();
+  }
+
+  /**
+   * @return the factory of the extractor to be tested.
+   */
+  protected abstract ExtractorFactory<?> getExtractorFactory();
+
+  /**
+   * Test case initialization.
+   * 
+   * @throws Exception
+   */
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    store = new MemoryStore();
+    repository = new SailRepository(store);
+    repository.initialize();
+    conn = repository.getConnection();
+  }
+
+  /**
+   * Test case resources release.
+   *
+   * @throws RepositoryException
+   */
+  @After
+  public void tearDown() throws RepositoryException {
+    try {
+      conn.close();
+    } finally {
+      repository.shutDown();
+    }
+    conn = null;
+    report = null;
+    store = null;
+    repository = null;
+  }
+
+  /**
+   * @return the connection to the memory repository.
+   */
+  protected RepositoryConnection getConnection() {
+    return conn;
+  }
+
+  /**
+   * @return the last generated report.
+   */
+  protected SingleDocumentExtractionReport getReport() {
+    return report;
+  }
+
+  /**
+   * Returns the list of issues raised by a given extractor.
+   *
+   * @param extractorName
+   *            name of the extractor.
+   * @return collection of issues.
+   */
+  protected Collection<IssueReport.Issue> getIssues(String extractorName) {
+    for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report
+            .getExtractorToIssues().entrySet()) {
+      if (issueEntry.getKey().equals(extractorName)) {
+        return issueEntry.getValue();
+      }
+    }
+    return Collections.emptyList();
+  }
+
+  /**
+   * Returns the list of issues raised by the extractor under testing.
+   *
+   * @return collection of issues.
+   */
+  protected Collection<IssueReport.Issue> getIssues() {
+    return getIssues(getExtractorFactory().getExtractorName());
+  }
+
+  /**
+   * Applies the extractor provided by the {@link #getExtractorFactory()} to
+   * the specified resource.
+   *
+   * @param resource
+   *            resource name.
+   * @throws org.apache.any23.extractor.ExtractionException
+   * @throws IOException
+   */
+  // TODO: Setting the MIME type detector to null forces the execution of
+  // all extractors, but extraction tests should be based on MIME type
+  // detection.
+  protected void extract(String resource) throws ExtractionException,
+  IOException {
+    SingleDocumentExtraction ex = new SingleDocumentExtraction(
+            new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI
+                    .toString()), getExtractorFactory(),
+            new RepositoryWriter(conn));
+    ex.setMIMETypeDetector(null);
+    report = ex.run();
+  }
+
+  /**
+   * Performs data extraction over the content of a resource and assert that
+   * the extraction was fine.
+   *
+   * @param resource
+   *            resource name.
+   * @param assertNoIssues
+   *            if <code>true</code> invokes {@link #assertNoIssues()} after
+   *            the extraction.
+   */
+  protected void assertExtract(String resource, boolean assertNoIssues) {
+    try {
+      extract(resource);
+      if (assertNoIssues)
+        assertNoIssues();
+    } catch (ExtractionException ex) {
+      throw new RuntimeException(ex);
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  /**
+   * Performs data extraction over the content of a resource and assert that
+   * the extraction was fine and raised no issues.
+   *
+   * @param resource
+   */
+  protected void assertExtract(String resource) {
+    assertExtract(resource, true);
+  }
+
+  /**
+   * Asserts that the extracted triples contain the pattern
+   * <code>(_ p o)</code>.
+   *
+   * @param p
+   *            predicate
+   * @param o
+   *            object.
+   * @throws RepositoryException
+   */
+  protected void assertContains(IRI p, Resource o) throws RepositoryException {
+    assertContains(null, p, o);
+  }
+
+  /**
+   * Asserts that the extracted triples contain the pattern
+   * <code>(_ p o)</code>.
+   *
+   * @param p
+   *            predicate
+   * @param o
+   *            object.
+   * @throws RepositoryException
+   */
+  protected void assertContains(IRI p, String o) throws RepositoryException {
+    assertContains(null, p, RDFUtils.literal(o));
+  }
+
+  /**
+   * Asserts that the extracted triples do not contain the pattern
+   * <code>(_ p o)</code>.
+   *
+   * @param p
+   *            predicate
+   * @param o
+   *            object.
+   * @throws RepositoryException
+   */
+  protected void assertNotContains(IRI p, Resource o)
+          throws RepositoryException {
+    assertNotContains(null, p, o);
+  }
+
+  /**
+   * Asserts that the extracted triples contain the pattern
+   * <code>(s p o)</code>.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @throws RepositoryException
+   */
+  protected void assertContains(Resource s, IRI p, Value o)
+          throws RepositoryException {
+    Assert.assertTrue(
+            getFailedExtractionMessage()
+            + String.format("Cannot find triple (%s %s %s)", s, p,
+                    o), conn.hasStatement(s, p, o, false));
+  }
+
+  /**
+   * Asserts that the extracted triples do not contain the pattern
+   * <code>(s p o)</code>.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @throws RepositoryException
+   */
+  protected void assertNotContains(Resource s, IRI p, String o)
+          throws RepositoryException {
+    Assert.assertFalse(getFailedExtractionMessage(),
+            conn.hasStatement(s, p, RDFUtils.literal(o), false));
+  }
+
+  /**
+   * Asserts that the extracted triples do not contain the pattern
+   * <code>(s p o)</code>.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @throws RepositoryException
+   */
+  protected void assertNotContains(Resource s, IRI p, Resource o)
+          throws RepositoryException {
+    Assert.assertFalse(getFailedExtractionMessage(),
+            conn.hasStatement(s, p, o, false));
+  }
+
+  /**
+   * Asserts that the model contains at least a statement.
+   *
+   * @throws RepositoryException
+   */
+  protected void assertModelNotEmpty() throws RepositoryException {
+    Assert.assertFalse("The model is expected to not be empty."
+            + getFailedExtractionMessage(), conn.isEmpty());
+  }
+
+  /**
+   * Asserts that the model doesn't contain the pattern <code>(s p o)</code>
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @throws RepositoryException
+   */
+  protected void assertNotContains(Resource s, IRI p, Literal o)
+          throws RepositoryException {
+    Assert.assertFalse(getFailedExtractionMessage(),
+            conn.hasStatement(s, p, o, false));
+  }
+
+  /**
+   * Asserts that the model contains no statements.
+   *
+   * @throws RepositoryException
+   */
+  protected void assertModelEmpty() throws RepositoryException {
+    Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty());
+  }
+
+  /**
+   * Asserts that the extraction generated no ERROR or FATAL issues.
+   */
+  protected void assertNoIssues() {
+    for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report
+            .getExtractorToIssues().entrySet()) {
+      if (entry.getValue().size() > 0) {
+        System.out.println("Unexpected issue for extractor " + entry.getKey()
+        + " : " + entry.getValue());
+      }
+      for(Issue nextIssue : entry.getValue()) {
+        if(nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) {
+          Assert.fail("Unexpected issue for extractor " + entry.getKey()
+          + " : " + entry.getValue());
+        }
+      }
+    }
+  }
+
+  /**
+   * Asserts that an issue has been produced by the processed
+   * {@link org.apache.any23.extractor.Extractor}.
+   *
+   * @param level
+   *            expected issue level
+   * @param issueRegex
+   *            regex matching the expected human readable issue message.
+   */
+  protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
+    final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory()
+            .getExtractorName());
+    boolean found = false;
+    for (IssueReport.Issue issue : issues) {
+      if (issue.getLevel() == level
+              && issue.getMessage().matches(issueRegex)) {
+        found = true;
+        break;
+      }
+    }
+    Assert.assertTrue(String.format(
+            "Cannot find issue with level %s matching expression '%s'",
+            level, issueRegex), found);
+  }
+
+  /**
+   * Verifies that the current model contains all the given statements.
+   *
+   * @param statements
+   *            list of statements to be verified.
+   * @throws RepositoryException
+   */
+  public void assertContainsModel(Statement[] statements)
+          throws RepositoryException {
+    for (Statement statement : statements) {
+      assertContains(statement);
+    }
+  }
+
+  /**
+   * Verifies that the current model contains all the statements declared in
+   * the specified <code>modelResource</code>.
+   *
+   * @param modelResource
+   *            the resource containing the model.
+   * @throws RDFHandlerException
+   * @throws IOException
+   * @throws RDFParseException
+   * @throws RepositoryException
+   */
+  public void assertContainsModel(String modelResource)
+          throws RDFHandlerException, IOException, RDFParseException,
+          RepositoryException {
+    getConnection().remove(null, SINDICE.getInstance().date, (Value) null,
+            (Resource) null);
+    getConnection().remove(null, SINDICE.getInstance().size, (Value) null,
+            (Resource) null);
+    assertContainsModel(RDFUtils.parseRDF(modelResource));
+  }
+
+  /**
+   * Asserts that the given pattern <code>(s p o)</code> satisfies the
+   * expected number of statements.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @param expected
+   *            expected matches.
+   * @throws RepositoryException
+   */
+  protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
+          throws RDFHandlerException, RepositoryException {
+    int statementsSize = getStatementsSize(s, p, o);
+    if (statementsSize != expected) {
+      getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, System.out));
+    }
+
+    Assert.assertEquals("Unexpected number of matching statements.",
+            expected, statementsSize);
+  }
+
+  /**
+   * Asserts that the given pattern <code>(_ p o)</code> satisfies the
+   * expected number of statements.
+   *
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @param expected
+   *            expected matches.
+   * @throws RepositoryException
+   */
+  protected void assertStatementsSize(IRI p, Value o, int expected)
+          throws RDFHandlerException, RepositoryException {
+    assertStatementsSize(null, p, o, expected);
+  }
+
+  /**
+   * Asserts that the given pattern <code>(_ p o)</code> satisfies the
+   * expected number of statements.
+   *
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @param expected
+   *            expected matches.
+   * @throws RepositoryException
+   */
+  protected void assertStatementsSize(IRI p, String o, int expected)
+          throws RDFHandlerException, RepositoryException {
+    assertStatementsSize(p, o == null ? null : RDFUtils.literal(o),
+            expected);
+  }
+
+  /**
+   * Asserts that the given pattern <code>(s p _)</code> is not present.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @throws RepositoryException
+   */
+  protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
+    RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
+            true);
+    try {
+      Assert.assertFalse("Expected no statements.", statements.hasNext());
+    } finally {
+      statements.close();
+    }
+  }
+
+  /**
+   * Returns the blank subject matching the pattern <code>(_:b p o)</code>,
+   * which is expected to exist and to be the only match.
+   *
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @return the matching blank subject.
+   * @throws RepositoryException
+   */
+  protected Resource findExactlyOneBlankSubject(IRI p, Value o)
+          throws RepositoryException {
+    RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
+    try {
+      Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
+      Statement stmt = it.next();
+      Resource result = stmt.getSubject();
+      Assert.assertTrue(getFailedExtractionMessage(),
+              result instanceof BNode);
+      Assert.assertFalse(getFailedExtractionMessage(), it.hasNext());
+      return result;
+    } finally {
+      it.close();
+    }
+  }
+
+  /**
+   * Returns the first object matching the pattern <code>(s p _)</code>; a
+   * match is expected to exist (uniqueness is not verified).
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @return the matching object.
+   * @throws RepositoryException
+   */
+  protected Value findExactlyOneObject(Resource s, IRI p)
+          throws RepositoryException {
+    RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
+    try {
+      Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
+      return it.next().getObject();
+    } finally {
+      it.close();
+    }
+  }
+
+  /**
+   * Returns all the subjects matching the pattern <code>(s? p o)</code>.
+   *
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @return list of matching subjects.
+   * @throws RepositoryException
+   */
+  protected List<Resource> findSubjects(IRI p, Value o)
+          throws RepositoryException {
+    RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
+    List<Resource> subjects = new ArrayList<Resource>();
+    try {
+      Statement statement;
+      while (it.hasNext()) {
+        statement = it.next();
+        subjects.add(statement.getSubject());
+      }
+    } finally {
+      it.close();
+    }
+    return subjects;
+  }
+
+  /**
+   * Returns all the objects matching the pattern <code>(s p _)</code>.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @return list of matching objects.
+   * @throws RepositoryException
+   */
+  protected List<Value> findObjects(Resource s, IRI p)
+          throws RepositoryException {
+    RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
+    List<Value> objects = new ArrayList<Value>();
+    try {
+      Statement statement;
+      while (it.hasNext()) {
+        statement = it.next();
+        objects.add(statement.getObject());
+      }
+    } finally {
+      it.close();
+    }
+    return objects;
+  }
+
+  /**
+   * Finds the object matching the pattern <code>(s p _)</code>, asserting
+   * that at least one result is found.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate
+   * @return matching object.
+   * @throws RepositoryException
+   */
+  protected Value findObject(Resource s, IRI p) throws RepositoryException {
+    RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
+            true);
+    try {
+      Assert.assertTrue("Expected at least a statement.",
+              statements.hasNext());
+      return (statements.next().getObject());
+    } finally {
+      statements.close();
+    }
+  }
+
+  /**
+   * Finds the resource object matching the pattern <code>(s p _)</code>,
+   * asserting that at least one result is found.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @return matching object.
+   * @throws RepositoryException
+   */
+  protected Resource findObjectAsResource(Resource s, IRI p)
+          throws RepositoryException {
+    final Value v = findObject(s, p);
+    try {
+      return (Resource) v;
+    } catch (ClassCastException cce) {
+      Assert.fail("Expected resource object, found: "
+              + v.getClass().getSimpleName());
+      throw new IllegalStateException();
+    }
+  }
+
+  /**
+   * Finds the literal object matching the pattern <code>(s p _)</code>,
+   * asserting that at least one result is found.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @return matching object.
+   * @throws RepositoryException
+   */
+  protected String findObjectAsLiteral(Resource s, IRI p)
+          throws RepositoryException {
+    return findObject(s, p).stringValue();
+  }
+
+  /**
+   * Dumps the extracted model in <i>Turtle</i> format.
+   *
+   * @return a string containing the model in Turtle.
+   * @throws RepositoryException
+   */
+  protected String dumpModelToTurtle() throws RepositoryException {
+    StringWriter w = new StringWriter();
+    try {
+      conn.export(Rio.createWriter(RDFFormat.TURTLE, w));
+      return w.toString();
+    } catch (RDFHandlerException ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  /**
+   * Dumps the extracted model in <i>NQuads</i> format.
+   *
+   * @return a string containing the model in NQuads.
+   * @throws RepositoryException
+   */
+  protected String dumpModelToNQuads() throws RepositoryException {
+    StringWriter w = new StringWriter();
+    try {
+      conn.export(Rio.createWriter(RDFFormat.NQUADS, w));
+      return w.toString();
+    } catch (RDFHandlerException ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  /**
+   * Dumps the extracted model in <i>RDFXML</i> format.
+   *
+   * @return a string containing the model in RDFXML.
+   * @throws RepositoryException
+   */
+  protected String dumpModelToRDFXML() throws RepositoryException {
+    StringWriter w = new StringWriter();
+    try {
+      conn.export(Rio.createWriter(RDFFormat.RDFXML, w));
+      return w.toString();
+    } catch (RDFHandlerException ex) {
+      throw new RuntimeException(ex);
+    }
+  }
+
+  /**
+   * Dumps the list of statements contained in the extracted model.
+   *
+   * @return list of extracted statements.
+   * @throws RepositoryException
+   */
+  protected List<Statement> dumpAsListOfStatements()
+          throws RepositoryException {
+    return Iterations.asList(conn.getStatements(null, null, null, false));
+  }
+
+  /**
+   * @return string containing human readable statements.
+   * @throws RepositoryException
+   */
+  protected String dumpHumanReadableTriples() throws RepositoryException {
+    StringBuilder sb = new StringBuilder();
+    RepositoryResult<Statement> result = conn.getStatements(null, null,
+            null, false);
+    while (result.hasNext()) {
+      Statement statement = result.next();
+      sb.append(String.format("%s %s %s %s\n", statement.getSubject(),
+              statement.getPredicate(), statement.getObject(),
+              statement.getContext()));
+
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Checks that a statement is contained in the extracted model. If the
+   * statement declares bnodes, they are replaced with <code>_</code>
+   * patterns.
+   *
+   * @param statement
+   * @throws RepositoryException
+   */
+  // TODO: bnode check is too weak, introduce graph homomorphism check.
+  protected void assertContains(Statement statement)
+          throws RepositoryException {
+    Assert.assertTrue("Cannot find statement " + statement + " in model.",
+            conn.hasStatement(
+                    statement.getSubject() instanceof BNode ? null
+                            : statement.getSubject(), statement
+                            .getPredicate(),
+                            statement.getObject() instanceof BNode ? null
+                                    : statement.getObject(), false));
+  }
+
+  /**
+   * Assert that the model contains the statement <code>(s p l)</code> where
+   * <code>l</code> is a literal.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @param l
+   *            literal content.
+   * @throws RepositoryException
+   */
+  protected void assertContains(Resource s, IRI p, String l)
+          throws RepositoryException {
+    assertContains(s, p, RDFUtils.literal(l));
+  }
+
+  /**
+   * Assert that the model contains the statement <code>(s p l)</code> where
+   * <code>l</code> is a language literal.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @param l
+   *            literal content.
+   * @param lang
+   *            literal language.
+   * @throws RepositoryException
+   */
+  protected void assertContains(Resource s, IRI p, String l, String lang)
+          throws RepositoryException {
+    assertContains(s, p, RDFUtils.literal(l, lang));
+  }
+
+  /**
+   * Returns all statements matching the pattern <code>(s p o)</code>.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @return list of statements.
+   * @throws RepositoryException
+   */
+  protected RepositoryResult<Statement> getStatements(Resource s, IRI p,
+          Value o) throws RepositoryException {
+    return conn.getStatements(s, p, o, false);
+  }
+
+  /**
+   * Counts all statements matching the pattern <code>(s p o)</code>.
+   *
+   * @param s
+   *            subject.
+   * @param p
+   *            predicate.
+   * @param o
+   *            object.
+   * @return number of matches.
+   * @throws RepositoryException
+   */
+  protected int getStatementsSize(Resource s, IRI p, Value o)
+          throws RepositoryException {
+    RepositoryResult<Statement> result = getStatements(s, p, o);
+    int count = 0;
+    try {
+      while (result.hasNext()) {
+        result.next();
+        count++;
+      }
+    } finally {
+      result.close();
+    }
+    return count;
+  }
+
+  private String getFailedExtractionMessage() throws RepositoryException {
+    return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads();
+  }
 
 }
\ No newline at end of file


[5/6] any23 git commit: Resolve merge conflict between master and ANY23-320

Posted by le...@apache.org.
Resolve merge conflict between master and ANY23-320


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/46408604
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/46408604
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/46408604

Branch: refs/heads/master
Commit: 46408604a18ec289e252093a67f5ea010eed4488
Parents: 60e93a7 d2ace9c
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Jan 3 00:19:05 2018 +0000
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Jan 3 00:19:05 2018 +0000

----------------------------------------------------------------------
 api/src/main/java/org/apache/any23/vocab/YAML.java            | 7 +++++--
 .../org/apache/any23/extractor/yaml/ElementsProcessor.java    | 6 ++----
 .../apache/any23/extractor/yaml/ElementsProcessorTest.java    | 1 -
 .../org/apache/any23/extractor/yaml/YAMLExtractorTest.java    | 7 +------
 .../test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java  | 4 ++--
 .../resources/org/apache/any23/extractor/yaml/test-null.yml   | 4 +++-
 6 files changed, 13 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/46408604/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
----------------------------------------------------------------------
diff --cc core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
index 90863ac,a4604e4..75c6611
--- a/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
+++ b/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
@@@ -104,18 -100,16 +104,17 @@@ public class ElementsProcessor 
       * created.
       * @return instance of {@link ModelHolder},
       */
 +    @SuppressWarnings("unchecked")
      public ModelHolder asModel(IRI namespace, final Object t, Value rootNode) {
-         if (t == null) {
-             return null;
-         }
  
          if (t instanceof List) {
 -            return processList(namespace, (List) t);
 +            return processList(namespace, (List<Object>) t);
          } else if (t instanceof Map) {
 -            return processMap(namespace, (Map) t, rootNode);
 +            return processMap(namespace, (Map<String, Object>) t, rootNode);
          } else if (t instanceof String) {
              return asModelHolder(RDFUtils.makeIRI(t.toString()), modelFactory.createEmptyModel());
+         } else if (t == null) {
+             return asModelHolder(vocab.nullValue, modelFactory.createEmptyModel());
          } else {
              return asModelHolder(Literals.createLiteral(vf, t), modelFactory.createEmptyModel());
          }


[4/6] any23 git commit: ANY23-320 Address @Ignore tests in Any23 and ANY23-131 Nested Microdata are not extracted

Posted by le...@apache.org.
ANY23-320 Address @Ignore tests in Any23 and ANY23-131 Nested Microdata are not extracted


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/60e93a76
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/60e93a76
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/60e93a76

Branch: refs/heads/master
Commit: 60e93a76748e53c413529409fb545e2245013639
Parents: 0613280
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Jan 1 02:58:36 2018 +0000
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Jan 1 02:58:36 2018 +0000

----------------------------------------------------------------------
 .../any23/cli/ExtractorDocumentationTest.java   |    2 -
 .../java/org/apache/any23/cli/RoverTest.java    |    2 -
 .../org/apache/any23/cli/SimpleRoverTest.java   |    2 -
 .../any23/extractor/csv/CSVExtractor.java       |   23 +-
 .../extractor/html/EmbeddedJSONLDExtractor.java |  363 ++--
 .../any23/extractor/html/HTMLMetaExtractor.java |   58 +-
 .../apache/any23/extractor/microdata/Item.java  |   10 +-
 .../extractor/microdata/ItemPropValue.java      |   31 +-
 .../any23/extractor/microdata/ItemScope.java    |   29 +-
 .../extractor/microdata/MicrodataExtractor.java |   35 +-
 .../extractor/microdata/MicrodataParser.java    |  136 +-
 .../any23/extractor/xpath/QuadTemplate.java     |    1 +
 .../any23/extractor/xpath/TemplateObject.java   |   39 +-
 .../any23/extractor/xpath/TemplateSubject.java  |   13 +-
 .../any23/extractor/yaml/ElementsProcessor.java |   24 +-
 .../any23/rdf/Any23ValueFactoryWrapper.java     |   61 +-
 .../java/org/apache/any23/rdf/RDFUtils.java     |   82 +-
 .../XMLValidationReportSerializer.java          |   21 +-
 .../any23/validator/rule/AboutNotURIRule.java   |    1 +
 .../validator/rule/MetaNameMisuseRule.java      |    1 +
 .../org/apache/any23/vocab/RDFSchemaUtils.java  |   24 +-
 .../any23/extractor/csv/CSVExtractorTest.java   |  178 +-
 .../html/AbstractExtractorTestCase.java         | 1592 ++++++++-------
 .../extractor/html/HCardExtractorTest.java      | 1852 +++++++++---------
 .../extractor/html/HListingExtractorTest.java   |    3 -
 .../microdata/MicrodataParserTest.java          |   81 +-
 .../any23/extractor/rdfa/RDFaExtractorTest.java |    2 -
 .../TemplateXPathExtractorRuleImplTest.java     |   24 +-
 .../any23/filter/IgnoreAccidentalRDFaTest.java  |    2 +-
 .../org/apache/any23/writer/JSONWriterTest.java |    4 -
 .../org/apache/any23/servlet/ServletTest.java   |    4 -
 ....2.1-non-normative-example-1-expected.nquads |    8 +-
 .../5.2.1-non-normative-example-1.html          |   48 +-
 ....2.1-non-normative-example-2-expected.nquads |   33 +-
 .../5.2.1-non-normative-example-2.html          |   16 +-
 .../microdata-basic-expected.properties         |    6 +-
 .../resources/microdata/microdata-basic.html    |   15 +-
 .../microdata-itemref-expected.properties       |   20 +-
 .../resources/microdata/microdata-itemref.html  |   46 +-
 .../microdata/microdata-json-serialization.json |    2 +-
 .../microdata/microdata-nested-expected.nquads  |   19 +-
 .../microdata-nested-expected.properties        |    4 +-
 .../resources/microdata/microdata-nested.html   |   33 +-
 .../microdata-richsnippet-expected.nquads       |   27 +-
 44 files changed, 2462 insertions(+), 2515 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java b/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
index 98616ba..97bf4f2 100644
--- a/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
+++ b/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
@@ -42,13 +42,11 @@ public class ExtractorDocumentationTest extends ToolTestBase {
         runToolCheckExit0("--all");
     }
 
-    //@Ignore("no available example")
     @Test
     public void testExampleInput() throws Exception {
         runToolCheckExit0("-i", TARGET_EXTRACTOR);
     }
 
-    //@Ignore("no available example")
     @Test
     public void testExampleOutput() throws Exception {
         runToolCheckExit0("-o", TARGET_EXTRACTOR);

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/cli/src/test/java/org/apache/any23/cli/RoverTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/RoverTest.java b/cli/src/test/java/org/apache/any23/cli/RoverTest.java
index 893220a..7bab314 100644
--- a/cli/src/test/java/org/apache/any23/cli/RoverTest.java
+++ b/cli/src/test/java/org/apache/any23/cli/RoverTest.java
@@ -23,7 +23,6 @@ import org.apache.any23.util.StringUtils;
 import org.apache.any23.util.URLUtils;
 import org.junit.Assert;
 import org.junit.Assume;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.eclipse.rdf4j.model.Statement;
 import org.eclipse.rdf4j.rio.RDFFormat;
@@ -36,7 +35,6 @@ import java.util.Arrays;
  *
  * @author Michele Mostarda (mostarda@fbk.eu)
  */
-@Ignore("Twitter microdata not parsing correctly right now")
 public class RoverTest extends ToolTestBase {
 
     private static final String[] TARGET_FILES = {

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java b/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java
index f659539..b4c10ad 100644
--- a/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java
+++ b/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java
@@ -22,7 +22,6 @@ import java.util.Collection;
 import org.apache.any23.util.FileUtils;
 import org.apache.pdfbox.util.Charsets;
 import org.junit.Assert;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -104,7 +103,6 @@ public class SimpleRoverTest extends ToolTestBase {
      * @throws Exception 
      */
     @Test
-    @Ignore
     public void ref310ExtendedTest()
             throws Exception {
         File outputFile = File.createTempFile("rover-test", ".ttl", tempDirectory);

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
index 7536304..e72162b 100644
--- a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
@@ -58,12 +58,15 @@ public class CSVExtractor implements Extractor.ContentExtractor {
     /**
      * {@inheritDoc}
      */
+    @Override
     public void setStopAtFirstError(boolean f) {
+      //not implemented
     }
 
     /**
      * {@inheritDoc}
      */
+    @Override
     public void run(
             ExtractionParameters extractionParameters,
             ExtractionContext extractionContext,
@@ -85,7 +88,7 @@ public class CSVExtractor implements Extractor.ContentExtractor {
         String[] nextLine;
         int index = 0;
         while ((nextLine = csvParser.getLine()) != null) {
-            IRI rowSubject = RDFUtils.uri(
+            IRI rowSubject = RDFUtils.iri(
                     documentIRI.toString(),
                     "row/" + index
             );
@@ -194,11 +197,11 @@ public class CSVExtractor implements Extractor.ContentExtractor {
     }
 
     private IRI normalize(String toBeNormalized, IRI documentIRI) {
-        toBeNormalized = toBeNormalized.trim().toLowerCase().replace("?", "").replace("&", "");
+      String newToBeNormalized = toBeNormalized.trim().toLowerCase().replace("?", "").replace("&", "");
 
         StringBuilder result = new StringBuilder(documentIRI.toString());
 
-        StringTokenizer tokenizer = new StringTokenizer(toBeNormalized, " ");
+        StringTokenizer tokenizer = new StringTokenizer(newToBeNormalized, " ");
         while (tokenizer.hasMoreTokens()) {
             String current = tokenizer.nextToken();
 
@@ -228,7 +231,7 @@ public class CSVExtractor implements Extractor.ContentExtractor {
                 // there are some row cells that don't have an associated column name
                 break;
             }
-            if (cell.equals("")) {
+            if ("".equals(cell)) {
                 index++;
                 continue;
             }
@@ -241,17 +244,17 @@ public class CSVExtractor implements Extractor.ContentExtractor {
 
     private Value getObjectFromCell(String cell) {
         Value object;
-        cell = cell.trim();
-        if (RDFUtils.isAbsoluteIRI(cell)) {
-            object = SimpleValueFactory.getInstance().createIRI(cell);
+        String newCell = cell.trim();
+        if (RDFUtils.isAbsoluteIRI(newCell)) {
+            object = SimpleValueFactory.getInstance().createIRI(newCell);
         } else {
             IRI datatype = XMLSchema.STRING;
-            if (isInteger(cell)) {
+            if (isInteger(newCell)) {
                 datatype = XMLSchema.INTEGER;
-            } else if(isFloat(cell)) {
+            } else if(isFloat(newCell)) {
                 datatype = XMLSchema.FLOAT;
             }
-            object = SimpleValueFactory.getInstance().createLiteral(cell, datatype);
+            object = SimpleValueFactory.getInstance().createLiteral(newCell, datatype);
         }
         return object;
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
index db58586..34728e5 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
@@ -28,7 +28,6 @@ import org.apache.any23.extractor.rdf.JSONLDExtractorFactory;
 import org.apache.any23.rdf.RDFUtils;
 import org.apache.any23.vocab.SINDICE;
 import org.eclipse.rdf4j.model.IRI;
-import org.eclipse.rdf4j.model.impl.LiteralImpl;
 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.NamedNodeMap;
@@ -52,205 +51,167 @@ import java.util.Set;
  */
 public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
 
-	private static final SINDICE vSINDICE = SINDICE.getInstance();
-
-	private IRI profile;
-
-	private Map<String, IRI> prefixes = new HashMap<>();
-
-	private String documentLang;
-
-	private JSONLDExtractor extractor;
-
-	/**
-	 * {@inheritDoc}
-	 */
-	@Override
-	public void run(ExtractionParameters extractionParameters,
-			ExtractionContext extractionContext, Document in,
-			ExtractionResult out) throws IOException, ExtractionException {
-		profile = extractProfile(in);
-		documentLang = getDocumentLanguage(in);
-		extractLinkDefinedPrefixes(in);
-
-		String baseProfile = vSINDICE.NS;
-		if (profile != null) {
-			baseProfile = profile.toString();
-		}
-
-		final IRI documentIRI = extractionContext.getDocumentIRI();
-		Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile,
-				extractionParameters, extractionContext, out);
-		for (JSONLDScript jsonldScript : jsonldScripts) {
-			//String lang = documentLang;
-			//if (jsonldScript.getLang() != null) {
-			//	lang = jsonldScript.getLang();
-			//}
-			//out.writeTriple(documentIRI, jsonldScript.getName(),
-			//		SimpleValueFactory.getInstance().createLiteral(jsonldScript.getContent(), lang));
-		}
-	}
-
-	/**
-	 * Returns the {@link Document} language if declared, <code>null</code>
-	 * otherwise.
-	 *
-	 * @param in
-	 *            a instance of {@link Document}.
-	 * @return the language declared, could be <code>null</code>.
-	 */
-	private String getDocumentLanguage(Document in) {
-		String lang = DomUtils.find(in, "string(/HTML/@lang)");
-		if (lang.equals("")) {
-			return null;
-		}
-		return lang;
-	}
-
-	private IRI extractProfile(Document in) {
-		String profile = DomUtils.find(in, "string(/HTML/@profile)");
-		if (profile.equals("")) {
-			return null;
-		}
-		return SimpleValueFactory.getInstance().createIRI(profile);
-	}
-
-	/**
-	 * It extracts prefixes defined in the <i>LINK</i> meta tags.
-	 *
-	 * @param in
-	 */
-	private void extractLinkDefinedPrefixes(Document in) {
-		List<Node> linkNodes = DomUtils.findAll(in, "/HTML/HEAD/LINK");
-		for (Node linkNode : linkNodes) {
-			NamedNodeMap attributes = linkNode.getAttributes();
-			String rel = attributes.getNamedItem("rel").getTextContent();
-			String href = attributes.getNamedItem("href").getTextContent();
-			if (rel != null && href != null && RDFUtils.isAbsoluteIRI(href)) {
-				prefixes.put(rel, SimpleValueFactory.getInstance().createIRI(href));
-			}
-		}
-	}
-
-	private Set<JSONLDScript> extractJSONLDScript(Document in,
-			String baseProfile, ExtractionParameters extractionParameters,
-			ExtractionContext extractionContext, ExtractionResult out)
-			throws IOException, ExtractionException {
-		List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
-		Set<JSONLDScript> result = new HashSet<>();
-		extractor = new JSONLDExtractorFactory().createExtractor();
-		for (Node jsonldNode : scriptNodes) {
-			NamedNodeMap attributes = jsonldNode.getAttributes();
-			for (int i = 0; i < attributes.getLength(); i++) {
-				if (attributes.item(i).getTextContent()
-						.equalsIgnoreCase("application/ld+json")) {
-					extractor.run(extractionParameters, extractionContext,
-							DomUtils.nodeToInputStream(jsonldNode
-									.getFirstChild()), out);
-				}
-			}
-			Node nameAttribute = attributes.getNamedItem("name");
-			Node contentAttribute = attributes.getNamedItem("content");
-			if (nameAttribute == null || contentAttribute == null) {
-				continue;
-			}
-			String name = nameAttribute.getTextContent();
-			String content = contentAttribute.getTextContent();
-			String xpath = DomUtils.getXPathForNode(jsonldNode);
-			IRI nameAsIRI = getPrefixIfExists(name);
-			if (nameAsIRI == null) {
-				nameAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + name);
-			}
-			JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsIRI,
-					content);
-			result.add(jsonldScript);
-		}
-		return result;
-	}
-
-	private IRI getPrefixIfExists(String name) {
-		String[] split = name.split("\\.");
-		if (split.length == 2 && prefixes.containsKey(split[0])) {
-			return SimpleValueFactory.getInstance().createIRI(prefixes.get(split[0]) + split[1]);
-		}
-		return null;
-	}
-
-	@Override
-	public ExtractorDescription getDescription() {
-		return EmbeddedJSONLDExtractorFactory.getDescriptionInstance();
-	}
-
-	private class JSONLDScript {
-
-		private String xpath;
-
-		private IRI name;
-
-		private String lang;
-
-		private String content;
-
-		public JSONLDScript(String xpath, IRI name, String content) {
-			this.xpath = xpath;
-			this.name = name;
-			this.content = content;
-		}
-
-		public JSONLDScript(String xpath, IRI name, String content, String lang) {
-			this(xpath, name, content);
-			this.lang = lang;
-		}
-
-		public IRI getName() {
-			return name;
-		}
-
-		public void setName(IRI name) {
-			this.name = name;
-		}
-
-		public String getLang() {
-			return lang;
-		}
-
-		public void setLang(String lang) {
-			this.lang = lang;
-		}
-
-		public String getContent() {
-			return content;
-		}
-
-		public void setContent(String content) {
-			this.content = content;
-		}
-
-		@Override
-		public boolean equals(Object o) {
-			if (this == o) {
-				return true;
-			}
-			if (o == null) {
-				return false;
-			}
-			if (!(o instanceof JSONLDScript)) {
-				return false;
-			}
-
-			JSONLDScript meta = (JSONLDScript) o;
-
-			if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) {
-				return false;
-			}
-
-			return true;
-		}
-
-		@Override
-		public int hashCode() {
-			return xpath != null ? xpath.hashCode() : 0;
-		}
-	}
+  private static final SINDICE vSINDICE = SINDICE.getInstance();
+
+  private IRI profile;
+
+  private Map<String, IRI> prefixes = new HashMap<>();
+
+  private String documentLang;
+
+  private JSONLDExtractor extractor;
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void run(ExtractionParameters extractionParameters,
+          ExtractionContext extractionContext, Document in,
+          ExtractionResult out) throws IOException, ExtractionException {
+    profile = extractProfile(in);
+    documentLang = getDocumentLanguage(in);
+    extractLinkDefinedPrefixes(in);
+
+    String baseProfile = vSINDICE.NS;
+    if (profile != null) {
+      baseProfile = profile.toString();
+    }
+
+    extractionContext.getDocumentIRI();
+    Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile,
+            extractionParameters, extractionContext, out);
+    for (JSONLDScript jsonldScript : jsonldScripts) {
+      //String lang = documentLang;
+      //if (jsonldScript.getLang() != null) {
+      //	lang = jsonldScript.getLang();
+      //}
+      //out.writeTriple(documentIRI, jsonldScript.getName(),
+      //		SimpleValueFactory.getInstance().createLiteral(jsonldScript.getContent(), lang));
+    }
+  }
+
+  /**
+   * Returns the {@link Document} language if declared, <code>null</code>
+   * otherwise.
+   *
+   * @param in
+   *            an instance of {@link Document}.
+   * @return the language declared, could be <code>null</code>.
+   */
+  private String getDocumentLanguage(Document in) {
+    String lang = DomUtils.find(in, "string(/HTML/@lang)");
+    if ("".equals(lang)) {
+      return null;
+    }
+    return lang;
+  }
+
+  private IRI extractProfile(Document in) {
+    String profile = DomUtils.find(in, "string(/HTML/@profile)");
+    if ("".equals(profile)) {
+      return null;
+    }
+    return SimpleValueFactory.getInstance().createIRI(profile);
+  }
+
+  /**
+   * It extracts prefixes defined in the <i>LINK</i> meta tags.
+   *
+   * @param in
+   */
+  private void extractLinkDefinedPrefixes(Document in) {
+    List<Node> linkNodes = DomUtils.findAll(in, "/HTML/HEAD/LINK");
+    for (Node linkNode : linkNodes) {
+      NamedNodeMap attributes = linkNode.getAttributes();
+      String rel = attributes.getNamedItem("rel").getTextContent();
+      String href = attributes.getNamedItem("href").getTextContent();
+      if (rel != null && href != null && RDFUtils.isAbsoluteIRI(href)) {
+        prefixes.put(rel, SimpleValueFactory.getInstance().createIRI(href));
+      }
+    }
+  }
+
+  private Set<JSONLDScript> extractJSONLDScript(Document in,
+          String baseProfile, ExtractionParameters extractionParameters,
+          ExtractionContext extractionContext, ExtractionResult out)
+                  throws IOException, ExtractionException {
+    List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
+    Set<JSONLDScript> result = new HashSet<>();
+    extractor = new JSONLDExtractorFactory().createExtractor();
+    for (Node jsonldNode : scriptNodes) {
+      NamedNodeMap attributes = jsonldNode.getAttributes();
+      for (int i = 0; i < attributes.getLength(); i++) {
+        if ("application/ld+json".equalsIgnoreCase(attributes.item(i).getTextContent())) {
+          extractor.run(extractionParameters, extractionContext,
+                  DomUtils.nodeToInputStream(jsonldNode
+                          .getFirstChild()), out);
+        }
+      }
+      Node nameAttribute = attributes.getNamedItem("name");
+      Node contentAttribute = attributes.getNamedItem("content");
+      if (nameAttribute == null || contentAttribute == null) {
+        continue;
+      }
+      String name = nameAttribute.getTextContent();
+      String content = contentAttribute.getTextContent();
+      String xpath = DomUtils.getXPathForNode(jsonldNode);
+      IRI nameAsIRI = getPrefixIfExists(name);
+      if (nameAsIRI == null) {
+        nameAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + name);
+      }
+      JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsIRI,
+              content);
+      result.add(jsonldScript);
+    }
+    return result;
+  }
+
+  private IRI getPrefixIfExists(String name) {
+    String[] split = name.split("\\.");
+    if (split.length == 2 && prefixes.containsKey(split[0])) {
+      return SimpleValueFactory.getInstance().createIRI(prefixes.get(split[0]) + split[1]);
+    }
+    return null;
+  }
+
+  @Override
+  public ExtractorDescription getDescription() {
+    return EmbeddedJSONLDExtractorFactory.getDescriptionInstance();
+  }
+
+  private class JSONLDScript {
+
+    private String xpath;
+
+    public JSONLDScript(String xpath, IRI name, String content) {
+      this.xpath = xpath;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) {
+        return true;
+      }
+      if (o == null) {
+        return false;
+      }
+      if (!(o instanceof JSONLDScript)) {
+        return false;
+      }
+
+      JSONLDScript meta = (JSONLDScript) o;
+
+      if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      return xpath != null ? xpath.hashCode() : 0;
+    }
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
index e67ec42..a3c6550 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
@@ -26,7 +26,6 @@ import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.rdf.RDFUtils;
 import org.apache.any23.vocab.SINDICE;
 import org.eclipse.rdf4j.model.IRI;
-import org.eclipse.rdf4j.model.impl.LiteralImpl;
 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.NamedNodeMap;
@@ -51,7 +50,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
 
     private IRI profile;
 
-    private Map<String, IRI> prefixes = new HashMap<String, IRI>();
+    private Map<String, IRI> prefixes = new HashMap<>();
 
     private String documentLang;
 
@@ -82,25 +81,25 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
                 lang = meta.getLang();
             }
             if(meta.isPragmaDirective){
-            	if(lang != null) {
-            		out.writeTriple(
+                if(lang != null) {
+                    out.writeTriple(
                         documentIRI,
                         meta.getHttpEquiv(),
                         SimpleValueFactory.getInstance().createLiteral(meta.getContent(), lang));
-            	} else {
+                } else {
                         out.writeTriple(
                                 documentIRI,
                                 meta.getHttpEquiv(),
                                 SimpleValueFactory.getInstance().createLiteral(meta.getContent()));
-            	}
-            }else {
-            	if(lang != null) {
-            		out.writeTriple(
+                }
+            } else {
+                if(lang != null) {
+                    out.writeTriple(
                         documentIRI,
                         meta.getName(),
                         SimpleValueFactory.getInstance().createLiteral(meta.getContent(), lang));
-            	} else {
-            		out.writeTriple(
+                } else {
+                    out.writeTriple(
                             documentIRI,
                             meta.getName(),
                             SimpleValueFactory.getInstance().createLiteral(meta.getContent()));
@@ -117,7 +116,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
      */
     private String getDocumentLanguage(Document in) {
         String lang = DomUtils.find(in, "string(/HTML/@lang)");
-        if (lang.equals("")) {
+        if ("".equals(lang)) {
             return null;
         }
         return lang;
@@ -125,7 +124,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
 
     private IRI extractProfile(Document in) {
         String profile = DomUtils.find(in, "string(/HTML/@profile)");
-        if (profile.equals("")) {
+        if ("".equals(profile)) {
             return null;
         }
         return SimpleValueFactory.getInstance().createIRI(profile);
@@ -150,7 +149,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
 
     private Set<Meta> extractMetaElement(Document in, String baseProfile) {
         List<Node> metaNodes = DomUtils.findAll(in, "/HTML/HEAD/META");
-        Set<Meta> result = new HashSet<Meta>();
+        Set<Meta> result = new HashSet<>();
         for (Node metaNode : metaNodes) {
             NamedNodeMap attributes = metaNode.getAttributes();
             Node nameAttribute = attributes.getNamedItem("name");
@@ -223,6 +222,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
             this.setPragmaDirective(true);
         }
 
+        @SuppressWarnings("unused")
         public Meta(String xpath, String content, IRI httpEquiv, String lang) {
             this(xpath,content,httpEquiv);
             this.lang = lang;
@@ -234,15 +234,12 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
             this.content = content;
         }
 
+        @SuppressWarnings("unused")
         public Meta(String xpath, IRI name, String content, String lang) {
             this(xpath, name, content);
             this.lang = lang;
         }
 
-        public boolean isPragmaDirective(){
-            return isPragmaDirective;
-        }
-
         private void setPragmaDirective(boolean value){
             this.isPragmaDirective=value;
         }
@@ -251,42 +248,29 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
             return httpEquiv;
         }
 
-        public void setHttpEquiv(IRI httpEquiv){
-            this.httpEquiv=httpEquiv;
-        }
-
         public IRI getName() {
             return name;
         }
 
-        public void setName(IRI name) {
-            this.name = name;
-        }
-
         public String getLang() {
             return lang;
         }
 
-        public void setLang(String lang) {
-            this.lang = lang;
-        }
-
         public String getContent() {
             return content;
         }
 
-        public void setContent(String content) {
-            this.content = content;
-        }
-
         @Override
         public boolean equals(Object o) {
-            if (this == o) return true;
-            if (o == null || getClass() != o.getClass()) return false;
+            if (this == o)
+                return true;
+            if (o == null || getClass() != o.getClass())
+                return false;
 
             Meta meta = (Meta) o;
 
-            if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) return false;
+            if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null)
+                return false;
 
             return true;
         }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/Item.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/Item.java b/core/src/main/java/org/apache/any23/extractor/microdata/Item.java
index 04ffa66..769b0d2 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/Item.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/Item.java
@@ -30,11 +30,6 @@ public abstract class Item {
     private final String xpath;
 
     /**
-     * @return the <b>JSON</b> representation for this item.
-     */
-    public abstract String toJSON();
-
-    /**
      * Constructor.
      *
      * @param xpath xpath to this item in container document.
@@ -47,6 +42,11 @@ public abstract class Item {
     }
 
     /**
+     * @return the <b>JSON</b> representation for this item.
+     */
+    public abstract String toJSON();
+
+    /**
      * @return the item location in container document.
      */
     public String getXpath() {

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
index 0688fb8..f32b468 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
@@ -31,7 +31,17 @@ import org.apache.any23.util.StringUtils;
  */
 public class ItemPropValue {
 
-    private static final ThreadLocal<SimpleDateFormat> sdf = new ThreadLocal<SimpleDateFormat>();
+    /**
+     * Internal content value.
+     */
+    private final Object content;
+
+    /**
+     * Content type.
+     */
+    private final Type type;
+
+    private static final ThreadLocal<SimpleDateFormat> sdf = new ThreadLocal<>();
 
     /**
      * Supported types.
@@ -61,16 +71,6 @@ public class ItemPropValue {
     }
 
     /**
-     * Internal content value.
-     */
-    private final Object content;
-
-    /**
-     * Content type.
-     */
-    private final Type type;
-
-    /**
      * Constructor.
      *
      * @param content content object.
@@ -94,12 +94,11 @@ public class ItemPropValue {
             );
         }
         if(content instanceof String && ((String) content).trim().length() == 0) {
-            content = "Null";
             // ANY23-115 Empty spans seem to break ANY23
             // instead of throwing the exception and in effect failing the entire
             // parse job we wish to be lenient on web content publishers and add
             // Null (String) as content.
-            //throw new IllegalArgumentException("Invalid content '" + content + "'");
+            content = "Null";
         }
         this.content = content;
         this.type = type;
@@ -151,7 +150,8 @@ public class ItemPropValue {
      * @return <code>true</code> if type is an integer.
      */
     public boolean isInteger() {
-        if(type != Type.Plain) return false;
+        if(type != Type.Plain)
+            return false;
          try {
              Integer.parseInt((String) content);
              return true;
@@ -164,7 +164,8 @@ public class ItemPropValue {
      * @return <code>true</code> if type is a float.
      */
      public boolean isFloat() {
-         if(type != Type.Plain) return false;
+         if(type != Type.Plain)
+             return false;
          try {
              Float.parseFloat((String) content);
              return true;

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
index 5f817f5..f36828a 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
@@ -87,17 +87,17 @@ public class ItemScope extends Item {
         this.refs = refs;
         this.itemId = itemId;
 
-        final Map<String, List<ItemProp>> tmpProperties = new HashMap<String, List<ItemProp>>();
+        final Map<String, List<ItemProp>> tmpProperties = new HashMap<>();
         for (ItemProp itemProp : itemProps) {
             final String propName = itemProp.getName();
             List<ItemProp> propList = tmpProperties.get(propName);
             if (propList == null) {
-                propList = new ArrayList<ItemProp>();
+                propList = new ArrayList<>();
                 tmpProperties.put(propName, propList);
             }
             propList.add(itemProp);
         }
-        final Map<String, List<ItemProp>> properties = new HashMap<String, List<ItemProp>>();
+        final Map<String, List<ItemProp>> properties = new HashMap<>();
         for (Map.Entry<String, List<ItemProp>> propertiesEntry : tmpProperties.entrySet()) {
             properties.put(
                     propertiesEntry.getKey(),
@@ -147,7 +147,8 @@ public class ItemScope extends Item {
     @Override
     public String toJSON() {
         StringBuilder sb = new StringBuilder();
-        int i, j;
+        int i;
+        int j;
         final Collection<List<ItemProp>> itemPropsList = properties.values();
         j = 0;
         for (List<ItemProp> itemProps : itemPropsList) {
@@ -184,12 +185,12 @@ public class ItemScope extends Item {
 
     @Override
     public int hashCode() {
-	int i = properties == null ? 0 : properties.hashCode();
-	i += id == null         ? 0 : id.hashCode();
-	i += refs == null       ? 0 : refs.hashCode();
-	i += type == null       ? 0 : type.hashCode();
-	i += itemId == null     ? 0 : itemId.hashCode();
-	return i;
+        int i = properties == null ? 0 : properties.hashCode();
+        i += id == null         ? 0 : id.hashCode();
+        i += refs == null       ? 0 : Arrays.hashCode(refs);
+        i += type == null       ? 0 : type.hashCode();
+        i += itemId == null     ? 0 : itemId.hashCode();
+        return i;
     }
 
     @Override
@@ -221,15 +222,17 @@ public class ItemScope extends Item {
     protected void acquireProperty(ItemProp itemProp) {
         List<ItemProp> itemProps = properties.get(itemProp.getName());
         if (itemProps == null) {
-            itemProps = new ArrayList<ItemProp>();
+            itemProps = new ArrayList<>();
             properties.put(itemProp.getName(), itemProps);
         }
-        if (!itemProps.contains(itemProp)) itemProps.add(itemProp);
+        if (!itemProps.contains(itemProp))
+            itemProps.add(itemProp);
     }
 
     protected void disownProperty(ItemProp itemProp) {
         List<ItemProp> propList = properties.get(itemProp.getName());
-        if (propList != null) propList.remove(itemProp);
+        if (propList != null)
+            propList.remove(itemProp);
     }
 
     private String toJSON(String[] in) {

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
index 636f4d2..aa01dfe 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
@@ -106,7 +106,7 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
          * 5.2.6
          */
         final IRI documentIRI = extractionContext.getDocumentIRI();
-        final Map<ItemScope, Resource> mappings = new HashMap<ItemScope, Resource>();
+        final Map<ItemScope, Resource> mappings = new HashMap<>();
         for (ItemScope itemScope : itemScopes) {
             Resource subject = processType(itemScope, documentIRI, out, mappings);
             out.writeTriple(
@@ -143,7 +143,7 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
      */
     private String getDocumentLanguage(Document in) {
         String lang = DomUtils.find(in, "string(/HTML/@lang)");
-        if (lang.equals("")) {
+        if ("".equals(lang)) {
             return null;
         }
         return lang;
@@ -256,13 +256,13 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
             }
         }
         String[] relTokens = rel.getTextContent().split(" ");
-        Set<String> tokensWithNoDuplicates = new HashSet<String>();
+        Set<String> tokensWithNoDuplicates = new HashSet<>();
         for (String relToken : relTokens) {
             if (relToken.contains(":")) {
                 // if contain semi-colon, skip
                 continue;
             }
-            if (relToken.equals("alternate") || relToken.equals("stylesheet")) {
+            if ("alternate".equals(relToken) || "stylesheet".equals(relToken)) {
                 tokensWithNoDuplicates.add("ALTERNATE-STYLESHEET");
                 continue;
             }
@@ -295,7 +295,7 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
         NodeList metas = in.getElementsByTagName("meta");
         for (int i = 0; i < metas.getLength(); i++) {
             Node meta = metas.item(i);
-            String name    = DomUtils.readAttribute(meta, "name"   , null);
+            String name    = DomUtils.readAttribute(meta, "name", null);
             String content = DomUtils.readAttribute(meta, "content", null);
             if (name != null && content != null) {
                 if (isAbsoluteURL(name)) {
@@ -482,20 +482,18 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
             ExtractionResult out
     ) throws MalformedURLException, ExtractionException {
         IRI predicate;
-        if (!isAbsoluteURL(propName) && itemScopeType.equals("") && isStrict) {
+        if (!isAbsoluteURL(propName) && "".equals(itemScopeType) && isStrict) {
             return;
-        } else if (!isAbsoluteURL(propName) && itemScopeType.equals("") && !isStrict) {
+        } else if (!isAbsoluteURL(propName) && "".equals(itemScopeType) && !isStrict) {
             predicate = RDFUtils.iri(toAbsoluteURL(
-			        defaultNamespace,
-			        propName,
-			        '/'
-			).toString());
+                    defaultNamespace,
+                    propName,
+                    '/').toString());
         } else {
             predicate = RDFUtils.iri(toAbsoluteURL(
-			        itemScopeType,
-			        propName,
-			        '/'
-			).toString());
+                    itemScopeType,
+                    propName,
+                    '/').toString());
         }
         Value value;
         Object propValue = itemProp.getValue().getContent();
@@ -506,10 +504,9 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
             value = RDFUtils.literal((String) propValue, documentLanguage);
         } else if (propType.equals(ItemPropValue.Type.Link)) {
             value = RDFUtils.iri(toAbsoluteURL(
-			        documentIRI.toString(),
-			        (String) propValue,
-			        '/'
-			).toString());
+                    documentIRI.toString(),
+                    (String) propValue,
+                    '/').toString());
         } else if (propType.equals(ItemPropValue.Type.Date)) {
             value = RDFUtils.literal(ItemPropValue.formatDateTime((Date) propValue), XMLSchema.DATE);
         } else {

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
index 147fd18..cf05e35 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
@@ -48,54 +48,36 @@ public class MicrodataParser {
 
     enum ErrorMode {
         /** This mode raises an exception at first encountered error. */
-        StopAtFirstError,
+        STOP_AT_FIRST_ERROR,
         /**  This mode produces a full error report. */
-        FullReport
+        FULL_REPORT
     }
 
-    public static final String ITEMSCOPE_ATTRIBUTE = "itemscope";
-    public static final String ITEMPROP_ATTRIBUTE  = "itemprop";
-
-    /**
-     * List of tags providing the <code>src</code> property.
-     */
-    public static final Set<String> SRC_TAGS =  Collections.unmodifiableSet(
-            new HashSet<String>( Arrays.asList("audio", "embed", "frame", "iframe", "img", 
-              "source", "track", "video", "input", "layer", "script", "textarea") )
-    );
-
-    /**
-     * List of tags providing the <code>href</code> property.
-     */
-    public static final Set<String> HREF_TAGS =  Collections.unmodifiableSet(
-            new HashSet<String>( Arrays.asList("a", "area", "link") )
-    );
-
     private final Document document;
 
     /**
      * This set holds the name of properties being dereferenced.
-     * The {@link #deferProperties(String...)} checks first if the
+     * The {@link #deferProperties(Node, String...)} checks first if the
      * required dereference has been already asked, if so raises
      * a loop detection error. This map works in coordination
      * with {@link #dereferenceRecursionCounter}, so that at the end of
-     * {@link #deferProperties(String...)} call recursion the
+     * {@link #deferProperties(Node, String...)} call recursion the
      * {@link #loopDetectorSet} can be cleaned up.
      */
-    private final Set<String> loopDetectorSet = new HashSet<String>();
+    private final Set<String> loopDetectorSet = new HashSet<>();
 
     /**
      * {@link ItemScope} cache.
      */
-    private final Map<Node,ItemScope> itemScopes = new HashMap<Node,ItemScope>();
+    private final Map<Node,ItemScope> itemScopes = new HashMap<>();
 
     /**
      * {@link ItemPropValue} cache.
      */
-    private final Map<Node, ItemPropValue> itemPropValues = new HashMap<Node, ItemPropValue>();
+    private final Map<Node, ItemPropValue> itemPropValues = new HashMap<>();
 
    /**
-     * Counts the recursive call of {@link #deferProperties(String...)}.
+     * Counts the recursive call of {@link #deferProperties(Node, String...)}.
      * It helps to cleanup the {@link #loopDetectorSet} when recursion ends.
      */
     private int dereferenceRecursionCounter = 0;
@@ -103,12 +85,37 @@ public class MicrodataParser {
     /**
      * Current error mode.
      */
-    private ErrorMode errorMode = ErrorMode.FullReport;
+    private ErrorMode errorMode = ErrorMode.FULL_REPORT;
 
     /**
      * List of collected errors. Used when {@link #errorMode} <code>==</code> {@link ErrorMode#FullReport}.
      */
-    private List<MicrodataParserException> errors = new ArrayList<MicrodataParserException>();
+    private List<MicrodataParserException> errors = new ArrayList<>();
+
+    public static final String ITEMSCOPE_ATTRIBUTE = "itemscope";
+    public static final String ITEMPROP_ATTRIBUTE  = "itemprop";
+
+    /**
+     * List of tags providing the <code>src</code> property.
+     */
+    public static final Set<String> SRC_TAGS =  Collections.unmodifiableSet(
+            new HashSet<String>( Arrays.asList("audio", "embed", "frame", "iframe", "img", 
+              "source", "track", "video", "input", "layer", "script", "textarea") )
+    );
+
+    /**
+     * List of tags providing the <code>href</code> property.
+     */
+    public static final Set<String> HREF_TAGS =  Collections.unmodifiableSet(
+            new HashSet<String>( Arrays.asList("a", "area", "link") )
+    );
+
+    public MicrodataParser(Document document) {
+      if(document == null) {
+          throw new NullPointerException("Document cannot be null.");
+      }
+      this.document = document;
+    }
 
     /**
      * Returns all the <i>itemScope</i>s detected within the given root node.
@@ -158,7 +165,7 @@ public class MicrodataParser {
      */
     public static List<Node> getTopLevelItemScopeNodes(Node node)  {
         final List<Node> itemScopes = getItemScopeNodes(node);
-        final List<Node> topLevelItemScopes = new ArrayList<Node>();
+        final List<Node> topLevelItemScopes = new ArrayList<>();
         for(Node itemScope : itemScopes) {
             if( ! isItemProp(itemScope) ) {
                 topLevelItemScopes.add(itemScope);
@@ -176,13 +183,13 @@ public class MicrodataParser {
      * @param errorMode error management policy.
      * @return list of <b>itemscope</b> items.
      * @throws MicrodataParserException if
-     *         <code>errorMode == {@link org.apache.any23.extractor.microdata.MicrodataParser.ErrorMode#StopAtFirstError}</code>
+     *         <code>errorMode == {@link org.apache.any23.extractor.microdata.MicrodataParser.ErrorMode#STOP_AT_FIRST_ERROR}</code>
      *         and an error occurs.
      */
     public static MicrodataParserReport getMicrodata(Document document, ErrorMode errorMode)
     throws MicrodataParserException {
         final List<Node> itemNodes = getTopLevelItemScopeNodes(document);
-        final List<ItemScope> items = new ArrayList<ItemScope>();
+        final List<ItemScope> items = new ArrayList<>();
         final MicrodataParser microdataParser = new MicrodataParser(document);
         microdataParser.setErrorMode(errorMode);
         for(Node itemNode : itemNodes) {
@@ -203,7 +210,7 @@ public class MicrodataParser {
      */
     public static MicrodataParserReport getMicrodata(Document document) {
         try {
-            return getMicrodata(document, ErrorMode.FullReport);
+            return getMicrodata(document, ErrorMode.FULL_REPORT);
         } catch (MicrodataParserException mpe) {
              throw new IllegalStateException("Unexpected exception.", mpe);
         }
@@ -255,12 +262,14 @@ public class MicrodataParser {
      * @param candidates list of candidate nodes.
      * @return list of unnested nodes.
      */
+    @SuppressWarnings("unused")
     private static List<Node> getUnnestedNodes(List<Node> candidates) {
-        final List<Node> unnesteds  = new ArrayList<Node>();
+        final List<Node> unnesteds  = new ArrayList<>();
         for(int i = 0; i < candidates.size(); i++) {
             boolean skip = false;
             for(int j = 0; j < candidates.size(); j++) {
-                if(i == j) continue;
+                if(i == j)
+                    continue;
                 if( DomUtils.isAncestorOf(candidates.get(j), candidates.get(i), true) ) {
                     skip = true;
                     break;
@@ -273,15 +282,9 @@ public class MicrodataParser {
         return unnesteds;
     }
 
-    public MicrodataParser(Document document) {
-        if(document == null) {
-            throw new NullPointerException("Document cannot be null.");
-        }
-        this.document = document;
-    }
-
     public void setErrorMode(ErrorMode errorMode) {
-        if(errorMode == null) throw new IllegalArgumentException("errorMode must be not null.");
+        if(errorMode == null)
+            throw new IllegalArgumentException("errorMode must be not null.");
         this.errorMode = errorMode;
     }
 
@@ -306,7 +309,8 @@ public class MicrodataParser {
      */
     public ItemPropValue getPropertyValue(Node node) throws MicrodataParserException {
         final ItemPropValue itemPropValue = itemPropValues.get(node);
-        if(itemPropValue != null) return itemPropValue;
+        if(itemPropValue != null)
+            return itemPropValue;
 
         final String nodeName = node.getNodeName().toLowerCase();
         if (DomUtils.hasAttribute(node, "content")) {
@@ -338,7 +342,7 @@ public class MicrodataParser {
         }
 
         if( isItemScope(node) ) {
-            return new ItemPropValue( getItemScope(node), ItemPropValue.Type.Nested );
+            return new ItemPropValue( getItemScope(node), ItemPropValue.Type.Nested);
         }
 
         final ItemPropValue newItemPropValue = new ItemPropValue( node.getTextContent(), ItemPropValue.Type.Plain);
@@ -356,7 +360,7 @@ public class MicrodataParser {
      * @throws MicrodataParserException if an error occurs while retrieving an property value.
      */
     public List<ItemProp> getItemProps(final Node scopeNode, boolean skipRoot) throws MicrodataParserException {
-        final Set<Node> accepted = new LinkedHashSet<Node>();
+        final Set<Node> accepted = new LinkedHashSet<>();
 
         if (!skipRoot) {
             NamedNodeMap attributes = scopeNode.getAttributes();
@@ -375,19 +379,20 @@ public class MicrodataParser {
                     if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null && !scopeNode.equals(node)) {
                         accepted.add(node);
                     }
-                    if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
-                        // Don't visit descendants of nodes that define a new scope
-                        return FILTER_REJECT;
-                    }
+//                    ANY23-131 Nested Microdata are not extracted
+//                    if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
+//                        // Don't visit descendants of nodes that define a new scope
+//                        return FILTER_REJECT;
+//                    }
                 }
                 return FILTER_ACCEPT;
             }
         }, false);
 
         // To populate accepted we only need to walk the tree.
-    	while (treeWalker.nextNode() != null);
+        while (treeWalker.nextNode() != null);
 
-        final List<ItemProp> result = new ArrayList<ItemProp>();
+        final List<ItemProp> result = new ArrayList<>();
         for(Node itemPropNode :  accepted) {
             final String itemProp = DomUtils.readAttribute(itemPropNode, ITEMPROP_ATTRIBUTE, null);
             final String[] propertyNames = itemProp.split(" ");
@@ -414,14 +419,15 @@ public class MicrodataParser {
     /**
      * Given a document and a list of <b>itemprop</b> names this method will return
      * such <b>itemprops</b>.
-     *
+     * 
+     * @param node a {@link org.w3c.dom.Node} to which the refs belong
      * @param refs list of references.
      * @return list of retrieved <b>itemprop</b>s.
      * @throws MicrodataParserException if a loop is detected or a property name is missing.
      */
-    public ItemProp[] deferProperties(String... refs) throws MicrodataParserException {
+    public ItemProp[] deferProperties(Node node, String... refs) throws MicrodataParserException {
         dereferenceRecursionCounter++;
-        final List<ItemProp> result = new ArrayList<ItemProp>();
+        final List<ItemProp> result = new ArrayList<>();
         try {
             for (String ref : refs) {
                 if (loopDetectorSet.contains(ref)) {
@@ -434,18 +440,22 @@ public class MicrodataParser {
                         );
                 }
                 loopDetectorSet.add(ref);
-                final Element element = document.getElementById(ref);
+                Element element = (Element) node;
                 if (element == null) {
                     manageError(
                             new MicrodataParserException( String.format("Unknown itemProp id '%s'", ref ), null )
                     );
                     continue;
                 }
-                result.addAll(getItemProps(element, false));
+                List<ItemProp> propList = getItemProps(element, false);
+                if (!result.containsAll(propList)) {
+                  result.addAll(propList);
+                }
             }
         } catch (MicrodataParserException mpe) {
             if(dereferenceRecursionCounter == 1)
-                manageError(mpe); else throw mpe;  // Recursion end, this the the top call.
+                manageError(mpe);
+            else throw mpe;  // Recursion end, this the the top call.
         } finally {
             dereferenceRecursionCounter--;
             if(dereferenceRecursionCounter == 0) { // Recursion end, this the the top call.
@@ -464,7 +474,8 @@ public class MicrodataParser {
      */
     public ItemScope getItemScope(Node node) throws MicrodataParserException {
         final ItemScope itemScope = itemScopes.get(node);
-        if(itemScope != null) return itemScope;
+        if(itemScope != null)
+            return itemScope;
 
         final String id       = DomUtils.readAttribute(node, "id"      , null);
         final String itemref  = DomUtils.readAttribute(node, "itemref" , null);
@@ -475,7 +486,7 @@ public class MicrodataParser {
         final String[] itemrefIDs = itemref == null ? new String[0] : itemref.split(" ");
         final ItemProp[] deferredProperties;
         try {
-            deferredProperties = deferProperties(itemrefIDs);
+            deferredProperties = deferProperties(node, itemrefIDs);
         } catch (MicrodataParserException mpe) {
             mpe.setErrorNode(node);
             throw mpe;
@@ -506,12 +517,13 @@ public class MicrodataParser {
     }
 
     private void manageError(MicrodataParserException mpe) throws MicrodataParserException {
-        if(errorMode == ErrorMode.StopAtFirstError) {
+        if(errorMode == ErrorMode.STOP_AT_FIRST_ERROR) {
             throw mpe;
         }
-        if(errorMode != ErrorMode.FullReport) throw new IllegalStateException("Unsupported mode " + errorMode);
+        if(errorMode != ErrorMode.FULL_REPORT)
+            throw new IllegalStateException("Unsupported mode " + errorMode);
         if(errors == null) {
-            errors = new ArrayList<MicrodataParserException>();
+            errors = new ArrayList<>();
         }
         errors.add(mpe);
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java b/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
index 8fef9b1..dc0eaeb 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
@@ -117,6 +117,7 @@ public class QuadTemplate {
     public void printOut(ExtractionResult er, Map<String,String> variableAssignment) {
         final Resource s = subject.getValue(variableAssignment);
         final IRI p      = predicate.getValue(variableAssignment);
+        @SuppressWarnings("unchecked")
         final Value o    = object.getValue(variableAssignment);
         if(graph != null) {
             final IRI g = graph.getValue(variableAssignment);

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
index d9156a5..95d7b0d 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
@@ -18,7 +18,6 @@
 package org.apache.any23.extractor.xpath;
 
 import org.eclipse.rdf4j.model.Value;
-import org.eclipse.rdf4j.model.impl.BNodeImpl;
 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 
 /**
@@ -32,9 +31,9 @@ public class TemplateObject extends Term {
      * Supported object types.
      */
     public enum Type {
-        uri,
-        bnode,
-        literal
+        URI,
+        BNODE,
+        LITERAL
     }
 
     /**
@@ -61,33 +60,37 @@ public class TemplateObject extends Term {
     @Override
     protected Value getValueInternal(String value) {
         switch (type) {
-            case uri:
-                try {
-                    return SimpleValueFactory.getInstance().createIRI(value);
-                } catch (IllegalArgumentException iae) {
-                    throw new IllegalArgumentException(
-                            String.format("Expected a valid IRI for object template, found '%s'", value),
-                            iae
-                    );
-                }
-            case bnode:
+            case URI:
+                return createIRI(value);
+            case BNODE:
                 return SimpleValueFactory.getInstance().createBNode(value);
-            case literal:
+            case LITERAL:
                 return SimpleValueFactory.getInstance().createLiteral(value);
             default:
                 throw new IllegalStateException();
         }
     }
 
+    private Value createIRI(String value) {
+        try {
+            return SimpleValueFactory.getInstance().createIRI(value);
+        } catch (IllegalArgumentException iae) {
+            throw new IllegalArgumentException(
+                String.format("Expected a valid IRI for object template, found '%s'", value),
+                iae
+            );
+        }
+    }
+
     @Override
     public String toString() {
         final String superStr = super.toString();
         switch (type) {
-            case uri:
+            case URI:
                 return "<" + superStr + ">";
-            case bnode:
+            case BNODE:
                 return "_:" + superStr;
-            case literal:
+            case LITERAL:
                 return "'" + superStr + "'";
             default:
                 throw new IllegalStateException();

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java
index a4ce270..80c8e57 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java
@@ -18,7 +18,6 @@
 package org.apache.any23.extractor.xpath;
 
 import org.eclipse.rdf4j.model.Resource;
-import org.eclipse.rdf4j.model.impl.BNodeImpl;
 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 
 /**
@@ -32,8 +31,8 @@ public class TemplateSubject extends Term<Resource> {
      * Supported subject types.
      */
     public enum Type {
-        uri,
-        bnode
+        URI,
+        BNODE
     }
 
     /**
@@ -60,9 +59,9 @@ public class TemplateSubject extends Term<Resource> {
     @Override
     protected Resource getValueInternal(String value) {
         switch (type) {
-            case uri:
+            case URI:
                 return SimpleValueFactory.getInstance().createIRI(value);
-            case bnode:
+            case BNODE:
                 return SimpleValueFactory.getInstance().createBNode(value);
             default:
                 throw new IllegalStateException();
@@ -73,9 +72,9 @@ public class TemplateSubject extends Term<Resource> {
     public String toString() {
         final String superStr = super.toString();
         switch (type) {
-            case uri:
+            case URI:
                 return "<" + superStr + ">";
-            case bnode:
+            case BNODE:
                 return "_:" + superStr;
             default:
                 throw new IllegalStateException();

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java b/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
index bacb90c..90863ac 100644
--- a/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
+++ b/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
@@ -58,6 +58,10 @@ public class ElementsProcessor {
 
     private static final ElementsProcessor _ep = new ElementsProcessor();
 
+    // hide constructor
+    private ElementsProcessor() {
+    }
+
     /**
      * A model holder describes the two required parameters which makes a model useful
      * in further processing: a root node and model itself.
@@ -66,6 +70,11 @@ public class ElementsProcessor {
         private final Value root;
         private final Model model;
 
+        public ModelHolder(Value root, Model model) {
+            this.root = root;
+            this.model = model;
+        }
+
         public Value getRoot() {
             return root;
         }
@@ -73,11 +82,6 @@ public class ElementsProcessor {
         public Model getModel() {
             return model;
         }
-
-        public ModelHolder(Value root, Model model) {
-            this.root = root;
-            this.model = model;
-        }
     }
     
     
@@ -100,15 +104,16 @@ public class ElementsProcessor {
      * created.
      * @return instance of {@link ModelHolder},
      */
+    @SuppressWarnings("unchecked")
     public ModelHolder asModel(IRI namespace, final Object t, Value rootNode) {
         if (t == null) {
             return null;
         }
 
         if (t instanceof List) {
-            return processList(namespace, (List) t);
+            return processList(namespace, (List<Object>) t);
         } else if (t instanceof Map) {
-            return processMap(namespace, (Map) t, rootNode);
+            return processMap(namespace, (Map<String, Object>) t, rootNode);
         } else if (t instanceof String) {
             return asModelHolder(RDFUtils.makeIRI(t.toString()), modelFactory.createEmptyModel());
         } else {
@@ -174,7 +179,6 @@ public class ElementsProcessor {
         return asModelHolder(nodeURI, model);
     }
 
-    @SuppressWarnings("UnusedAssignment")
     protected ModelHolder processList(IRI ns, List<Object> object) {
 
         if (object.isEmpty() || object.stream().noneMatch((i) -> {
@@ -217,10 +221,6 @@ public class ElementsProcessor {
         return asModelHolder(listRoot, finalModel);
     }
 
-    // hide constructor
-    private ElementsProcessor() {
-    }
-
     public static final ElementsProcessor getInstance() {
         return _ep;
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java b/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java
index c6efba8..0cbc78c 100644
--- a/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java
+++ b/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java
@@ -29,7 +29,6 @@ import org.eclipse.rdf4j.model.Statement;
 import org.eclipse.rdf4j.model.IRI;
 import org.eclipse.rdf4j.model.Value;
 import org.eclipse.rdf4j.model.ValueFactory;
-import org.eclipse.rdf4j.model.impl.ValueFactoryBase;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -90,73 +89,90 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
         return defaultLiteralLanguage;
     }
 
+    @Override
     public BNode createBNode() {
         return wrappedFactory.createBNode();
     }
 
+    @Override
     public BNode createBNode(String id) {
-        if (id == null) return null;
+        if (id == null)
+            return null;
         return wrappedFactory.createBNode(id);
     }
 
+    @Override
     public Literal createLiteral(String content) {
-        if (content == null) return null;
+        if (content == null)
+            return null;
         if (defaultLiteralLanguage == null) {
-        	return wrappedFactory.createLiteral(content);
+            return wrappedFactory.createLiteral(content);
         } else {
-        	return wrappedFactory.createLiteral(content, defaultLiteralLanguage);
+            return wrappedFactory.createLiteral(content, defaultLiteralLanguage);
         }
     }
 
+    @Override
     public Literal createLiteral(boolean b) {
         return wrappedFactory.createLiteral(b);
     }
 
+    @Override
     public Literal createLiteral(byte b) {
         return wrappedFactory.createLiteral(b);
     }
 
+    @Override
     public Literal createLiteral(short i) {
         return wrappedFactory.createLiteral(i);
     }
 
+    @Override
     public Literal createLiteral(int i) {
         return wrappedFactory.createLiteral(i);
     }
 
+    @Override
     public Literal createLiteral(long l) {
         return wrappedFactory.createLiteral(l);
     }
 
+    @Override
     public Literal createLiteral(float v) {
         return wrappedFactory.createLiteral(v);
     }
 
+    @Override
     public Literal createLiteral(double v) {
         return wrappedFactory.createLiteral(v);
     }
 
-	@Override
-	public Literal createLiteral(BigDecimal v) {
+    @Override
+    public Literal createLiteral(BigDecimal v) {
         return wrappedFactory.createLiteral(v);
-	}
+    }
 
-	@Override
-	public Literal createLiteral(BigInteger v) {
+    @Override
+    public Literal createLiteral(BigInteger v) {
         return wrappedFactory.createLiteral(v);
-	}
+    }
 
+    @Override
     public Literal createLiteral(XMLGregorianCalendar calendar) {
         return wrappedFactory.createLiteral(calendar);
     }
 
+    @Override
     public Literal createLiteral(String label, String language) {
-        if (label == null) return null;
+        if (label == null)
+            return null;
         return wrappedFactory.createLiteral(label, language);
     }
 
+    @Override
     public Literal createLiteral(String pref, IRI value) {
-        if (pref == null) return null;
+        if (pref == null)
+            return null;
         return wrappedFactory.createLiteral(pref, value);
     }
 
@@ -165,6 +181,7 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
         return wrappedFactory.createLiteral(date);
     }
 
+    @Override
     public Statement createStatement(Resource sub, IRI pre, Value obj) {
         if (sub == null || pre == null || obj == null) {
             return null;
@@ -172,8 +189,10 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
         return wrappedFactory.createStatement(sub, pre, obj);
     }
 
+    @Override
     public Statement createStatement(Resource sub, IRI pre, Value obj, Resource context) {
-        if (sub == null || pre == null || obj == null) return null;
+        if (sub == null || pre == null || obj == null)
+            return null;
         return wrappedFactory.createStatement(sub, pre, obj, context);
     }
 
@@ -181,8 +200,10 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
      * @param uriStr input string to create URI from.
      * @return a valid sesame IRI or null if any exception occurred
      */
+    @Override
     public IRI createIRI(String uriStr) {
-        if (uriStr == null) return null;
+        if (uriStr == null)
+            return null;
         try {
             return wrappedFactory.createIRI(RDFUtils.fixIRIWithException(uriStr));
         } catch (Exception e) {
@@ -194,8 +215,10 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
     /**
      * @return a valid sesame IRI or null if any exception occurred
      */
+    @Override
     public IRI createIRI(String namespace, String localName) {
-        if (namespace == null || localName == null) return null;
+        if (namespace == null || localName == null)
+            return null;
         return wrappedFactory.createIRI(RDFUtils.fixIRIWithException(namespace), localName);
     }
 
@@ -235,9 +258,11 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
      * @return a valid sesame IRI or null if any exception occurred
      */
     public IRI fixLink(String link, String defaultSchema) {
-        if (link == null) return null;
+        if (link == null)
+            return null;
         link = fixWhiteSpace(link);
-        if ("".equals(link)) return null;
+        if ("".equals(link))
+            return null;
         if (defaultSchema != null && !link.startsWith(defaultSchema + ":")) {
             link = defaultSchema + ":" + link;
         }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
index b8f143b..aee9e14 100644
--- a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
+++ b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
@@ -24,19 +24,19 @@ import org.eclipse.rdf4j.model.IRI;
 import org.eclipse.rdf4j.model.Literal;
 import org.eclipse.rdf4j.model.Resource;
 import org.eclipse.rdf4j.model.Statement;
-import org.eclipse.rdf4j.model.URI;
 import org.eclipse.rdf4j.model.Value;
 import org.eclipse.rdf4j.model.ValueFactory;
 import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 import org.eclipse.rdf4j.model.vocabulary.RDF;
 import org.eclipse.rdf4j.rio.RDFFormat;
-import org.eclipse.rdf4j.rio.RDFHandlerException;
-import org.eclipse.rdf4j.rio.RDFParseException;
 import org.eclipse.rdf4j.rio.RDFParser;
 import org.eclipse.rdf4j.rio.RDFParserRegistry;
 import org.eclipse.rdf4j.rio.RDFWriter;
 import org.eclipse.rdf4j.rio.Rio;
+import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
 import org.eclipse.rdf4j.rio.helpers.StatementCollector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.xml.datatype.DatatypeConfigurationException;
 import javax.xml.datatype.DatatypeFactory;
@@ -67,6 +67,10 @@ public class RDFUtils {
 
     private static final ValueFactory valueFactory = SimpleValueFactory.getInstance();
 
+    private static final Logger LOG = LoggerFactory.getLogger(RDFUtils.class);
+
+    private RDFUtils() {}
+
     /**
      * Fixes typical errors in an absolute org.eclipse.rdf4j.model.IRI, such as unescaped spaces.
      *
@@ -177,17 +181,6 @@ public class RDFUtils {
 
     /**
      * Creates a {@link org.eclipse.rdf4j.model.IRI}.
-     * @param uri string representation of the {@link org.eclipse.rdf4j.model.IRI}
-     * @return a valid {@link org.eclipse.rdf4j.model.IRI}
-     * @deprecated Use {@link #iri(String)} instead.
-     */
-    @Deprecated
-    public static org.eclipse.rdf4j.model.IRI uri(String uri) {
-        return iri(uri);
-    }
-
-    /**
-     * Creates a {@link org.eclipse.rdf4j.model.IRI}.
      * @param iri a base string for the {@link org.eclipse.rdf4j.model.IRI}
      * @return a valid {@link org.eclipse.rdf4j.model.IRI}
      */
@@ -197,18 +190,6 @@ public class RDFUtils {
 
     /**
      * Creates a {@link org.eclipse.rdf4j.model.IRI}.
-     * @deprecated Use {@link #iri(String, String)} instead.
-     * @param namespace a base namespace for the {@link org.eclipse.rdf4j.model.IRI}
-     * @param localName a local name to associate with the namespace
-     * @return a valid {@link org.eclipse.rdf4j.model.IRI}
-     */
-    @Deprecated
-    public static org.eclipse.rdf4j.model.IRI uri(String namespace, String localName) {
-        return valueFactory.createIRI(namespace, localName);
-    }
-
-    /**
-     * Creates a {@link org.eclipse.rdf4j.model.IRI}.
      * @param namespace a base namespace for the {@link org.eclipse.rdf4j.model.IRI}
      * @param localName a local name to associate with the namespace
      * @return a valid {@link org.eclipse.rdf4j.model.IRI}
@@ -297,25 +278,12 @@ public class RDFUtils {
      * @return valid {@link org.eclipse.rdf4j.model.Literal}
      */
     public static Literal literal(String s, String l) {
-    	if(l == null) {
-    		// HACK: Workaround for ANY23 code that passes null in for language tag
-    		return valueFactory.createLiteral(s);
-    	} else {
-    		return valueFactory.createLiteral(s, l);
-    	}
-    }
-
-    /**
-     * Creates a {@link Literal}.
-     * @param s string representation of the base namespace for the
-     * {@link org.eclipse.rdf4j.model.Literal}
-     * @param datatype the datatype to associate with the namespace.
-     * @return valid {@link org.eclipse.rdf4j.model.Literal}
-     * @deprecated Use {@link #literal(String, org.eclipse.rdf4j.model.IRI)} instead.
-     */
-    @Deprecated
-    public static Literal literal(String s, URI datatype) {
-        return valueFactory.createLiteral(s, datatype);
+        if(l == null) {
+            // HACK: Workaround for ANY23 code that passes null in for language tag
+            return valueFactory.createLiteral(s);
+        } else {
+            return valueFactory.createLiteral(s, l);
+        }
     }
 
     /**
@@ -488,15 +456,13 @@ public class RDFUtils {
      * @param is input stream containing <code>RDF</code>.
      * @param baseIRI base uri.
      * @return list of statements detected within the input stream.
-     * @throws RDFHandlerException if there is an error handling the RDF
      * @throws IOException if there is an error reading the {@link java.io.InputStream}
-     * @throws RDFParseException if there is an error handling the RDF
      */
     public static Statement[] parseRDF(RDFFormat format, InputStream is, String baseIRI)
-    throws RDFHandlerException, IOException, RDFParseException {
+    throws IOException {
         final StatementCollector handler = new StatementCollector();
         final RDFParser parser = getParser(format);
-        parser.setVerifyData(true);
+        parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
         parser.setStopAtFirstError(true);
         parser.setPreserveBNodeIDs(true);
         parser.setRDFHandler(handler);
@@ -511,12 +477,10 @@ public class RDFUtils {
      * @param format input format type.
      * @param is input stream containing <code>RDF</code>.
      * @return list of statements detected within the input stream.
-     * @throws RDFHandlerException if there is an error handling the RDF
      * @throws IOException if there is an error reading the {@link java.io.InputStream}
-     * @throws RDFParseException if there is an error handling the RDF
      */
     public static Statement[] parseRDF(RDFFormat format, InputStream is)
-    throws RDFHandlerException, IOException, RDFParseException {
+    throws IOException {
         return parseRDF(format, is, "");
     }
 
@@ -527,12 +491,10 @@ public class RDFUtils {
      * @param format input format type.
      * @param in input string containing <code>RDF</code>.
      * @return list of statements detected within the input string.
-     * @throws RDFHandlerException if there is an error handling the RDF
      * @throws IOException if there is an error reading the {@link java.io.InputStream}
-     * @throws RDFParseException if there is an error handling the RDF
      */
     public static Statement[] parseRDF(RDFFormat format, String in)
-    throws RDFHandlerException, IOException, RDFParseException {
+    throws IOException {
         return parseRDF(format, new ByteArrayInputStream(in.getBytes()));
     }
 
@@ -543,11 +505,9 @@ public class RDFUtils {
      * @param resource resource name.
      * @return the statements declared within the resource file.
      * @throws java.io.IOException if an error occurs while reading file.
-     * @throws org.eclipse.rdf4j.rio.RDFHandlerException if an error occurs while parsing file.
-     * @throws org.eclipse.rdf4j.rio.RDFParseException if an error occurs while parsing file.
      */
-    public static Statement[] parseRDF(String resource) throws RDFHandlerException, IOException, RDFParseException {
-        final int extIndex = resource.lastIndexOf(".");
+    public static Statement[] parseRDF(String resource) throws IOException {
+        final int extIndex = resource.lastIndexOf('.');
         if(extIndex == -1)
             throw new IllegalArgumentException("Error while detecting the extension in resource name " + resource);
         final String extension = resource.substring(extIndex + 1);
@@ -568,8 +528,10 @@ public class RDFUtils {
             new java.net.URI(href.trim());
             return true;
         } catch (IllegalArgumentException e) {
+            LOG.error("Error processing href: {}", href, e);
             return false;
         } catch (URISyntaxException e) {
+            LOG.error("Error interpreting href: {} as URI.", href, e);
             return false;
         }
     }
@@ -649,7 +611,5 @@ public class RDFUtils {
         nodeId++;
         return bnode;
     }
-    
-    private RDFUtils() {}
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java b/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
index 648db28..2e591d0 100644
--- a/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
+++ b/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
@@ -41,6 +41,7 @@ import java.util.List;
  */
 public class XMLValidationReportSerializer implements ValidationReportSerializer {
 
+    @Override
     public void serialize(ValidationReport vr, OutputStream os) throws SerializationException {
         final PrintStream ps = new PrintStream(os);
         try {
@@ -54,9 +55,9 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
         if(o == null) {
             return;
         }
-        final Class oClass = o.getClass();
+        final Class<? extends Object> oClass = o.getClass();
         final String oClassName = getClassName(oClass);
-        ps.printf("<%s>\n", oClassName);
+        ps.printf("<%s>%n", oClassName);
         List<Method> getters = filterGetters(o.getClass());
         if(getters.isEmpty()) {
             ps.print( o.toString() );
@@ -65,11 +66,11 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
         for (Method getter : getters) {
             serializeGetterValue(o, getter, ps);
         }
-        ps.printf("</%s>\n", oClassName);
+        ps.printf("</%s>%n", oClassName);
     }
 
-    private String getClassName(Class oClass) {
-        final NodeName nodeName = (NodeName) oClass.getAnnotation(NodeName.class);
+    private String getClassName(Class<? extends Object> oClass) {
+        final NodeName nodeName = oClass.getAnnotation(NodeName.class);
         if(nodeName != null) {
             return nodeName.value();
         }
@@ -77,9 +78,9 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
         return Character.toLowerCase(simpleName.charAt(0)) + simpleName.substring(1);
     }
 
-    private List<Method> filterGetters(Class c) {
+    private List<Method> filterGetters(Class<? extends Object> c) {
         Method[] methods = c.getDeclaredMethods();
-        List<Method> filtered = new ArrayList<Method>();
+        List<Method> filtered = new ArrayList<>();
         for(Method method : methods) {
             if(Modifier.isStatic(method.getModifiers())) {
                 continue;
@@ -110,9 +111,9 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
         }
         final String property = getPropertyFromMethodName(methodName);
         if( isManaged(value) ) {
-            ps.printf("<%s>\n", property);
+            ps.printf("<%s>%n", property);
             printObject(value, ps);
-            ps.printf("</%s>\n", property);
+            ps.printf("</%s>%n", property);
         } else {
             List<Method> getters = filterGetters(value.getClass());
             for (Method getter : getters) {
@@ -148,7 +149,7 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
             return;
         }
         if(o instanceof Collection) {
-            Collection collection = (Collection) o;
+            Collection<?> collection = (Collection<?>) o;
             if(collection.isEmpty()) {
                 return;
             }

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java b/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java
index 0275c4e..cb3e668 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java
@@ -45,6 +45,7 @@ public class AboutNotURIRule implements Rule {
         return "about-not-uri-rule";
     }
 
+    @SuppressWarnings("unchecked")
     @Override
     public boolean applyOn(
             DOMDocument document,

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
index 1b965ec..757b6de 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
@@ -43,6 +43,7 @@ public class MetaNameMisuseRule implements Rule {
         return "meta-name-misuse-rule";
     }
 
+    @SuppressWarnings("unchecked")
     @Override
     public boolean applyOn(
             DOMDocument document,

http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java b/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
index 70d241a..f678913 100644
--- a/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
+++ b/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
@@ -43,7 +43,9 @@ import java.util.Map;
 public class RDFSchemaUtils {
 
     private static final String RDF_XML_SEPARATOR = StringUtils.multiply('=', 100);
-    
+
+    private RDFSchemaUtils() {}
+
     /**
      * Serializes a vocabulary composed of the given <code>namespace</code>,
      * <code>resources</code> and <code>properties</code>.
@@ -61,7 +63,7 @@ public class RDFSchemaUtils {
             IRI[] properties,
             Map<IRI,String> comments,
             RDFWriter writer
-    ) throws RDFHandlerException {
+    ) {
         writer.startRDF();
         for(IRI clazz : classes) {
             writer.handleStatement( RDFUtils.quad(clazz, RDF.TYPE, RDFS.CLASS, namespace) );
@@ -87,8 +89,7 @@ public class RDFSchemaUtils {
      * @param writer output writer.
      * @throws RDFHandlerException if there is an error handling the RDF
      */
-    public static void serializeVocabulary(Vocabulary vocabulary, RDFWriter writer)
-    throws RDFHandlerException {
+    public static void serializeVocabulary(Vocabulary vocabulary, RDFWriter writer) {
         serializeVocabulary(
                 vocabulary.getNamespace(),
                 vocabulary.getClasses(),
@@ -111,8 +112,7 @@ public class RDFSchemaUtils {
             Vocabulary vocabulary,
             RDFFormat format,
             boolean willFollowAnother,
-            PrintStream ps
-    ) throws RDFHandlerException {
+            PrintStream ps) {
         final RDFWriter rdfWriter;
         if(format == RDFFormat.RDFXML) {
             rdfWriter = Rio.createWriter(RDFFormat.RDFXML, ps);
@@ -134,8 +134,7 @@ public class RDFSchemaUtils {
      * @return string contained serialization.
      * @throws RDFHandlerException if there is an error handling the RDF
      */
-    public static String serializeVocabulary(Vocabulary vocabulary, RDFFormat format)
-    throws RDFHandlerException {
+    public static String serializeVocabulary(Vocabulary vocabulary, RDFFormat format) {
         final ByteArrayOutputStream baos = new ByteArrayOutputStream();
         final PrintStream ps = new PrintStream(baos);
         serializeVocabulary(vocabulary, format, false, ps);
@@ -150,16 +149,17 @@ public class RDFSchemaUtils {
      * @param ps output print stream.
      */
     public static void serializeVocabularies(RDFFormat format, PrintStream ps) {
-        final Class vocabularyClass = Vocabulary.class;
+        final Class<Vocabulary> vocabularyClass = Vocabulary.class;
+        @SuppressWarnings("rawtypes")
         final List<Class> vocabularies = DiscoveryUtils.getClassesInPackage(
                 vocabularyClass.getPackage().getName(),
                 vocabularyClass
         );
         int currentIndex = 0;
-        for (Class vocabClazz : vocabularies) {
+        for (Class<?> vocabClazz : vocabularies) {
             final Vocabulary instance;
             try {
-                final Constructor constructor = vocabClazz.getDeclaredConstructor();
+                final Constructor<?> constructor = vocabClazz.getDeclaredConstructor();
                 constructor.setAccessible(true);
                 instance = (Vocabulary) constructor.newInstance();
             } catch (Exception e) {
@@ -173,6 +173,4 @@ public class RDFSchemaUtils {
         }
     }
 
-    private RDFSchemaUtils() {}
-
 }