You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2018/01/08 13:14:17 UTC
[1/6] any23 git commit: ANY23-320 Address @Ignore tests in Any23 and
ANY23-131 Nested Microdata are not extracted
Repository: any23
Updated Branches:
refs/heads/master 97e364ae4 -> 6d0606f9b
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads
index 53899fa..8409a61 100644
--- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads
+++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1-expected.nquads
@@ -16,12 +16,14 @@
#
<http://books.example.com/works/45U8QJGZSQKDH8N> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/vocab/frbr/core#Work> <http://bob.example.com/> .
-<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/elements/1.1/creator> "Wil Wheaton" <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/BOOK> <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/EBOOK> <http://bob.example.com/> .
<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/terms/title> "Just a Geek" <http://bob.example.com/> .
-<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/vocab/frbr/core#realization> <http://books.example.com/products/9780596007683.BOOK> <http://bob.example.com/> .
-<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/vocab/frbr/core#realization> <http://books.example.com/products/9780596802189.EBOOK> <http://bob.example.com/> .
<http://books.example.com/products/9780596007683.BOOK> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/vocab/frbr/core#Expression> <http://bob.example.com/> .
<http://books.example.com/products/9780596007683.BOOK> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/BOOK> <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/vocab/frbr/core#realization> <http://books.example.com/products/9780596007683.BOOK> <http://bob.example.com/> .
<http://books.example.com/products/9780596802189.EBOOK> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/vocab/frbr/core#Expression> <http://bob.example.com/> .
<http://books.example.com/products/9780596802189.EBOOK> <http://purl.org/dc/terms/type> <http://books.example.com/product-types/EBOOK> <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/vocab/frbr/core#realization> <http://books.example.com/products/9780596802189.EBOOK> <http://bob.example.com/> .
+<http://books.example.com/works/45U8QJGZSQKDH8N> <http://purl.org/dc/elements/1.1/creator> "Wil\n Wheaton" <http://bob.example.com/> .
<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> <http://books.example.com/works/45U8QJGZSQKDH8N> <http://bob.example.com/> .
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html
index e313ccd..da6148f 100644
--- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html
+++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-1.html
@@ -14,26 +14,30 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<dl itemscope
- itemtype="http://purl.org/vocab/frbr/core#Work"
- itemid="http://books.example.com/works/45U8QJGZSQKDH8N">
- <dt>Title</dt>
- <dd><cite itemprop="http://purl.org/dc/terms/title">Just a Geek</cite></dd>
- <dt>By</dt>
- <dd><span itemprop="http://purl.org/dc/elements/1.1/creator">Wil Wheaton</span></dd>
- <dt>Format</dt>
- <dd itemprop="http://purl.org/vocab/frbr/core#realization"
- itemscope
- itemtype="http://purl.org/vocab/frbr/core#Expression"
- itemid="http://books.example.com/products/9780596007683.BOOK">
- <link itemprop="http://purl.org/dc/terms/type" href="http://books.example.com/product-types/BOOK">
- Print
- </dd>
- <dd itemprop="http://purl.org/vocab/frbr/core#realization"
- itemscope
- itemtype="http://purl.org/vocab/frbr/core#Expression"
- itemid="http://books.example.com/products/9780596802189.EBOOK">
- <link itemprop="http://purl.org/dc/terms/type" href="http://books.example.com/product-types/EBOOK">
- Ebook
- </dd>
+<dl itemscope itemtype="http://purl.org/vocab/frbr/core#Work"
+ itemid="http://books.example.com/works/45U8QJGZSQKDH8N">
+ <dt>Title</dt>
+ <dd>
+ <cite itemprop="http://purl.org/dc/terms/title">Just a Geek</cite>
+ </dd>
+ <dt>By</dt>
+ <dd>
+ <span itemprop="http://purl.org/dc/elements/1.1/creator">Wil
+ Wheaton</span>
+ </dd>
+ <dt>Format</dt>
+ <dd itemprop="http://purl.org/vocab/frbr/core#realization" itemscope
+ itemtype="http://purl.org/vocab/frbr/core#Expression"
+ itemid="http://books.example.com/products/9780596007683.BOOK">
+ <link itemprop="http://purl.org/dc/terms/type"
+ href="http://books.example.com/product-types/BOOK">
+ Print
+ </dd>
+ <dd itemprop="http://purl.org/vocab/frbr/core#realization" itemscope
+ itemtype="http://purl.org/vocab/frbr/core#Expression"
+ itemid="http://books.example.com/products/9780596802189.EBOOK">
+ <link itemprop="http://purl.org/dc/terms/type"
+ href="http://books.example.com/product-types/EBOOK">
+ Ebook
+ </dd>
</dl>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
index eb6dcd9..2632f7a 100644
--- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
+++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2-expected.nquads
@@ -15,18 +15,21 @@
# limitations under the License.
#
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node161nd8236x293102 <http://bob.example.com/> .
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node161nd8236x293103 <http://bob.example.com/> .
-_:node161nd8236x293102 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> .
-_:node161nd8236x293102 <http://microformats.org/profile/hcard/fn> "Princeton" <http://bob.example.com/> .
-_:node161nd8236x293102 <http://microformats.org/profile/hcard/n> _:node161nd8236x293104 <http://bob.example.com/> .
-_:node161nd8236x293102 <http://microformats.org/profile/hcard/adr> _:node161nd8236x293105 <http://bob.example.com/> .
-_:node161nd8236x293104 <http://microformats.org/profile/hcard/given-name> "Princeton" <http://bob.example.com/> .
-_:node161nd8236x293103 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> .
-_:node161nd8236x293103 <http://microformats.org/profile/hcard/fn> "Trekkie" <http://bob.example.com/> .
-_:node161nd8236x293103 <http://microformats.org/profile/hcard/n> _:node161nd8236x293106 <http://bob.example.com/> .
-_:node161nd8236x293103 <http://microformats.org/profile/hcard/adr> _:node161nd8236x293105 <http://bob.example.com/> .
-_:node161nd8236x293106 <http://microformats.org/profile/hcard/given-name> "Trekkie" <http://bob.example.com/> .
-_:node161nd8236x293105 <http://microformats.org/profile/hcard/street-address> "Avenue Q" <http://bob.example.com/> .
-<http://bob.example.com/> <http://vocab.sindice.net/date> "2011-06-08T12:56:39+01:00" <http://bob.example.com/> .
-<http://bob.example.com/> <http://vocab.sindice.net/size> "15"^^<http://www.w3.org/2001/XMLSchema#int> <http://bob.example.com/> .
\ No newline at end of file
+_:nodebdb2c525cf8095abb6954b51432e6 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/street-address> "Avenue Q" <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/fn> "Princeton" <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/given-name> "Princeton" <http://bob.example.com/> .
+_:node5194c3bb9d7f53e4759c6f393d95f88 <http://schema.org/given-name> "Princeton" <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/n> _:node5194c3bb9d7f53e4759c6f393d95f88 <http://bob.example.com/> .
+_:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://schema.org/street-address> "Avenue Q" <http://bob.example.com/> .
+_:nodebdb2c525cf8095abb6954b51432e6 <http://microformats.org/profile/hcard/adr> _:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://bob.example.com/> .
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:nodebdb2c525cf8095abb6954b51432e6 <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://microformats.org/profile/hcard> <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/street-address> "Avenue Q" <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/fn> "Trekkie" <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/given-name> "Trekkie" <http://bob.example.com/> .
+_:node45173ea18b736c2e9c3136e52ed3727e <http://schema.org/given-name> "Trekkie" <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/n> _:node45173ea18b736c2e9c3136e52ed3727e <http://bob.example.com/> .
+_:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://schema.org/street-address> "Avenue Q" <http://bob.example.com/> .
+_:node7a12e48e321d29211c8b7c2ce396854 <http://microformats.org/profile/hcard/adr> _:node1ffeb2699b75ba7aca5ee3d72adb55a8 <http://bob.example.com/> .
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node7a12e48e321d29211c8b7c2ce396854 <http://bob.example.com/> .
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html
index 7234b21..e38d2a8 100644
--- a/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html
+++ b/test-resources/src/test/resources/microdata/5.2.1-non-normative-example-2.html
@@ -15,12 +15,12 @@
limitations under the License.
-->
<p>
- Both
- <span itemscope itemtype="http://microformats.org/profile/hcard" itemref="home"><span itemprop="fn"
- ><span itemprop="n" itemscope><span itemprop="given-name">Princeton</span></span></span></span>
- and
- <span itemscope itemtype="http://microformats.org/profile/hcard" itemref="home"><span itemprop="fn"
- ><span itemprop="n" itemscope><span itemprop="given-name">Trekkie</span></span></span></span>
- live at
- <span id="home" itemprop="adr" itemscope><span itemprop="street-address">Avenue Q</span>.</span>
+ Both <span itemscope itemtype="http://microformats.org/profile/hcard"
+ itemref="home"><span itemprop="fn"><span
+ itemprop="n" itemscope><span itemprop="given-name">Princeton</span></span></span></span>
+ and <span itemscope itemtype="http://microformats.org/profile/hcard"
+ itemref="home"><span itemprop="fn"><span
+ itemprop="n" itemscope><span itemprop="given-name">Trekkie</span></span></span></span>
+ live at <span id="home" itemprop="adr" itemscope><span
+ itemprop="street-address">Avenue Q</span>.</span>
</p>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-basic-expected.properties
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-basic-expected.properties b/test-resources/src/test/resources/microdata/microdata-basic-expected.properties
index a020a4b..ee57018 100644
--- a/test-resources/src/test/resources/microdata/microdata-basic-expected.properties
+++ b/test-resources/src/test/resources/microdata/microdata-basic-expected.properties
@@ -17,13 +17,13 @@
result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Elizabeth", "type" : "Plain" } } ] }
result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[2]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Daniel", "type" : "Plain" } } ] }
-result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[3]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "band", "value" : { "content" : "Four Parts Water", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[3]/SPAN[1]", "name" : "nationality", "value" : { "content" : "British", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Neil", "type" : "Plain" } } ] }
+result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[3]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[3]/SPAN[1]", "name" : "nationality", "value" : { "content" : "British", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Neil", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "band", "value" : { "content" : "Four Parts Water", "type" : "Plain" } } ] }
result3={ "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/IMG[1]", "name" : "image", "value" : { "content" : "google-logo.png", "type" : "Link" } } ] }
result4={ "xpath" : "/HTML[1]/BODY[1]/DIV[5]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[5]/TIME[1]", "name" : "birthday", "value" : { "content" : "2009-05-10", "type" : "Date" } } ] }
result5={ "xpath" : "/HTML[1]/BODY[1]/DIV[6]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[6]/UL[1]/LI[1]", "name" : "flavor", "value" : { "content" : "Lemon sorbet", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[6]/UL[1]/LI[2]", "name" : "flavor", "value" : { "content" : "Apricot sorbet", "type" : "Plain" } } ] }
result6={ "xpath" : "/HTML[1]/BODY[1]/DIV[7]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[7]/SPAN[1]", "name" : "favorite-fruit", "value" : { "content" : "orange", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[7]/SPAN[1]", "name" : "favorite-color", "value" : { "content" : "orange", "type" : "Plain" } } ] }
result7={ "xpath" : "/HTML[1]/BODY[1]/FIGURE[1]/FIGCAPTION[1]/SPAN[1]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/FIGURE[1]/FIGCAPTION[1]/SPAN[1]/SPAN[1]", "name" : "name", "value" : { "content" : "The Castle", "type" : "Plain" } } ] }
result8={ "xpath" : "/HTML[1]/BODY[1]/SPAN[1]", "id" : null, "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SPAN[1]/META[1]", "name" : "name", "value" : { "content" : "The Castle", "type" : "Plain" } } ] }
-result9={ "xpath" : "/HTML[1]/BODY[1]/SECTION[1]", "id" : null, "refs" : [], "type" : "http://example.org/animals#cat", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/P[1]", "name" : "desc", "value" : { "content" : "Hedral is a male american domestic shorthair,\\n with a fluffy black fur with white paws and belly.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/H1[1]", "name" : "name", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/IMG[1]", "name" : "img", "value" : { "content" : "hedral.jpeg", "type" : "Link" } } ] }
+result9={ "xpath" : "/HTML[1]/BODY[1]/SECTION[1]", "id" : null, "refs" : [], "type" : "http://example.org/animals#cat", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/IMG[1]", "name" : "img", "value" : { "content" : "hedral.jpeg", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/H1[1]", "name" : "name", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[1]/P[1]", "name" : "desc", "value" : { "content" : "Hedral is a male american domestic shorthair, with a fluffy black fur with white paws and belly.", "type" : "Plain" } } ] }
result10={ "xpath" : "/HTML[1]/BODY[1]/DL[1]", "id" : null, "refs" : [], "type" : "http://vocab.example.net/book", "itemid" : "urn:isbn:0-330-34032-8", "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DL[1]/DD[2]", "name" : "author", "value" : { "content" : "Peter F. Hamilton\\n ", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DL[1]/DD[1]", "name" : "title", "value" : { "content" : "The Reality Dysfunction\\n ", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DL[1]/DD[3]/TIME[1]", "name" : "pubdate", "value" : { "content" : "1996-01-26", "type" : "Date" } } ] }
-result11={ "xpath" : "/HTML[1]/BODY[1]/SECTION[2]", "id" : null, "refs" : [], "type" : "http://example.org/animals#cat", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]", "name" : "desc", "value" : { "content" : "Hedral is a male american domestic shorthair, with a fluffy\\n black fur with\\n white paws and belly.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/H1[1]", "name" : "name", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/IMG[1]", "name" : "img", "value" : { "content" : "hedral.jpeg", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/H1[1]", "name" : "http://example.com/fn", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]/SPAN[1]", "name" : "http://example.com/color", "value" : { "content" : "black", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]/SPAN[2]", "name" : "http://exa
mple.com/color", "value" : { "content" : "white", "type" : "Plain" } } ] }
\ No newline at end of file
+result11={ "xpath" : "/HTML[1]/BODY[1]/SECTION[2]", "id" : null, "refs" : [], "type" : "http://example.org/animals#cat", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]/SPAN[1]", "name" : "http://example.com/color", "value" : { "content" : "black", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]/SPAN[2]", "name" : "http://example.com/color", "value" : { "content" : "white", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/IMG[1]", "name" : "img", "value" : { "content" : "hedral.jpeg", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/H1[1]", "name" : "http://example.com/fn", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/H1[1]", "name" : "name", "value" : { "content" : "Hedral", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/SECTION[2]/P[1]", "name" : "desc", "value" : { "content" : "Hedral is a male american domestic shorthair, with a fluffy black fur
with white paws and belly.", "type" : "Plain" } } ] }
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-basic.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-basic.html b/test-resources/src/test/resources/microdata/microdata-basic.html
index e7d4dba..695d126 100644
--- a/test-resources/src/test/resources/microdata/microdata-basic.html
+++ b/test-resources/src/test/resources/microdata/microdata-basic.html
@@ -75,10 +75,8 @@
<figure>
<img src="castle.jpeg">
<figcaption>
- <span itemscope>
- <span itemprop="name">The Castle</span>
- </span>
- (1986)
+ <span itemscope> <span itemprop="name">The Castle</span>
+ </span> (1986)
</figcaption>
</figure>
@@ -92,8 +90,7 @@
<!-- result9 -->
<section itemscope itemtype="http://example.org/animals#cat">
<h1 itemprop="name">Hedral</h1>
- <p itemprop="desc">Hedral is a male american domestic shorthair,
- with a fluffy black fur with white paws and belly.</p>
+ <p itemprop="desc">Hedral is a male american domestic shorthair, with a fluffy black fur with white paws and belly.</p>
<img itemprop="img" src="hedral.jpeg" alt=""
title="Hedral, age 18 months">
</section>
@@ -114,11 +111,9 @@
<!-- result11 -->
<section itemscope itemtype="http://example.org/animals#cat">
<h1 itemprop="name http://example.com/fn">Hedral</h1>
- <p itemprop="desc">
- Hedral is a male american domestic shorthair, with a fluffy <span
+ <p itemprop="desc">Hedral is a male american domestic shorthair, with a fluffy <span
itemprop="http://example.com/color">black</span> fur with <span
- itemprop="http://example.com/color">white</span> paws and belly.
- </p>
+ itemprop="http://example.com/color">white</span> paws and belly.</p>
<img itemprop="img" src="hedral.jpeg" alt=""
title="Hedral, age 18 months">
</section>
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties b/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties
index 8b60e54..843e957 100644
--- a/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties
+++ b/test-resources/src/test/resources/microdata/microdata-itemref-expected.properties
@@ -15,11 +15,15 @@
# limitations under the License.
#
-result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]", "id" : "is1", "refs" : [], "type" : "http://type/IScopeType1", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[2]", "name" : "p4", "value" : { "content" : "http://www.domain.org/path/2", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[2]", "name" : "p3", "value" : { "content" : "Some Text 2", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[1]", "name" : "p2", "value" : { "content" : "Some Text 1", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[1]", "name" : "p1", "value" : { "content" : "http://www.domain.org/path/1", "type" : "Link" } } ] }
-result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "id" : "is2", "refs" : ["ip5", "ip4", "ip3", "unexisting"], "type" : "http://type/IScopeType2", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/P[1]", "name" : "p6", "value" : { "content" : "Some Text 3", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/IMG[1]", "name" : "p5", "value" : { "content" : "http://source/dom/path", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[2]", "name" : "p4", "value" : { "content" : "http://www.domain.org/path/2", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[2]", "name" : "p3", "value" : { "content" : "Some Text 2", "type" : "Plain" } } ] }
-result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]", "id" : "loops", "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "name" : "next", "value" : { "c
ontent" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "id" : "loop4", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } } ] }, "type" : "Nested" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "name" : "self", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "id" : "loop0", "refs" : ["loop0"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[
3]/DIV[2]", "name" : "head", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "id" : "loop1", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } } ] }, "type" : "Nested" } } ] }, "type" : "Nested" } } ] }
-result3={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]", "id" : "idItem", "refs" : ["insideOut"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]/DIV[1]", "name" : "prop", "value" : { "content" : "Included via tree.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[1]", "name" : "prop", "value" : { "content" : "Included via parent, before.", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[3]", "name" : "prop", "value" : { "content" : "Included via parent, after.", "type" : "Plain" } } ] }
-error0={ "message" : "Unknown itemProp id 'unexisting'", "path" : "null", "begin_row" : -1, "begin_col" : -1, "end_row" : -1, "end_col" : -1 }
-error1={ "message" : "Duplicated deferred itemProp 'p5'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "begin_row" : 26, "begin_col" : 5, "end_row" : 26, "end_col" : 97 }
-error2={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop0' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "begin_row" : 33, "begin_col" : 9, "end_row" : 33, "end_col" : 67 }
-error3={ "message" : "Loop detected with depth 3 while dereferencing itemProp 'loop2' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "begin_row" : 37, "begin_col" : 9, "end_row" : 37, "end_col" : 67 }
\ No newline at end of file
+result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]", "id" : "is1", "refs" : [], "type" : "http://type/IScopeType1", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[1]", "name" : "p1", "value" : { "content" : "http://www.domain.org/path/1", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[1]", "name" : "p2", "value" : { "content" : "Some Text 1", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/P[2]", "name" : "p3", "value" : { "content" : "Some Text 2", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[1]/A[2]", "name" : "p4", "value" : { "content" : "http://www.domain.org/path/2", "type" : "Link" } } ] }
+result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "id" : "is2", "refs" : ["ip5", "ip4", "ip3", "unexisting"], "type" : "http://type/IScopeType2", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/IMG[1]", "name" : "p5", "value" : { "content" : "http://source/dom/path", "type" : "Link" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]/P[1]", "name" : "p6", "value" : { "content" : "Some Text 3", "type" : "Plain" } } ] }
+result2={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]", "id" : "loops", "refs" : [], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "name" : "head", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "id" : "loop1", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "id" : "loop2", "refs" : ["loop3"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "name" : "next", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "id" : "loop3", "refs" : ["loop4"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "name" : "next", "value" : {
"content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "id" : "loop4", "refs" : ["loop2"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "name" : "self", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "id" : "loop0", "refs" : ["loop0"], "type" : null, "itemid" : null, "properties" : [ ] }, "type" : "Nested" } } ] }
+result3={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]", "id" : "idItem", "refs" : ["insideOut"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]/DIV[1]", "name" : "prop", "value" : { "content" : "Included via tree.", "type" : "Plain" } } ] }
+error0={ "message" : "Duplicated deferred itemProp 'p5'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "begin_row" : 28, "begin_col" : 5, "end_row" : 29, "end_col" : 40 }
+error1={ "message" : "Duplicated deferred itemProp 'p6'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[2]", "begin_row" : 28, "begin_col" : 5, "end_row" : 29, "end_col" : 40 }
+error2={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop0' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[1]", "begin_row" : 36, "begin_col" : 7, "end_row" : 36, "end_col" : 65 }
+error3={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop2' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[2]", "begin_row" : 37, "begin_col" : 7, "end_row" : 37, "end_col" : 65 }
+error4={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop3' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[3]", "begin_row" : 39, "begin_col" : 7, "end_row" : 39, "end_col" : 65 }
+error5={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop4' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[4]", "begin_row" : 40, "begin_col" : 7, "end_row" : 40, "end_col" : 65 }
+error6={ "message" : "Loop detected with depth 1 while dereferencing itemProp 'loop2' .", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[3]/DIV[5]", "begin_row" : 41, "begin_col" : 7, "end_row" : 41, "end_col" : 65 }
+error7={ "message" : "Duplicated deferred itemProp 'prop'.", "path" : "/HTML[1]/BODY[1]/DIV[1]/DIV[4]/DIV[2]", "begin_row" : 48, "begin_col" : 7, "end_row" : 48, "end_col" : 54 }
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-itemref.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-itemref.html b/test-resources/src/test/resources/microdata/microdata-itemref.html
index d133330..f8ff990 100644
--- a/test-resources/src/test/resources/microdata/microdata-itemref.html
+++ b/test-resources/src/test/resources/microdata/microdata-itemref.html
@@ -14,35 +14,43 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<div id="data">
+<html>
+<body>
+ <div id="data">
<div id="is1" itemscope itemtype="http://type/IScopeType1">
- <a id="ip1" itemprop="p1" href="http://www.domain.org/path/1"></a>
- <p id="ip2" itemprop="p2">Some Text 1</p>
- <p id="ip3" itemprop="p3">Some Text 2</p>
- <a id="ip4" itemprop="p4" href="http://www.domain.org/path/2"></a>
+ <a id="ip1" itemprop="p1" href="http://www.domain.org/path/1"></a>
+ <p id="ip2" itemprop="p2">Some Text 1</p>
+ <p id="ip3" itemprop="p3">Some Text 2</p>
+ <a id="ip4" itemprop="p4" href="http://www.domain.org/path/2"></a>
</div>
<!-- Duplicate and unexisting itemrefs. -->
- <div id="is2" itemscope itemtype="http://type/IScopeType2" itemref="ip5 ip4 ip3 unexisting">
- <img id="ip5" itemprop="p5" src="http://source/dom/path">
- <p id="ip6" itemprop="p6">Some Text 3</p>
+ <div id="is2" itemscope itemtype="http://type/IScopeType2"
+ itemref="ip5 ip4 ip3 unexisting">
+ <img id="ip5" itemprop="p5" src="http://source/dom/path">
+ <p id="ip6" itemprop="p6">Some Text 3</p>
</div>
<!-- Loops. -->
<div id="loops" itemscope>
- <div id="loop0" itemprop="self" itemscope itemref="loop0"></div>
- <div id="loop1" itemprop="head" itemscope itemref="loop2">not in the loop</div>
- <div id="loop2" itemprop="next" itemscope itemref="loop3"></div>
- <div id="loop3" itemprop="next" itemscope itemref="loop4"></div>
- <div id="loop4" itemprop="next" itemscope itemref="loop2"></div>
+ <div id="loop0" itemprop="self" itemscope itemref="loop0"></div>
+ <div id="loop1" itemprop="head" itemscope itemref="loop2">not
+ in the loop</div>
+ <div id="loop2" itemprop="next" itemscope itemref="loop3"></div>
+ <div id="loop3" itemprop="next" itemscope itemref="loop4"></div>
+ <div id="loop4" itemprop="next" itemscope itemref="loop2"></div>
</div>
<!-- Inside - Out Nesting. -->
<div id="insideOut">
- <div id="idBefore" itemprop="prop">Included via parent, before.</div>
- <div id="idItem" itemscope itemref="insideOut">
- <div id="ioChild" itemprop="prop">Included via tree.</div>
- </div>
- <div id="idAfter" itemprop="prop">Included via parent, after.</div>
+ <div id="idBefore" itemprop="prop">Included via parent,
+ before.</div>
+ <div id="idItem" itemscope itemref="insideOut">
+ <div id="ioChild" itemprop="prop">Included via tree.</div>
+ </div>
+ <div id="idAfter" itemprop="prop">Included via parent,
+ after.</div>
</div>
-</div>
+ </div>
+</body>
+</html>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-json-serialization.json
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-json-serialization.json b/test-resources/src/test/resources/microdata/microdata-json-serialization.json
index 27692dc..6a3beec 100644
--- a/test-resources/src/test/resources/microdata/microdata-json-serialization.json
+++ b/test-resources/src/test/resources/microdata/microdata-json-serialization.json
@@ -1 +1 @@
-{ "result" : [{ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "name" : "band", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "id" : "b", "refs" : ["c"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Jazz Band", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "size", "value" : { "content" : "12", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Amanda", "type" : "Plain" } } ] }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath"
: "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } } ] }] }
\ No newline at end of file
+{ "result" : [{ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ ] }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }] }
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads b/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads
index fa6686d..dbf6d4a 100644
--- a/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads
+++ b/test-resources/src/test/resources/microdata/microdata-nested-expected.nquads
@@ -15,14 +15,11 @@
# limitations under the License.
#
-_:node3e103839c5eaa86c975e26a96157bf2 <http://schema.org/name> "Jazz Band" <http://bob.example.com/> .
-_:node3e103839c5eaa86c975e26a96157bf2 <http://schema.org/size> "12" <http://bob.example.com/> .
-_:node70664686c06639b5211a24a9cf34f99 <http://schema.org/band> _:node3e103839c5eaa86c975e26a96157bf2 <http://bob.example.com/> .
-_:node70664686c06639b5211a24a9cf34f99 <http://schema.org/name> "Amanda" <http://bob.example.com/> .
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node70664686c06639b5211a24a9cf34f99 <http://bob.example.com/> .
-_:nodea2c65a1b5a43d4ac4bd93b3216f5f7d <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Movie> <http://bob.example.com/> .
-_:nodea2c65a1b5a43d4ac4bd93b3216f5f7d <http://schema.org/Movie/name> "Avatar" <http://bob.example.com/> .
-_:noded2bffd25e1a8448b159c4c45afd3283 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://bob.example.com/> .
-_:noded2bffd25e1a8448b159c4c45afd3283 <http://schema.org/Person/name> "James Cameron" <http://bob.example.com/> .
-_:nodea2c65a1b5a43d4ac4bd93b3216f5f7d <http://schema.org/Movie/director> _:noded2bffd25e1a8448b159c4c45afd3283 <http://bob.example.com/> .
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:nodea2c65a1b5a43d4ac4bd93b3216f5f7d <http://bob.example.com/> .
\ No newline at end of file
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node295195eb5d5124e03da26bafc7313bc <http://bob.example.com/> .
+_:node3ecb85b37ebfd65a5d57ab82374a5 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Movie> <http://bob.example.com/> .
+_:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> <http://bob.example.com/> .
+_:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://schema.org/Person/name> "James Cameron" <http://bob.example.com/> .
+_:node3ecb85b37ebfd65a5d57ab82374a5 <http://schema.org/Movie/director> _:node1fd8d9ab2f041cdaecbae55b76fadc1 <http://bob.example.com/> .
+_:node3ecb85b37ebfd65a5d57ab82374a5 <http://schema.org/Movie/name> "Avatar" <http://bob.example.com/> .
+_:node3ecb85b37ebfd65a5d57ab82374a5 <http://schema.org/Movie/name> "James Cameron" <http://bob.example.com/> .
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node3ecb85b37ebfd65a5d57ab82374a5 <http://bob.example.com/> .
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-nested-expected.properties
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-nested-expected.properties b/test-resources/src/test/resources/microdata/microdata-nested-expected.properties
index 2f65ec4..ca05f33 100644
--- a/test-resources/src/test/resources/microdata/microdata-nested-expected.properties
+++ b/test-resources/src/test/resources/microdata/microdata-nested-expected.properties
@@ -15,5 +15,5 @@
# limitations under the License.
#
-result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "name" : "band", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[2]", "id" : "b", "refs" : ["c"], "type" : null, "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Jazz Band", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[3]/P[2]/SPAN[1]", "name" : "size", "value" : { "content" : "12", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/P[1]/SPAN[1]", "name" : "name", "value" : { "content" : "Amanda", "type" : "Plain" } } ] }
-result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } } ] }
\ No newline at end of file
+result0={ "xpath" : "/HTML[1]/BODY[1]/DIV[1]", "id" : "amanda", "refs" : ["a", "b"], "type" : null, "itemid" : null, "properties" : [ ] }
+result1={ "xpath" : "/HTML[1]/BODY[1]/DIV[4]", "id" : null, "refs" : [], "type" : "http://schema.org/Movie", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "name" : "director", "value" : { "content" : { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]", "id" : null, "refs" : [], "type" : "http://schema.org/Person", "itemid" : null, "properties" : [ { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }, "type" : "Nested" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/H1[1]", "name" : "name", "value" : { "content" : "Avatar", "type" : "Plain" } }, { "xpath" : "/HTML[1]/BODY[1]/DIV[4]/DIV[1]/SPAN[1]", "name" : "name", "value" : { "content" : "James Cameron", "type" : "Plain" } } ] }
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-nested.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-nested.html b/test-resources/src/test/resources/microdata/microdata-nested.html
index aaf29d6..bc81642 100644
--- a/test-resources/src/test/resources/microdata/microdata-nested.html
+++ b/test-resources/src/test/resources/microdata/microdata-nested.html
@@ -18,22 +18,29 @@
<head>
<body>
-<!-- result0 -->
-<div itemscope id="amanda" itemref="a b"></div>
-<p id="a">Name: <span itemprop="name">Amanda</span></p>
-<div id="b" itemprop="band" itemscope itemref="c"></div>
-<div id="c">
- <p>Band: <span itemprop="name">Jazz Band</span></p>
- <p>Size: <span itemprop="size">12</span> players</p>
-</div>
+ <!-- result0 -->
+ <div itemscope id="amanda" itemref="a b"></div>
+ <p id="a">
+ Name: <span itemprop="name">Amanda</span>
+ </p>
+ <div id="b" itemprop="band" itemscope itemref="c"></div>
+ <div id="c">
+ <p>
+ Band: <span itemprop="name">Jazz Band</span>
+ </p>
+ <p>
+ Size: <span itemprop="size">12</span> players
+ </p>
+ </div>
-<!-- result1 -->
-<div itemscope itemtype="http://schema.org/Movie">
+ <!-- result1 -->
+ <div itemscope itemtype="http://schema.org/Movie">
<h1 itemprop="name">Avatar</h1>
- <div itemprop="director" itemscope itemtype="http://schema.org/Person">
- Director: <span itemprop="name">James Cameron</span>
+ <div itemprop="director" itemscope
+ itemtype="http://schema.org/Person">
+ Director: <span itemprop="name">James Cameron</span>
</div>
-</div>
+ </div>
</body>
</head>
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads b/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads
index 9ea9752..f59e6a0 100644
--- a/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads
+++ b/test-resources/src/test/resources/microdata/microdata-richsnippet-expected.nquads
@@ -15,15 +15,18 @@
# limitations under the License.
#
-_:nodee6ae2861b16a51a6519079d82c13763 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data-vocabulary.org/Person> <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/title> "graduate research assistant" <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/nickname> "Johnny" <http://bob.example.com/> .
-_:node14cf7264b949d7a9e18598d198e5f65 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data-vocabulary.org/Address> <http://bob.example.com/> .
-_:node14cf7264b949d7a9e18598d198e5f65 <http://data-vocabulary.org/Address/region> "Georgia" <http://bob.example.com/> .
-_:node14cf7264b949d7a9e18598d198e5f65 <http://data-vocabulary.org/Address/locality> "Warner Robins" <http://bob.example.com/> .
-_:node14cf7264b949d7a9e18598d198e5f65 <http://data-vocabulary.org/Address/street-address> "1234 Peach Drive" <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/address> _:node14cf7264b949d7a9e18598d198e5f65 <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/name> "John Doe" <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/affiliation> "University of Dreams" <http://bob.example.com/> .
-_:nodee6ae2861b16a51a6519079d82c13763 <http://data-vocabulary.org/Person/url> <http://www.JohnnyD.com> <http://bob.example.com/> .
-<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:nodee6ae2861b16a51a6519079d82c13763 <http://bob.example.com/> .
\ No newline at end of file
+_:node9423934b5f186fd49d90edd31b5625ba <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data-vocabulary.org/Person> <http://bob.example.com/> .
+_:nodee94f8737ad89876c85bd87156a1eb585 <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://data-vocabulary.org/Address> <http://bob.example.com/> .
+_:nodee94f8737ad89876c85bd87156a1eb585 <http://data-vocabulary.org/Address/street-address> "1234 Peach Drive" <http://bob.example.com/> .
+_:nodee94f8737ad89876c85bd87156a1eb585 <http://data-vocabulary.org/Address/locality> "Warner Robins" <http://bob.example.com/> .
+_:nodee94f8737ad89876c85bd87156a1eb585 <http://data-vocabulary.org/Address/region> "Georgia" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/address> _:nodee94f8737ad89876c85bd87156a1eb585 <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/affiliation> "University of Dreams" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/street-address> "1234 Peach Drive" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/name> "John Doe" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/nickname> "Johnny" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/locality> "Warner Robins" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/title> "graduate research assistant" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/region> "Georgia" <http://bob.example.com/> .
+_:node9423934b5f186fd49d90edd31b5625ba <http://data-vocabulary.org/Person/url> <http://www.JohnnyD.com> <http://bob.example.com/> .
+<http://bob.example.com/> <http://www.w3.org/1999/xhtml/microdata#item> _:node9423934b5f186fd49d90edd31b5625ba <http://bob.example.com/> .
\ No newline at end of file
[2/6] any23 git commit: ANY23-320 Address @Ignore tests in Any23 and
ANY23-131 Nested Microdata are not extracted
Posted by le...@apache.org.
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
index c505bb8..1bffcee 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
@@ -17,13 +17,15 @@
package org.apache.any23.extractor.html;
-import junit.framework.Assert;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.IssueReport;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.VCard;
-import org.junit.Ignore;
import org.junit.Test;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
@@ -40,929 +42,927 @@ import java.util.List;
*/
public class HCardExtractorTest extends AbstractExtractorTestCase {
- private static final VCard vVCARD = VCard.getInstance();
-
- protected ExtractorFactory<?> getExtractorFactory() {
- return new HCardExtractorFactory();
- }
-
- @Test
- public void testEMailNotUriReal() throws Exception {
- assertExtract("/microformats/hcard/17-email-not-uri.html");
- assertDefaultVCard();
- assertJohn();
- assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
- }
-
- @Test
- public void testTel() throws Exception {
- assertExtract("/microformats/hcard/21-tel.html");
- assertDefaultVCard();
- String[] tels = { "+1.415.555.1231", "+1.415.555.1235",
- "+1.415.555.1236", "+1.415.555.1237", "+1.415.555.1238",
- "+1.415.555.1239", "+1.415.555.1240", "+1.415.555.1241",
- "+1.415.555.1242", "+1.415.555.1243" };
- for (String tel : tels) {
- assertContains(vVCARD.tel, RDFUtils.iri("tel:" + tel));
- }
- Resource telResource = RDFUtils.iri("tel:+14155551233");
- assertContains(vVCARD.fax, telResource);
- assertContains(vVCARD.workTel, telResource);
- assertContains(vVCARD.homeTel, telResource);
- assertJohn();
- }
-
- @Test
- public void testAbbrTitleEverything() throws Exception {
- assertExtract("/microformats/hcard/23-abbr-title-everything.html");
- assertDefaultVCard();
-
- assertContains(vVCARD.fn, "John Doe");
- assertContains(vVCARD.nickname, "JJ");
-
- assertContains(vVCARD.given_name, "Jonathan");
- assertContains(vVCARD.additional_name, "John");
- assertContains(vVCARD.family_name, "Doe-Smith");
- assertContains(vVCARD.honorific_suffix, "Medical Doctor");
-
- assertContains(vVCARD.title, "President");
- assertContains(vVCARD.role, "Chief");
- assertContains(vVCARD.tz, "-0700");
- assertContains(vVCARD.bday, "2006-04-04");
- assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
- assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
- assertContains(vVCARD.class_, "public");
- assertContains(vVCARD.note, "this is a note");
- assertContains(vVCARD.organization_name, "Intellicorp");
- assertContains(vVCARD.organization_unit, "Intelligence");
-
- // We define the property in this extractor _but_ we do not parse it.
- assertContains(vVCARD.geo, (Resource) null);
- // Thus we do not cointain these.
- // The interaction is in @link RDFMergerTest.java
- assertNotContains(RDF.TYPE, vVCARD.Location);
- assertNotContains(null, vVCARD.latitude, "37.77");
- assertNotContains(null, vVCARD.longitude, "-122.41");
-
- // see above
- assertContains(vVCARD.adr, (Resource) null);
- assertNotContains(RDF.TYPE, vVCARD.Address);
- assertNotContains(null, vVCARD.post_office_box, "Box 1234");
- assertNotContains(null, vVCARD.extended_address, "Suite 100");
- assertNotContains(null, vVCARD.street_address, "123 Fake Street");
- assertNotContains(null, vVCARD.locality, "San Francisco");
- assertNotContains(null, vVCARD.region, "California");
- assertNotContains(null, vVCARD.postal_code, "12345-6789");
- assertNotContains(null, vVCARD.country_name, "United States of America");
- assertNotContains(null, vVCARD.addressType, "work");
- }
-
- @Test
- public void testGeoAbbr() throws Exception {
- assertExtract("/microformats/hcard/25-geo-abbr.html");
- assertModelNotEmpty();
- assertContains(vVCARD.fn, "Paradise");
- assertContains(RDF.TYPE, vVCARD.Organization);
- assertContains(vVCARD.organization_name, "Paradise");
- // See above: geo property yes, gteo blank node no.
- assertContains(vVCARD.geo, (Resource) null);
- assertNotContains(RDF.TYPE, vVCARD.Location);
- assertNotContains(null, vVCARD.latitude, "30.267991");
- assertNotContains(null, vVCARD.longitude, "-97.739568");
- }
-
- @Test
- public void testAncestors() throws Exception {
- assertExtract("/microformats/hcard/26-ancestors.html");
- assertModelNotEmpty();
-
- assertContains(vVCARD.fn, "John Doe");
- assertNotContains(null, vVCARD.fn,
- "Mister Jonathan John Doe-Smith Medical Doctor");
- assertContains(vVCARD.nickname, "JJ");
- assertNotContains(RDF.TYPE, vVCARD.Address);
- assertContains(vVCARD.tz, "-0700");
- assertContains(vVCARD.title, "President");
- assertContains(vVCARD.role, "Chief");
- assertContains(vVCARD.organization_name, "Intellicorp");
- assertContains(vVCARD.organization_unit, "Intelligence");
-
- assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
- assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
- assertContains(vVCARD.note, "this is a note");
- assertContains(vVCARD.class_, "public");
-
- assertNotContains(RDF.TYPE, vVCARD.Location);
- assertContains(vVCARD.geo, (Resource) null);
- assertNotContains(null, vVCARD.latitude, "37.77");
- assertNotContains(null, vVCARD.longitude, "-122.41");
-
- assertContains(RDF.TYPE, vVCARD.Name);
- assertContains(vVCARD.additional_name, "John");
- assertContains(vVCARD.given_name, "Jonathan");
- assertContains(vVCARD.family_name, "Doe-Smith");
- assertContains(vVCARD.honorific_prefix, "Mister");
- assertContains(vVCARD.honorific_suffix, "Medical Doctor");
-
- assertNotContains(null, vVCARD.post_office_box, "Box 1234");
- assertNotContains(null, vVCARD.extended_address, "Suite 100");
- assertNotContains(null, vVCARD.street_address, "123 Fake Street");
- assertNotContains(null, vVCARD.locality, "San Francisco");
- assertNotContains(null, vVCARD.region, "California");
- assertNotContains(null, vVCARD.postal_code, "12345-6789");
- assertNotContains(null, vVCARD.country_name, "United States of America");
- assertNotContains(null, vVCARD.addressType, "work");
- }
-
- @Test
- public void testfnOrg() throws Exception {
- assertExtract("/microformats/hcard/30-fn-org.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
- RepositoryResult<Statement> repositoryResult = getStatements(null,
- RDF.TYPE, vVCARD.VCard);
- try {
- while (repositoryResult.hasNext()) {
- Resource card = repositoryResult.next().getSubject();
- Assert.assertNotNull(findObject(card, vVCARD.fn));
- String name = findObjectAsLiteral(card, vVCARD.fn);
-
- Assert.assertNotNull(findObject(card, vVCARD.org));
- Resource org = findObjectAsResource(card, vVCARD.org);
- Assert.assertNotNull(findObject(org, vVCARD.organization_name));
-
- if (name.equals("Dan Connolly")) {
- Assert.assertNotNull(findObject(card, vVCARD.n));
- Assert.assertFalse(name.equals(org.stringValue()));
- }
- }
- } finally {
- repositoryResult.close();
- }
- }
-
- @Test
- public void testInclude() throws Exception {
- assertExtract("/microformats/hcard/31-include.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
- assertStatementsSize(vVCARD.email, (Value) null, 3);
-
- RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
- vVCARD.VCard);
- try {
- while (statements.hasNext()) {
- Resource vcard = statements.next().getSubject();
-
- Assert.assertNotNull(findObject(vcard, vVCARD.fn));
- Assert.assertEquals("Brian Suda",
- findObjectAsLiteral(vcard, vVCARD.fn));
-
- Assert.assertNotNull(findObject(vcard, vVCARD.url));
- String url = findObjectAsResource(vcard, vVCARD.url)
- .stringValue();
- Assert.assertEquals("http://suda.co.uk/", url);
-
- Resource name = findObjectAsResource(vcard, vVCARD.n);
- Assert.assertEquals("Brian",
- findObjectAsLiteral(name, vVCARD.given_name));
- Assert.assertEquals("Suda",
- findObjectAsLiteral(name, vVCARD.family_name));
-
- // Included data.
- Assert.assertNotNull(findObject(vcard, vVCARD.email));
- String mail = findObjectAsLiteral(vcard, vVCARD.email);
- Assert.assertEquals("mailto:correct@example.com", mail);
- }
- } finally {
- statements.close();
- }
- }
-
- @Test
- public void testHeader() throws Exception {
- assertExtract("/microformats/hcard/32-header.html");
- assertModelNotEmpty();
- // check fn, name, family, nick.
- assertJohn();
-
- RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
- vVCARD.VCard);
- try {
- Resource example = RDFUtils.iri("http://example.org/");
- while (statements.hasNext()) {
- Resource card = statements.next().getSubject();
- Assert.assertNotNull(findObject(card, vVCARD.fn));
-
- String fn = findObjectAsLiteral(card, vVCARD.fn);
- if ("Jane Doe".equals(fn)) {
- assertNotFound(card, vVCARD.org);
- } else {
- Assert.assertTrue("John Doe".equals(fn)
- || "Brian Suda".equals(fn));
-
- Assert.assertNotNull(findObject(card, vVCARD.url));
- Assert.assertEquals(example,
- findObjectAsResource(card, vVCARD.url));
-
- Assert.assertNotNull(findObject(card, vVCARD.org));
- Resource org = findObjectAsResource(card, vVCARD.org);
- assertContains(org, RDF.TYPE, vVCARD.Organization);
- Assert.assertNotNull(org);
- Assert.assertNotNull(findObject(card, vVCARD.org));
- Assert.assertNotNull(findObject(org,
- vVCARD.organization_name));
- Assert.assertEquals("example.org",
- findObjectAsLiteral(org, vVCARD.organization_name));
- }
- }
- // Just to be sure there are no spurious statements.
- // assertStatementsSize(VCARD.org, null, 2);
- assertStatementsSize(vVCARD.url, example, 2);
- } finally {
- statements.close();
- }
- }
-
- @Test
- public void testAreaFull() throws Exception {
- assertExtract("/microformats/hcard/33-area.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
-
- RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
- vVCARD.VCard);
- while (statements.hasNext()) {
- Resource vcard = statements.next().getSubject();
- final Value fnValue = findObject(vcard, vVCARD.fn);
- Assert.assertNotNull(fnValue);
- String fn = fnValue.stringValue();
- final Value vcardValue = findObject(vcard, vVCARD.url);
- Assert.assertNotNull(vcardValue);
- String url = vcardValue.stringValue();
- final Value emailValue = findObject(vcard, vVCARD.email);
- Assert.assertNotNull(emailValue);
- String mail = emailValue.stringValue();
- Assert.assertEquals("Joe Public", fn);
- Assert.assertEquals("http://example.com/", url);
- Assert.assertEquals("mailto:joe@example.com", mail);
- }
- }
-
- @Test
- public void testCategories() throws Exception {
- assertExtract("/microformats/hcard/36-categories.html");
- assertModelNotEmpty();
- assertContains(vVCARD.given_name, "Joe");
- assertContains(vVCARD.given_name, "john");
- assertContains(vVCARD.family_name, "doe");
- assertContains(vVCARD.family_name, "User");
- assertContains(vVCARD.fn, "john doe");
- assertContains(vVCARD.fn, "Joe User");
-
- assertContains(vVCARD.category, "C1");
- assertContains(vVCARD.category, "C2a");
- assertContains(vVCARD.category, "C4");
- assertContains(vVCARD.category, "User");
- String[] cats = { "C3", "C5", "C6", "C7", "C9", "luser", "D1", "D2",
- "D3" };
- for (String cat : cats)
- assertContains(vVCARD.category, "http://example.com/tag/" + cat);
-
- assertNotContains(null, vVCARD.category, "D4");
- }
-
- @Test
- public void testSingleton() throws Exception {
- // this tests probably tests that e just get the first fn and so on
- assertExtract("/microformats/hcard/37-singleton.html");
- assertModelNotEmpty();
- assertStatementsSize(vVCARD.fn, (Value) null, 1);
- assertContains(vVCARD.fn, "john doe 1");
-
- assertStatementsSize(RDF.TYPE, vVCARD.Name, 1);
- assertStatementsSize(vVCARD.given_name, (Value) null, 1);
- assertContains(vVCARD.given_name, "john");
- assertStatementsSize(vVCARD.family_name, (Value) null, 1);
- assertContains(vVCARD.family_name, "doe");
- assertStatementsSize(vVCARD.sort_string, (Value) null, 1);
- assertContains(vVCARD.sort_string, "d");
-
- assertStatementsSize(vVCARD.bday, (Value) null, 1);
- assertContains(vVCARD.bday, "20060707");
- assertStatementsSize(vVCARD.rev, (Value) null, 1);
- assertContains(vVCARD.rev, "20060707");
- assertStatementsSize(vVCARD.class_, (Value) null, 1);
- assertContains(vVCARD.class_, "public");
- assertStatementsSize(vVCARD.tz, (Value) null, 1);
- assertContains(vVCARD.tz, "+0600");
-
- // Why 0? because the extractor does not look at geo uF!
- assertStatementsSize(RDF.TYPE, vVCARD.Location, 0);
- assertStatementsSize(vVCARD.geo, (Value) null, 2);
-
- assertNotContains(null, vVCARD.latitude, "123.45");
- assertNotContains(null, vVCARD.longitude, "67.89");
-
- assertStatementsSize(vVCARD.uid, (Value) null, 1);
- assertContains(vVCARD.uid, "unique-id-1");
- }
-
- @Test
- public void testUidFull() throws Exception {
- assertExtract("/microformats/hcard/38-uid.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
- RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
- vVCARD.VCard);
-
- try {
- while (statements.hasNext()) {
- Resource vcard = statements.next().getSubject();
- Assert.assertNotNull(findObject(vcard, vVCARD.fn));
- String fn = findObjectAsLiteral(vcard, vVCARD.fn);
- Assert.assertEquals("Ryan King", fn);
-
- Assert.assertNotNull(findObject(vcard, vVCARD.n));
- Resource n = findObjectAsResource(vcard, vVCARD.n);
- Assert.assertNotNull(n);
- Assert.assertNotNull(findObject(n, vVCARD.given_name));
- Assert.assertEquals("Ryan",
- findObjectAsLiteral(n, vVCARD.given_name));
- Assert.assertNotNull(findObject(n, vVCARD.family_name));
- Assert.assertEquals("King",
- findObjectAsLiteral(n, vVCARD.family_name));
-
- Assert.assertNotNull(findObject(vcard, vVCARD.url));
- Resource url = findObjectAsResource(vcard, vVCARD.url);
-
- Assert.assertNotNull(findObject(vcard, vVCARD.uid));
- String uid = findObjectAsLiteral(vcard, vVCARD.uid);
-
- Assert.assertEquals("http://theryanking.com/contact/",
- url.stringValue());
- Assert.assertEquals("http://theryanking.com/contact/", uid);
- }
- } finally {
- statements.close();
- }
- }
-
- @Test
- public void testRomanianWikipedia() throws Exception {
- assertExtract("/microformats/hcard/40-fn-inside-adr.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
- RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
- vVCARD.VCard);
-
- try {
- while (statements.hasNext()) {
- Resource card = statements.next().getSubject();
- Assert.assertNotNull(findObject(card, vVCARD.fn));
- String fn = findObjectAsLiteral(card, vVCARD.fn);
- Assert.assertEquals("Berlin", fn);
-
- Assert.assertNotNull(findObject(card, vVCARD.org));
- Resource org = findObjectAsResource(card, vVCARD.org);
- assertContains(org, RDF.TYPE, vVCARD.Organization);
- Assert.assertNotNull(org);
- Assert.assertNotNull(findObject(card, vVCARD.org));
- Assert.assertNotNull(findObject(org, vVCARD.organization_name));
- Assert.assertEquals("Berlin",
- findObjectAsLiteral(org, vVCARD.organization_name));
-
- }
- } finally {
- statements.close();
- }
- }
-
- @Test
- public void testNoMicroformats() throws Exception, IOException,
- ExtractionException {
- extract("/html/html-without-uf.html");
- assertModelEmpty();
- }
-
- @Test
- public void testBasic() throws Exception {
- assertExtract("/microformats/hcard/01-tantek-basic.html");
- assertModelNotEmpty();
- assertContains(RDF.TYPE, vVCARD.VCard);
- // assertContains(RDF.TYPE, vVCARD.Organization);
- assertContains(RDF.TYPE, vVCARD.Name);
- // assertContains(vVCARD.organization_name, "Technorati");
- Resource person = findExactlyOneBlankSubject(vVCARD.fn,
- RDFUtils.literal("Tantek Celik"));
- Assert.assertNotNull(person);
- Resource org = findExactlyOneBlankSubject(vVCARD.organization_name,
- RDFUtils.literal("Technorati"));
- Assert.assertNotNull(org);
- assertContains(person, vVCARD.url, RDFUtils.iri("http://tantek.com/"));
- assertContains(person, vVCARD.n, (Resource) null);
- assertContains(person, vVCARD.org, (Resource) null);
- }
-
- @Test
- public void testMultipleclassNamesOnVCard() throws Exception {
- assertExtract("/microformats/hcard/02-multiple-class-names-on-vcard.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
- Resource name;
- RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
- vVCARD.VCard);
- while (statements.hasNext()) {
- name = statements.next().getSubject();
- assertContains(name, vVCARD.fn, "Ryan King");
- }
- }
-
- @Test
- public void testImpliedNames() throws Exception {
- String[] ns = { "Ryan King", "King", "Ryan",
-
- "Ryan King", "King", "Ryan",
-
- "Ryan King", "King", "Ryan",
-
- "Brian Suda", "Suda", "Brian",
-
- "King, Ryan", "King", "Ryan",
-
- "King, R", "King", "R",
-
- "King R", "R", "King",
-
- "R King", "King", "R",
-
- "King R.", "R.", "King",
-
- "Jesse James Garrett", "Garrett", "Jesse",
-
- "Thomas Vander Wall", "Wall", "Thomas" };
- List<String> NAMES = Arrays.asList(ns);
- assertExtract("/microformats/hcard/03-implied-n.html");
- assertModelNotEmpty();
-
- RepositoryResult<Statement> statements = getStatements(null, vVCARD.fn,
- null);
- Resource vcard;
- int count = 0;
- try {
- while (statements.hasNext()) {
- vcard = statements.next().getSubject();
- assertContains(vcard, RDF.TYPE, vVCARD.VCard);
- Resource name = findObjectAsResource(vcard, vVCARD.n);
-
- final String objLiteral = findObjectAsLiteral(vcard, vVCARD.fn);
- int idx = NAMES.indexOf(objLiteral);
- Assert.assertTrue(
- String.format("not in names: '%s'", objLiteral),
- idx >= 0);
- Assert.assertEquals(NAMES.get(idx + 1),
- findObjectAsLiteral(name, vVCARD.family_name));
- Assert.assertEquals(NAMES.get(idx + 2),
- findObjectAsLiteral(name, vVCARD.given_name));
- count++;
- }
- } finally {
- statements.close();
- }
- Assert.assertEquals(10, count);
- }
-
- @Test
- public void testIgnoreUnknowns() throws Exception {
- assertExtract("/microformats/hcard/04-ignore-unknowns.html");
- assertDefaultVCard();
- assertContains(vVCARD.fn, "Ryan King");
- assertContains(vVCARD.n, (Resource) null);
- assertContains(null, "Ryan");
- assertContains(vVCARD.given_name, "Ryan");
- assertContains(vVCARD.family_name, "King");
- }
-
- @Test
- public void testMailto1() throws Exception {
- assertExtract("/microformats/hcard/05-mailto-1.html");
- assertDefaultVCard();
- assertContains(vVCARD.fn, "Ryan King");
- assertContains(RDF.TYPE, vVCARD.Name);
-
- assertContains(vVCARD.email, RDFUtils.iri("mailto:ryan@technorati.com"));
-
- assertContains(vVCARD.given_name, "Ryan");
- assertContains(vVCARD.family_name, "King");
- }
-
- @Test
- public void testMailto2() throws Exception {
- assertExtract("/microformats/hcard/06-mailto-2.html");
- assertDefaultVCard();
- assertContains(vVCARD.fn, "Brian Suda");
-
- assertContains(vVCARD.email, RDFUtils.iri("mailto:brian@example.com"));
- assertContains(vVCARD.given_name, "Brian");
- assertContains(vVCARD.family_name, "Suda");
- }
-
- @Test
- public void testRelativeUrl() throws Exception {
- assertExtract("/microformats/hcard/07-relative-url.html");
- assertDefaultVCard();
- assertJohn();
- assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
- }
-
- @Test
- public void testRelativeUrlBase() throws Exception {
- assertExtract("/microformats/hcard/08-relative-url-base.html");
- assertDefaultVCard();
- assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
- assertJohn();
- }
-
- @Test
- public void testRelativeUrlXmlBase1() throws Exception {
- assertExtract("/microformats/hcard/09-relative-url-xmlbase-1.html");
- assertDefaultVCard();
- assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
- assertJohn();
- }
-
- @Test
- public void testRelativeUrlXmlBase2() throws Exception {
- assertExtract("/microformats/hcard/10-relative-url-xmlbase-2.html");
- assertDefaultVCard();
- assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
- assertJohn();
- }
-
- @Test
- public void testMultipleUrls() throws Exception {
- assertExtract("/microformats/hcard/11-multiple-urls.html");
- assertDefaultVCard();
- assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/foo")));
- assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/bar")));
-
- assertJohn();
- }
-
- @Test
- public void testImageSrc() throws Exception {
- assertExtract("/microformats/hcard/12-img-src-url.html");
- assertDefaultVCard();
- assertJohn();
- }
-
- @Test
- public void testPhotoLogo() throws Exception {
- assertExtract("/microformats/hcard/13-photo-logo.html");
- assertDefaultVCard();
- assertContains(vVCARD.photo,
- RDFUtils.iri(("http://example.org/picture1.png")));
- assertContains(vVCARD.photo,
- RDFUtils.iri(("http://example.org/picture2.png")));
- assertContains(vVCARD.logo,
- RDFUtils.iri(("http://example.org/picture1.png")));
- assertContains(vVCARD.logo,
- RDFUtils.iri(("http://example.org/picture2.png")));
- assertJohn();
- }
-
- @Test
- public void testImgSrcDataUrl() throws Exception {
- assertExtract("/microformats/hcard/14-img-src-data-url.html");
- assertDefaultVCard();
- Resource data = RDFUtils.iri("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAMAAAAp4XiDAAAABGdBTUEAAK/"
- + "INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAASUExURf///8zMzJmZmWZmZ"
- + "jMzMwAAAPOPemkAAAM1SURBVHjaYmBgYGBkYQUBFkYWFiCPCchixQAMCCZAACF0MAMVM4K4TFh0IGsBCC"
- + "AkOxhYmBnAAKaHhZkZmxaAAGJgYIbpYGBihGgBWsTMzMwE4jIhaWGAYoAAYmCECDExYAcwGxkg5oNIgAB"
- + "igDqLARdgZmGB2wICrKwAAcSA3xKgIxlZ0PwCEEAMBCxhgHoWSQtAADFAAxgfYEJ1GEAAQbQw4tUCsocB"
- + "YQVAADEgu4uRkREeUCwszEwwLhOKLQABhNDCBA4aSDgwwhIAJKqYUPwCEEAMUK/AUwnc9aywJMCI7DAgA"
- + "AggBohZ8JTBhGIJzCoWZL8ABBCYidAB8RUjWppkYUG2BSCAGMDqEMZiswUtXgACiAHsFYixTMywGGLGpgU"
- + "WYgABxAA2mQkWCMyMqFoYmdD8ACQAAogBHJHMrCxg1cyIiICmCkYWDFsAAgiihYmZCewFFpR0BfI3LLch+"
- + "QUggBiQ0iQjEyMDmh54qCBlUIAAYsCRJsElADQvgWKTlRGeKwECiAF3XgGmMEYQYADZzcoA9z5AAMG9RQC"
- + "AtEC9DxBADFiyFyMjVi0wABBAWLQwQdIiuhYGWJIACCBg+KKUJ9BoBRdS2LQALQMIIGDQIEmwAO1kYcVWH"
- + "CDZAhBAqFqYmOAxj2YNtAwDAYAAYmDEiBYWzHKKkRERYiwAAYSphZEZwxZGZiZQVEJTJkAAMTCyokc7M5o"
- + "ORlC5wcoEjxeAAAJqQXU0UB6W5WFmABMtEzMi1wEEEFAbE0YyAUuzMMEsYQalMkQSBQggUDmNPU3C9IA4L"
- + "CxI+QUggEBiKOU8yExgqccCL3chnkPKlQABhGo6ejHBDKmdUHMlQAAhhQvQaGZGkBIkjcAMywLmI+VKgAB"
- + "CSowsTJhZkhlWXiBpAQggYBqBZl9GVOdBcz0LZqEEEEAMqLULMBLg1THWog9IAwQQA0qiZcRW5aPbAhBAD"
- + "Cg1El4tMAAQQAxoiZYZXnTh1AIQQAzo2QlYpDDjcBgrxGEAAcSAJTthswmiBUwDBBC2GpkZJTaRvQ+mAQK"
- + "IAUuuxdZWQvILQABBmSxMjBj5EpcWgACCMoFOYYSpZyHQHgMIMACt2hmoVEikCQAAAABJRU5ErkJggg==");
-
- assertContains(vVCARD.photo, data);
- assertContains(vVCARD.logo, data);
- assertJohn();
- }
-
- @Test
- public void testHonorificAdditionalSingle() throws Exception {
- assertExtract("/microformats/hcard/15-honorific-additional-single.html");
- assertDefaultVCard();
- assertContains(vVCARD.fn, "Mr. John Maurice Doe, Ph.D.");
-
- assertContains(vVCARD.honorific_prefix, "Mr.");
- assertContains(vVCARD.honorific_suffix, "Ph.D.");
-
- assertContains(vVCARD.given_name, "John");
- assertContains(vVCARD.additional_name, "Maurice");
- assertContains(vVCARD.family_name, "Doe");
- }
-
- @Test
- public void testHonorificAdditionalMultiple() throws Exception {
- assertExtract("/microformats/hcard/16-honorific-additional-multiple.html");
- assertDefaultVCard();
- assertContains(vVCARD.honorific_prefix, "Mr.");
- assertContains(vVCARD.honorific_prefix, "Dr.");
-
- assertContains(vVCARD.honorific_suffix, "Ph.D.");
- assertContains(vVCARD.honorific_suffix, "J.D.");
-
- assertContains(vVCARD.given_name, "John");
- assertContains(vVCARD.additional_name, "Maurice");
- assertContains(vVCARD.additional_name, "Benjamin");
- assertContains(vVCARD.family_name, "Doe");
-
- assertContains(vVCARD.fn,
- "Mr. Dr. John Maurice Benjamin Doe Ph.D., J.D.");
- }
-
- @Test
- public void testEMailNotUri() throws Exception {
- assertExtract("/microformats/hcard/17-email-not-uri.html");
- assertDefaultVCard();
- assertJohn();
- assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
- }
-
- @Test
- public void testObjectDataHttpUri() throws Exception {
- assertExtract("/microformats/hcard/18-object-data-http-uri.html");
- assertDefaultVCard();
- assertJohn();
- }
-
- @Ignore
- @Test
- public void testObjectDataDataUri() throws Exception {
- assertExtract("/microformats/hcard/19-object-data-data-uri.html");
- assertDefaultVCard();
- assertJohn();
-
- assertContains(vVCARD.photo, (Resource) null);
- assertContains(vVCARD.logo, (Resource) null);
- }
-
- @Test
- public void testImgAlt() throws Exception {
- assertExtract("/microformats/hcard/20-image-alt.html");
- assertDefaultVCard();
- Resource uri = RDFUtils.iri("http://example.com/foo.png");
- assertContains(vVCARD.photo, uri);
- assertContains(vVCARD.logo, uri);
- assertJohn();
- }
-
- @Test
- public void testAdr() throws Exception {
- assertExtract("/microformats/hcard/22-adr.html");
- assertDefaultVCard();
- assertJohn();
- assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
- }
-
- @Test
- public void testBirthDayDate() throws Exception {
- assertExtract("/microformats/hcard/27-bday-date.html");
- assertModelNotEmpty();
- assertContains(vVCARD.fn, "john doe");
- assertContains(vVCARD.given_name, "john");
- assertContains(vVCARD.family_name, "doe");
- assertContains(vVCARD.bday, "2000-01-01");
- }
-
- @Test
- public void testBirthDayDateTime() throws Exception {
- assertExtract("/microformats/hcard/28-bday-datetime.html");
- assertModelNotEmpty();
- assertContains(vVCARD.fn, "john doe");
- assertContains(vVCARD.given_name, "john");
- assertContains(vVCARD.family_name, "doe");
- assertContains(vVCARD.bday, "2000-01-01T00:00:00");
- }
-
- @Test
- public void testBirthDayDateTimeTimeZone() throws Exception {
- assertExtract("/microformats/hcard/29-bday-datetime-timezone.html");
- assertModelNotEmpty();
- assertContains(vVCARD.fn, "john doe");
- assertContains(vVCARD.given_name, "john");
- assertContains(vVCARD.family_name, "doe");
- assertContains(vVCARD.bday, "2000-01-01T00:00:00-0800");
- }
-
- @Test
- public void testArea() throws Exception {
- assertExtract("/microformats/hcard/33-area.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
- RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
- vVCARD.VCard);
- try {
- while (statements.hasNext()) {
- Resource vcard = statements.next().getSubject();
-
- Assert.assertNotNull(findObject(vcard, vVCARD.fn));
- Assert.assertEquals("Joe Public",
- findObjectAsLiteral(vcard, vVCARD.fn));
- Assert.assertNotNull(findObject(vcard, vVCARD.url));
- String url = findObjectAsLiteral(vcard, vVCARD.url);
- Assert.assertNotNull(findObject(vcard, vVCARD.email));
- String mail = findObjectAsLiteral(vcard, vVCARD.email);
- Assert.assertEquals("http://example.com/", url);
- Assert.assertEquals("mailto:joe@example.com", mail);
- }
- } finally {
- statements.close();
- }
-
- // Check that there are 4 organizations.
- assertStatementsSize(RDF.TYPE, vVCARD.Organization, 4);
- statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
- try {
- while (statements.hasNext()) {
- Resource org = statements.next().getSubject();
- assertContains(null, vVCARD.org, org);
- Assert.assertNotNull(findObject(org, vVCARD.organization_name));
- Assert.assertEquals("Joe Public",
- findObjectAsLiteral(org, vVCARD.organization_name));
- }
- } finally {
- statements.close();
- }
- }
-
- @Test
- public void testNotes() throws Exception {
- final String[] NOTES = { "Note 1", "Note 3",
- "Note 4 with a ; and a , to be escaped" };
-
- assertExtract("/microformats/hcard/34-notes.html");
- assertModelNotEmpty();
- RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
- vVCARD.VCard);
- try {
- while (statements.hasNext()) {
- Resource vcard = statements.next().getSubject();
- String fn = findObjectAsLiteral(vcard, vVCARD.fn);
- String mail = findObjectAsLiteral(vcard, vVCARD.email);
- Assert.assertEquals("Joe Public", fn);
- Assert.assertEquals("mailto:joe@example.com", mail);
- }
- } finally {
- statements.close();
- }
- for (String note : NOTES) {
- assertContains(vVCARD.note, note);
- }
- }
-
- @Test
- public void testIncludePattern() throws Exception {
- assertExtract("/microformats/hcard/35-include-pattern.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
-
- RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
- vVCARD.Name);
- try {
- while (statements.hasNext()) {
- Resource name = statements.next().getSubject();
- Assert.assertNotNull(findObject(name, vVCARD.given_name));
- String gn = findObjectAsLiteral(name, vVCARD.given_name);
- Assert.assertEquals("James", gn);
- Assert.assertNotNull(findObject(name, vVCARD.family_name));
- String fn = findObjectAsLiteral(name, vVCARD.family_name);
- Assert.assertEquals("Levine", fn);
- }
- } finally {
- statements.close();
- }
-
- assertStatementsSize(RDF.TYPE, vVCARD.Organization, 2);
- statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
- try {
- while (statements.hasNext()) {
- Resource org = statements.next().getSubject();
- Assert.assertNotNull(findObject(org, vVCARD.organization_name));
- Assert.assertEquals("SimplyHired",
- findObjectAsLiteral(org, vVCARD.organization_name));
-
- RepositoryResult<Statement> statements2 = getStatements(null,
- vVCARD.org, org);
- try {
- while (statements2.hasNext()) {
- Resource vcard = statements2.next().getSubject();
- Assert.assertNotNull(findObject(vcard, vVCARD.title));
- Assert.assertEquals("Microformat Brainstormer",
- findObjectAsLiteral(vcard, vVCARD.title));
- }
- } finally {
- statements2.close();
- }
- }
- } finally {
- statements.close();
- }
- }
-
- @Test
- public void testUid() throws Exception {
- assertExtract("/microformats/hcard/38-uid.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
- RepositoryResult<Statement> iter = getStatements(null, RDF.TYPE,
- vVCARD.VCard);
- while (iter.hasNext()) {
- Resource vcard = iter.next().getSubject();
- Assert.assertNotNull(findObject(vcard, vVCARD.fn));
- String fn = findObjectAsLiteral(vcard, vVCARD.fn);
- Assert.assertNotNull(findObject(vcard, vVCARD.url));
- String url = findObjectAsLiteral(vcard, vVCARD.url);
- Assert.assertNotNull(findObject(vcard, vVCARD.uid));
- String uid = findObjectAsLiteral(vcard, vVCARD.uid);
- Assert.assertEquals("Ryan King", fn);
- Assert.assertEquals("http://theryanking.com/contact/", url);
- Assert.assertEquals("http://theryanking.com/contact/", uid);
-
- }
- }
-
- @Test
- public void testIgnoreChildren() throws Exception {
- assertExtract("/microformats/hcard/41-ignore-children.html");
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
- assertContains(vVCARD.fn, "Melanie Kl\u00f6\u00df");
- assertContains(vVCARD.email, RDFUtils.iri("mailto:mkloes@gmail.com"));
- assertContains(vVCARD.adr, (Resource) null);
- assertNotContains(null, vVCARD.postal_code, "53127");
- assertNotContains(null, vVCARD.locality, "Bonn");
- assertNotContains(null, vVCARD.street_address, "Ippendorfer Weg. 24");
- assertNotContains(null, vVCARD.country_name, "Germany");
- }
-
- /**
- * Tests that the HCardName data is not cumulative and is cleaned up at each
- * extraction.
- *
- * @throws Exception
- */
- @Test
- public void testCumulativeHNames() throws Exception {
- assertExtract("/microformats/hcard/linkedin-michelemostarda.html");
- assertModelNotEmpty();
- assertStatementsSize(vVCARD.given_name, "Michele", 7);
- assertStatementsSize(vVCARD.family_name, "Mostarda", 7);
- }
-
- /**
- * Tests the detection and prevention of the inclusion of an ancestor by a
- * sibling node. This test is related to issue <a
- * href="https://issues.apache.org/jira/browse/ANY23-58">ANY23-58</a>.
- *
- * @throws IOException
- * @throws ExtractionException
- */
- @Test
- public void testInfiniteLoop() throws IOException, ExtractionException {
- assertExtract("/microformats/hcard/infinite-loop.html", false);
- assertIssue(IssueReport.IssueLevel.WARNING,
- ".*Current node tries to include an ancestor node.*");
- }
-
- /**
- * Tests extractor performances. This test is related to issue <a
- * href="https://issues.apache.org/jira/browse/ANY23-76">ANY23-76</a>.
- */
- @Ignore
- @Test(timeout = 30 * 1000)
- public void testExtractionPerformance() {
- assertExtract("/microformats/hcard/performance.html");
- }
-
- private void assertDefaultVCard() throws Exception {
- assertModelNotEmpty();
- assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
- }
-
- private void assertJohn() throws Exception {
- assertContains(vVCARD.fn, "John Doe");
- assertContains(vVCARD.given_name, "John");
- assertContains(vVCARD.family_name, "Doe");
- }
+ /** vCard vocabulary instance whose properties and classes are referenced by the assertions in this test. */
+ private static final VCard vVCARD = VCard.getInstance();
+
+ /**
+  * Supplies the factory of the extractor under test.
+  *
+  * @return a new {@link HCardExtractorFactory}
+  */
+ protected ExtractorFactory<?> getExtractorFactory() {
+ return new HCardExtractorFactory();
+ }
+
+ /**
+  * Extracts the "17-email-not-uri" fixture and checks that the default single John Doe card
+  * carries its e-mail address as a {@code mailto:} IRI.
+  */
+ @Test
+ public void testEMailNotUriReal() throws Exception {
+     final String fixture = "/microformats/hcard/17-email-not-uri.html";
+     assertExtract(fixture);
+
+     // One vCard only, and it must describe John Doe.
+     assertDefaultVCard();
+     assertJohn();
+
+     assertContains(
+             vVCARD.email,
+             RDFUtils.iri("mailto:john@example.com"));
+ }
+
+ /**
+  * Checks telephone extraction on the "21-tel" fixture: each listed number must be present as a
+  * {@code tel:} IRI, and one additional number must be reported as fax, work and home phone.
+  */
+ @Test
+ public void testTel() throws Exception {
+     assertExtract("/microformats/hcard/21-tel.html");
+     assertDefaultVCard();
+
+     final String[] expectedNumbers = {
+         "+1.415.555.1231", "+1.415.555.1235",
+         "+1.415.555.1236", "+1.415.555.1237",
+         "+1.415.555.1238", "+1.415.555.1239",
+         "+1.415.555.1240", "+1.415.555.1241",
+         "+1.415.555.1242", "+1.415.555.1243"
+     };
+     for (int i = 0; i < expectedNumbers.length; i++) {
+         assertContains(vVCARD.tel, RDFUtils.iri("tel:" + expectedNumbers[i]));
+     }
+
+     // The same number is expected under all three typed telephone properties.
+     final Resource typedNumber = RDFUtils.iri("tel:+14155551233");
+     assertContains(vVCARD.fax, typedNumber);
+     assertContains(vVCARD.workTel, typedNumber);
+     assertContains(vVCARD.homeTel, typedNumber);
+
+     assertJohn();
+ }
+
+ /**
+  * Extracts the "23-abbr-title-everything" fixture and checks the literal and IRI values of the
+  * single vCard, while asserting that geo and adr details are not expanded by this extractor.
+  */
+ @Test
+ public void testAbbrTitleEverything() throws Exception {
+ assertExtract("/microformats/hcard/23-abbr-title-everything.html");
+ assertDefaultVCard();
+
+ assertContains(vVCARD.fn, "John Doe");
+ assertContains(vVCARD.nickname, "JJ");
+
+ assertContains(vVCARD.given_name, "Jonathan");
+ assertContains(vVCARD.additional_name, "John");
+ assertContains(vVCARD.family_name, "Doe-Smith");
+ assertContains(vVCARD.honorific_suffix, "Medical Doctor");
+
+ assertContains(vVCARD.title, "President");
+ assertContains(vVCARD.role, "Chief");
+ assertContains(vVCARD.tz, "-0700");
+ assertContains(vVCARD.bday, "2006-04-04");
+ assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
+ assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
+ assertContains(vVCARD.class_, "public");
+ assertContains(vVCARD.note, "this is a note");
+ assertContains(vVCARD.organization_name, "Intellicorp");
+ assertContains(vVCARD.organization_unit, "Intelligence");
+
+ // We define the geo property in this extractor _but_ we do not parse it.
+ assertContains(vVCARD.geo, (Resource) null);
+ // Thus we do not contain the Location type or its coordinates.
+ // The interaction is in RDFMergerTest.java.
+ assertNotContains(RDF.TYPE, vVCARD.Location);
+ assertNotContains(null, vVCARD.latitude, "37.77");
+ assertNotContains(null, vVCARD.longitude, "-122.41");
+
+ // Same as for geo: the adr property is present, the Address type and its fields are not.
+ assertContains(vVCARD.adr, (Resource) null);
+ assertNotContains(RDF.TYPE, vVCARD.Address);
+ assertNotContains(null, vVCARD.post_office_box, "Box 1234");
+ assertNotContains(null, vVCARD.extended_address, "Suite 100");
+ assertNotContains(null, vVCARD.street_address, "123 Fake Street");
+ assertNotContains(null, vVCARD.locality, "San Francisco");
+ assertNotContains(null, vVCARD.region, "California");
+ assertNotContains(null, vVCARD.postal_code, "12345-6789");
+ assertNotContains(null, vVCARD.country_name, "United States of America");
+ assertNotContains(null, vVCARD.addressType, "work");
+ }
+
+ /**
+  * Extracts the "25-geo-abbr" fixture and checks the "Paradise" card and organization, plus the
+  * absence of expanded geo coordinates.
+  */
+ @Test
+ public void testGeoAbbr() throws Exception {
+ assertExtract("/microformats/hcard/25-geo-abbr.html");
+ assertModelNotEmpty();
+ assertContains(vVCARD.fn, "Paradise");
+ assertContains(RDF.TYPE, vVCARD.Organization);
+ assertContains(vVCARD.organization_name, "Paradise");
+ // The geo property is present, but the Location type and its coordinates are not.
+ assertContains(vVCARD.geo, (Resource) null);
+ assertNotContains(RDF.TYPE, vVCARD.Location);
+ assertNotContains(null, vVCARD.latitude, "30.267991");
+ assertNotContains(null, vVCARD.longitude, "-97.739568");
+ }
+
+ /**
+  * Extracts the "26-ancestors" fixture and verifies which statements are present (card, name and
+  * organization values) and which must be absent (composed fn, geo coordinates, address fields).
+  */
+ @Test
+ public void testAncestors() throws Exception {
+     assertExtract("/microformats/hcard/26-ancestors.html");
+     assertModelNotEmpty();
+
+     // --- values expected directly on the card ---
+     assertContains(vVCARD.fn, "John Doe");
+     assertContains(vVCARD.nickname, "JJ");
+     assertContains(vVCARD.tz, "-0700");
+     assertContains(vVCARD.title, "President");
+     assertContains(vVCARD.role, "Chief");
+     assertContains(vVCARD.tel, RDFUtils.iri("tel:415.555.1234"));
+     assertContains(vVCARD.uid, "abcdefghijklmnopqrstuvwxyz");
+     assertContains(vVCARD.note, "this is a note");
+     assertContains(vVCARD.class_, "public");
+     assertContains(vVCARD.geo, (Resource) null);
+
+     // --- organization ---
+     assertContains(vVCARD.organization_name, "Intellicorp");
+     assertContains(vVCARD.organization_unit, "Intelligence");
+
+     // --- structured name ---
+     assertContains(RDF.TYPE, vVCARD.Name);
+     assertContains(vVCARD.honorific_prefix, "Mister");
+     assertContains(vVCARD.given_name, "Jonathan");
+     assertContains(vVCARD.additional_name, "John");
+     assertContains(vVCARD.family_name, "Doe-Smith");
+     assertContains(vVCARD.honorific_suffix, "Medical Doctor");
+
+     // --- statements that must NOT be produced ---
+     assertNotContains(null, vVCARD.fn,
+             "Mister Jonathan John Doe-Smith Medical Doctor");
+
+     assertNotContains(RDF.TYPE, vVCARD.Location);
+     assertNotContains(null, vVCARD.latitude, "37.77");
+     assertNotContains(null, vVCARD.longitude, "-122.41");
+
+     assertNotContains(RDF.TYPE, vVCARD.Address);
+     assertNotContains(null, vVCARD.post_office_box, "Box 1234");
+     assertNotContains(null, vVCARD.extended_address, "Suite 100");
+     assertNotContains(null, vVCARD.street_address, "123 Fake Street");
+     assertNotContains(null, vVCARD.locality, "San Francisco");
+     assertNotContains(null, vVCARD.region, "California");
+     assertNotContains(null, vVCARD.postal_code, "12345-6789");
+     assertNotContains(null, vVCARD.country_name, "United States of America");
+     assertNotContains(null, vVCARD.addressType, "work");
+ }
+
+ /**
+  * Extracts the "30-fn-org" fixture, expects four vCards, and checks that each has an fn and an
+  * org node with an organization name. The "Dan Connolly" card must additionally have an n node.
+  */
+ @Test
+ public void testfnOrg() throws Exception {
+     assertExtract("/microformats/hcard/30-fn-org.html");
+     assertModelNotEmpty();
+     assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
+
+     RepositoryResult<Statement> cards = getStatements(null, RDF.TYPE, vVCARD.VCard);
+     try {
+         while (cards.hasNext()) {
+             Resource cardNode = cards.next().getSubject();
+
+             assertNotNull(findObject(cardNode, vVCARD.fn));
+             String formattedName = findObjectAsLiteral(cardNode, vVCARD.fn);
+
+             assertNotNull(findObject(cardNode, vVCARD.org));
+             Resource orgNode = findObjectAsResource(cardNode, vVCARD.org);
+             assertNotNull(findObject(orgNode, vVCARD.organization_name));
+
+             if (!formattedName.equals("Dan Connolly")) {
+                 continue;
+             }
+             assertNotNull(findObject(cardNode, vVCARD.n));
+             // NOTE(review): this compares fn with the org node's string value, not with its
+             // organization_name literal - confirm that this is the intended check.
+             assertFalse(formattedName.equals(orgNode.stringValue()));
+         }
+     } finally {
+         cards.close();
+     }
+ }
+
+ /**
+  * Extracts the "31-include" fixture and checks that all three vCards describe Brian Suda and
+  * that each carries the included e-mail address.
+  */
+ @Test
+ public void testInclude() throws Exception {
+     assertExtract("/microformats/hcard/31-include.html");
+     assertModelNotEmpty();
+     assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
+     assertStatementsSize(vVCARD.email, (Value) null, 3);
+
+     RepositoryResult<Statement> cards = getStatements(null, RDF.TYPE, vVCARD.VCard);
+     try {
+         while (cards.hasNext()) {
+             Resource cardNode = cards.next().getSubject();
+
+             // Formatted name.
+             assertNotNull(findObject(cardNode, vVCARD.fn));
+             String formattedName = findObjectAsLiteral(cardNode, vVCARD.fn);
+             assertEquals("Brian Suda", formattedName);
+
+             // Home page.
+             assertNotNull(findObject(cardNode, vVCARD.url));
+             Resource homePage = findObjectAsResource(cardNode, vVCARD.url);
+             assertEquals("http://suda.co.uk/", homePage.stringValue());
+
+             // Structured name.
+             Resource nameNode = findObjectAsResource(cardNode, vVCARD.n);
+             String givenName = findObjectAsLiteral(nameNode, vVCARD.given_name);
+             assertEquals("Brian", givenName);
+             String familyName = findObjectAsLiteral(nameNode, vVCARD.family_name);
+             assertEquals("Suda", familyName);
+
+             // Data pulled in through the include.
+             assertNotNull(findObject(cardNode, vVCARD.email));
+             assertEquals("mailto:correct@example.com",
+                     findObjectAsLiteral(cardNode, vVCARD.email));
+         }
+     } finally {
+         cards.close();
+     }
+ }
+
+ /**
+  * Extracts the "32-header" fixture and checks every vCard: the "Jane Doe" card is checked with
+  * {@code assertNotFound} for org, while the "John Doe" and "Brian Suda" cards must point to
+  * http://example.org/ and to an Organization named "example.org".
+  */
+ @Test
+ public void testHeader() throws Exception {
+     assertExtract("/microformats/hcard/32-header.html");
+     assertModelNotEmpty();
+     // Checks fn, given name and family name of John Doe.
+     assertJohn();
+
+     RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+             vVCARD.VCard);
+     try {
+         Resource example = RDFUtils.iri("http://example.org/");
+         while (statements.hasNext()) {
+             Resource card = statements.next().getSubject();
+             assertNotNull(findObject(card, vVCARD.fn));
+
+             String fn = findObjectAsLiteral(card, vVCARD.fn);
+             if ("Jane Doe".equals(fn)) {
+                 assertNotFound(card, vVCARD.org);
+             } else {
+                 assertTrue("John Doe".equals(fn)
+                         || "Brian Suda".equals(fn));
+
+                 assertNotNull(findObject(card, vVCARD.url));
+                 assertEquals(example,
+                         findObjectAsResource(card, vVCARD.url));
+
+                 assertNotNull(findObject(card, vVCARD.org));
+                 Resource org = findObjectAsResource(card, vVCARD.org);
+                 // Null-check first: a null subject is used as a wildcard by the assertion
+                 // helpers in this file, so a missing org must fail here, not match anything.
+                 assertNotNull(org);
+                 assertContains(org, RDF.TYPE, vVCARD.Organization);
+                 assertNotNull(findObject(org,
+                         vVCARD.organization_name));
+                 assertEquals("example.org",
+                         findObjectAsLiteral(org, vVCARD.organization_name));
+             }
+         }
+         // Just to be sure there are no spurious statements.
+         // assertStatementsSize(VCARD.org, null, 2);
+         assertStatementsSize(vVCARD.url, example, 2);
+     } finally {
+         statements.close();
+     }
+ }
+
+ /**
+  * Extracts the "33-area" fixture, expects five vCards, and checks that each one has the fn
+  * "Joe Public", the url http://example.com/ and the e-mail mailto:joe@example.com.
+  */
+ @Test
+ public void testAreaFull() throws Exception {
+     assertExtract("/microformats/hcard/33-area.html");
+     assertModelNotEmpty();
+     assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
+
+     RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+             vVCARD.VCard);
+     // Close the result even when an assertion fails, as the other tests in this class do.
+     try {
+         while (statements.hasNext()) {
+             Resource vcard = statements.next().getSubject();
+             final Value fnValue = findObject(vcard, vVCARD.fn);
+             assertNotNull(fnValue);
+             String fn = fnValue.stringValue();
+             final Value vcardValue = findObject(vcard, vVCARD.url);
+             assertNotNull(vcardValue);
+             String url = vcardValue.stringValue();
+             final Value emailValue = findObject(vcard, vVCARD.email);
+             assertNotNull(emailValue);
+             String mail = emailValue.stringValue();
+             assertEquals("Joe Public", fn);
+             assertEquals("http://example.com/", url);
+             assertEquals("mailto:joe@example.com", mail);
+         }
+     } finally {
+         statements.close();
+     }
+ }
+
+ /**
+  * Extracts the "36-categories" fixture and checks names, plain-text categories and tag-URL
+  * categories, plus the absence of category "D4".
+  */
+ @Test
+ public void testCategories() throws Exception {
+     assertExtract("/microformats/hcard/36-categories.html");
+     assertModelNotEmpty();
+
+     // Two people are expected: "Joe User" and "john doe".
+     assertContains(vVCARD.given_name, "Joe");
+     assertContains(vVCARD.given_name, "john");
+     assertContains(vVCARD.family_name, "doe");
+     assertContains(vVCARD.family_name, "User");
+     assertContains(vVCARD.fn, "john doe");
+     assertContains(vVCARD.fn, "Joe User");
+
+     // Categories given as plain text.
+     assertContains(vVCARD.category, "C1");
+     assertContains(vVCARD.category, "C2a");
+     assertContains(vVCARD.category, "C4");
+     assertContains(vVCARD.category, "User");
+
+     // Categories given as tag URLs.
+     final String tagBase = "http://example.com/tag/";
+     final String[] tagNames = {
+         "C3", "C5", "C6", "C7", "C9", "luser", "D1", "D2", "D3"
+     };
+     for (int i = 0; i < tagNames.length; i++) {
+         assertContains(vVCARD.category, tagBase + tagNames[i]);
+     }
+
+     assertNotContains(null, vVCARD.category, "D4");
+ }
+
+ /**
+  * Extracts the "37-singleton" fixture and checks that each listed property occurs exactly once
+  * with the expected value (geo occurs twice, without expanded coordinates).
+  */
+ @Test
+ public void testSingleton() throws Exception {
+ // This test presumably checks that we just get the first fn, and likewise for the other
+ // singleton properties - TODO confirm against the fixture.
+ assertExtract("/microformats/hcard/37-singleton.html");
+ assertModelNotEmpty();
+ assertStatementsSize(vVCARD.fn, (Value) null, 1);
+ assertContains(vVCARD.fn, "john doe 1");
+
+ assertStatementsSize(RDF.TYPE, vVCARD.Name, 1);
+ assertStatementsSize(vVCARD.given_name, (Value) null, 1);
+ assertContains(vVCARD.given_name, "john");
+ assertStatementsSize(vVCARD.family_name, (Value) null, 1);
+ assertContains(vVCARD.family_name, "doe");
+ assertStatementsSize(vVCARD.sort_string, (Value) null, 1);
+ assertContains(vVCARD.sort_string, "d");
+
+ assertStatementsSize(vVCARD.bday, (Value) null, 1);
+ assertContains(vVCARD.bday, "20060707");
+ assertStatementsSize(vVCARD.rev, (Value) null, 1);
+ assertContains(vVCARD.rev, "20060707");
+ assertStatementsSize(vVCARD.class_, (Value) null, 1);
+ assertContains(vVCARD.class_, "public");
+ assertStatementsSize(vVCARD.tz, (Value) null, 1);
+ assertContains(vVCARD.tz, "+0600");
+
+ // Why 0? Because this extractor does not look at the geo microformat (uF).
+ assertStatementsSize(RDF.TYPE, vVCARD.Location, 0);
+ assertStatementsSize(vVCARD.geo, (Value) null, 2);
+
+ assertNotContains(null, vVCARD.latitude, "123.45");
+ assertNotContains(null, vVCARD.longitude, "67.89");
+
+ assertStatementsSize(vVCARD.uid, (Value) null, 1);
+ assertContains(vVCARD.uid, "unique-id-1");
+ }
+
+ /**
+  * Extracts the "38-uid" fixture, expects four vCards, and checks that each one describes
+  * Ryan King with url and uid both equal to http://theryanking.com/contact/.
+  */
+ @Test
+ public void testUidFull() throws Exception {
+     assertExtract("/microformats/hcard/38-uid.html");
+     assertModelNotEmpty();
+     assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
+
+     final String contactPage = "http://theryanking.com/contact/";
+     RepositoryResult<Statement> cards = getStatements(null, RDF.TYPE, vVCARD.VCard);
+     try {
+         while (cards.hasNext()) {
+             Resource cardNode = cards.next().getSubject();
+
+             // Formatted name.
+             assertNotNull(findObject(cardNode, vVCARD.fn));
+             assertEquals("Ryan King", findObjectAsLiteral(cardNode, vVCARD.fn));
+
+             // Structured name.
+             assertNotNull(findObject(cardNode, vVCARD.n));
+             Resource nameNode = findObjectAsResource(cardNode, vVCARD.n);
+             assertNotNull(nameNode);
+             assertNotNull(findObject(nameNode, vVCARD.given_name));
+             String givenName = findObjectAsLiteral(nameNode, vVCARD.given_name);
+             assertEquals("Ryan", givenName);
+             assertNotNull(findObject(nameNode, vVCARD.family_name));
+             String familyName = findObjectAsLiteral(nameNode, vVCARD.family_name);
+             assertEquals("King", familyName);
+
+             // url is a resource, uid a literal; both must hold the same address.
+             assertNotNull(findObject(cardNode, vVCARD.url));
+             Resource urlNode = findObjectAsResource(cardNode, vVCARD.url);
+
+             assertNotNull(findObject(cardNode, vVCARD.uid));
+             String uidText = findObjectAsLiteral(cardNode, vVCARD.uid);
+
+             assertEquals(contactPage, urlNode.stringValue());
+             assertEquals(contactPage, uidText);
+         }
+     } finally {
+         cards.close();
+     }
+ }
+
+ @Test
+ public void testRomanianWikipedia() throws Exception {
+ assertExtract("/microformats/hcard/40-fn-inside-adr.html");
+ assertModelNotEmpty();
+ assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
+ RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+ vVCARD.VCard);
+
+ try {
+ while (statements.hasNext()) {
+ Resource card = statements.next().getSubject();
+ assertNotNull(findObject(card, vVCARD.fn));
+ String fn = findObjectAsLiteral(card, vVCARD.fn);
+ assertEquals("Berlin", fn);
+
+ assertNotNull(findObject(card, vVCARD.org));
+ Resource org = findObjectAsResource(card, vVCARD.org);
+ assertContains(org, RDF.TYPE, vVCARD.Organization);
+ assertNotNull(org);
+ assertNotNull(findObject(card, vVCARD.org));
+ assertNotNull(findObject(org, vVCARD.organization_name));
+ assertEquals("Berlin",
+ findObjectAsLiteral(org, vVCARD.organization_name));
+
+ }
+ } finally {
+ statements.close();
+ }
+ }
+
+ @Test
+ public void testNoMicroformats() throws Exception, IOException,
+ ExtractionException {
+ extract("/html/html-without-uf.html");
+ assertModelEmpty();
+ }
+
+ @Test
+ public void testBasic() throws Exception {
+ assertExtract("/microformats/hcard/01-tantek-basic.html");
+ assertModelNotEmpty();
+ assertContains(RDF.TYPE, vVCARD.VCard);
+ // assertContains(RDF.TYPE, vVCARD.Organization);
+ assertContains(RDF.TYPE, vVCARD.Name);
+ // assertContains(vVCARD.organization_name, "Technorati");
+ Resource person = findExactlyOneBlankSubject(vVCARD.fn,
+ RDFUtils.literal("Tantek Celik"));
+ assertNotNull(person);
+ Resource org = findExactlyOneBlankSubject(vVCARD.organization_name,
+ RDFUtils.literal("Technorati"));
+ assertNotNull(org);
+ assertContains(person, vVCARD.url, RDFUtils.iri("http://tantek.com/"));
+ assertContains(person, vVCARD.n, (Resource) null);
+ assertContains(person, vVCARD.org, (Resource) null);
+ }
+
+ @Test
+ public void testMultipleclassNamesOnVCard() throws Exception {
+ assertExtract("/microformats/hcard/02-multiple-class-names-on-vcard.html");
+ assertModelNotEmpty();
+ assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
+ Resource name;
+ RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+ vVCARD.VCard);
+ while (statements.hasNext()) {
+ name = statements.next().getSubject();
+ assertContains(name, vVCARD.fn, "Ryan King");
+ }
+ }
+
+ @Test
+ public void testImpliedNames() throws Exception {
+ String[] ns = { "Ryan King", "King", "Ryan",
+
+ "Ryan King", "King", "Ryan",
+
+ "Ryan King", "King", "Ryan",
+
+ "Brian Suda", "Suda", "Brian",
+
+ "King, Ryan", "King", "Ryan",
+
+ "King, R", "King", "R",
+
+ "King R", "R", "King",
+
+ "R King", "King", "R",
+
+ "King R.", "R.", "King",
+
+ "Jesse James Garrett", "Garrett", "Jesse",
+
+ "Thomas Vander Wall", "Wall", "Thomas" };
+ List<String> NAMES = Arrays.asList(ns);
+ assertExtract("/microformats/hcard/03-implied-n.html");
+ assertModelNotEmpty();
+
+ RepositoryResult<Statement> statements = getStatements(null, vVCARD.fn,
+ null);
+ Resource vcard;
+ int count = 0;
+ try {
+ while (statements.hasNext()) {
+ vcard = statements.next().getSubject();
+ assertContains(vcard, RDF.TYPE, vVCARD.VCard);
+ Resource name = findObjectAsResource(vcard, vVCARD.n);
+
+ final String objLiteral = findObjectAsLiteral(vcard, vVCARD.fn);
+ int idx = NAMES.indexOf(objLiteral);
+ assertTrue(
+ String.format("not in names: '%s'", objLiteral),
+ idx >= 0);
+ assertEquals(NAMES.get(idx + 1),
+ findObjectAsLiteral(name, vVCARD.family_name));
+ assertEquals(NAMES.get(idx + 2),
+ findObjectAsLiteral(name, vVCARD.given_name));
+ count++;
+ }
+ } finally {
+ statements.close();
+ }
+ assertEquals(10, count);
+ }
+
+ @Test
+ public void testIgnoreUnknowns() throws Exception {
+ assertExtract("/microformats/hcard/04-ignore-unknowns.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.fn, "Ryan King");
+ assertContains(vVCARD.n, (Resource) null);
+ assertContains(null, "Ryan");
+ assertContains(vVCARD.given_name, "Ryan");
+ assertContains(vVCARD.family_name, "King");
+ }
+
+ @Test
+ public void testMailto1() throws Exception {
+ assertExtract("/microformats/hcard/05-mailto-1.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.fn, "Ryan King");
+ assertContains(RDF.TYPE, vVCARD.Name);
+
+ assertContains(vVCARD.email, RDFUtils.iri("mailto:ryan@technorati.com"));
+
+ assertContains(vVCARD.given_name, "Ryan");
+ assertContains(vVCARD.family_name, "King");
+ }
+
+ @Test
+ public void testMailto2() throws Exception {
+ assertExtract("/microformats/hcard/06-mailto-2.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.fn, "Brian Suda");
+
+ assertContains(vVCARD.email, RDFUtils.iri("mailto:brian@example.com"));
+ assertContains(vVCARD.given_name, "Brian");
+ assertContains(vVCARD.family_name, "Suda");
+ }
+
+ @Test
+ public void testRelativeUrl() throws Exception {
+ assertExtract("/microformats/hcard/07-relative-url.html");
+ assertDefaultVCard();
+ assertJohn();
+ assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
+ }
+
+ @Test
+ public void testRelativeUrlBase() throws Exception {
+ assertExtract("/microformats/hcard/08-relative-url-base.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.url, RDFUtils.iri(baseIRI + "home/blah"));
+ assertJohn();
+ }
+
+ @Test
+ public void testRelativeUrlXmlBase1() throws Exception {
+ assertExtract("/microformats/hcard/09-relative-url-xmlbase-1.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
+ assertJohn();
+ }
+
+ @Test
+ public void testRelativeUrlXmlBase2() throws Exception {
+ assertExtract("/microformats/hcard/10-relative-url-xmlbase-2.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.url, RDFUtils.iri((baseIRI + "home/blah")));
+ assertJohn();
+ }
+
+ @Test
+ public void testMultipleUrls() throws Exception {
+ assertExtract("/microformats/hcard/11-multiple-urls.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/foo")));
+ assertContains(vVCARD.url, RDFUtils.iri(("http://example.com/bar")));
+
+ assertJohn();
+ }
+
+ @Test
+ public void testImageSrc() throws Exception {
+ assertExtract("/microformats/hcard/12-img-src-url.html");
+ assertDefaultVCard();
+ assertJohn();
+ }
+
+ @Test
+ public void testPhotoLogo() throws Exception {
+ assertExtract("/microformats/hcard/13-photo-logo.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.photo,
+ RDFUtils.iri(("http://example.org/picture1.png")));
+ assertContains(vVCARD.photo,
+ RDFUtils.iri(("http://example.org/picture2.png")));
+ assertContains(vVCARD.logo,
+ RDFUtils.iri(("http://example.org/picture1.png")));
+ assertContains(vVCARD.logo,
+ RDFUtils.iri(("http://example.org/picture2.png")));
+ assertJohn();
+ }
+
+ @Test
+ public void testImgSrcDataUrl() throws Exception {
+ assertExtract("/microformats/hcard/14-img-src-data-url.html");
+ assertDefaultVCard();
+ Resource data = RDFUtils.iri("data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAMAAAAp4XiDAAAABGdBTUEAAK/"
+ + "INwWK6QAAABl0RVh0U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAAASUExURf///8zMzJmZmWZmZ"
+ + "jMzMwAAAPOPemkAAAM1SURBVHjaYmBgYGBkYQUBFkYWFiCPCchixQAMCCZAACF0MAMVM4K4TFh0IGsBCC"
+ + "AkOxhYmBnAAKaHhZkZmxaAAGJgYIbpYGBihGgBWsTMzMwE4jIhaWGAYoAAYmCECDExYAcwGxkg5oNIgAB"
+ + "igDqLARdgZmGB2wICrKwAAcSA3xKgIxlZ0PwCEEAMBCxhgHoWSQtAADFAAxgfYEJ1GEAAQbQw4tUCsocB"
+ + "YQVAADEgu4uRkREeUCwszEwwLhOKLQABhNDCBA4aSDgwwhIAJKqYUPwCEEAMUK/AUwnc9aywJMCI7DAgA"
+ + "AggBohZ8JTBhGIJzCoWZL8ABBCYidAB8RUjWppkYUG2BSCAGMDqEMZiswUtXgACiAHsFYixTMywGGLGpgU"
+ + "WYgABxAA2mQkWCMyMqFoYmdD8ACQAAogBHJHMrCxg1cyIiICmCkYWDFsAAgiihYmZCewFFpR0BfI3LLch+"
+ + "QUggBiQ0iQjEyMDmh54qCBlUIAAYsCRJsElADQvgWKTlRGeKwECiAF3XgGmMEYQYADZzcoA9z5AAMG9RQC"
+ + "AtEC9DxBADFiyFyMjVi0wABBAWLQwQdIiuhYGWJIACCBg+KKUJ9BoBRdS2LQALQMIIGDQIEmwAO1kYcVWH"
+ + "CDZAhBAqFqYmOAxj2YNtAwDAYAAYmDEiBYWzHKKkRERYiwAAYSphZEZwxZGZiZQVEJTJkAAMTCyokc7M5o"
+ + "ORlC5wcoEjxeAAAJqQXU0UB6W5WFmABMtEzMi1wEEEFAbE0YyAUuzMMEsYQalMkQSBQggUDmNPU3C9IA4L"
+ + "CxI+QUggEBiKOU8yExgqccCL3chnkPKlQABhGo6ejHBDKmdUHMlQAAhhQvQaGZGkBIkjcAMywLmI+VKgAB"
+ + "CSowsTJhZkhlWXiBpAQggYBqBZl9GVOdBcz0LZqEEEEAMqLULMBLg1THWog9IAwQQA0qiZcRW5aPbAhBAD"
+ + "Cg1El4tMAAQQAxoiZYZXnTh1AIQQAzo2QlYpDDjcBgrxGEAAcSAJTthswmiBUwDBBC2GpkZJTaRvQ+mAQK"
+ + "IAUuuxdZWQvILQABBmSxMjBj5EpcWgACCMoFOYYSpZyHQHgMIMACt2hmoVEikCQAAAABJRU5ErkJggg==");
+
+ assertContains(vVCARD.photo, data);
+ assertContains(vVCARD.logo, data);
+ assertJohn();
+ }
+
+ @Test
+ public void testHonorificAdditionalSingle() throws Exception {
+ assertExtract("/microformats/hcard/15-honorific-additional-single.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.fn, "Mr. John Maurice Doe, Ph.D.");
+
+ assertContains(vVCARD.honorific_prefix, "Mr.");
+ assertContains(vVCARD.honorific_suffix, "Ph.D.");
+
+ assertContains(vVCARD.given_name, "John");
+ assertContains(vVCARD.additional_name, "Maurice");
+ assertContains(vVCARD.family_name, "Doe");
+ }
+
+ @Test
+ public void testHonorificAdditionalMultiple() throws Exception {
+ assertExtract("/microformats/hcard/16-honorific-additional-multiple.html");
+ assertDefaultVCard();
+ assertContains(vVCARD.honorific_prefix, "Mr.");
+ assertContains(vVCARD.honorific_prefix, "Dr.");
+
+ assertContains(vVCARD.honorific_suffix, "Ph.D.");
+ assertContains(vVCARD.honorific_suffix, "J.D.");
+
+ assertContains(vVCARD.given_name, "John");
+ assertContains(vVCARD.additional_name, "Maurice");
+ assertContains(vVCARD.additional_name, "Benjamin");
+ assertContains(vVCARD.family_name, "Doe");
+
+ assertContains(vVCARD.fn,
+ "Mr. Dr. John Maurice Benjamin Doe Ph.D., J.D.");
+ }
+
+ @Test
+ public void testEMailNotUri() throws Exception {
+ assertExtract("/microformats/hcard/17-email-not-uri.html");
+ assertDefaultVCard();
+ assertJohn();
+ assertContains(vVCARD.email, RDFUtils.iri("mailto:john@example.com"));
+ }
+
+ @Test
+ public void testObjectDataHttpUri() throws Exception {
+ assertExtract("/microformats/hcard/18-object-data-http-uri.html");
+ assertDefaultVCard();
+ assertJohn();
+ }
+
+ @Test
+ public void testObjectDataDataUri() throws Exception {
+ assertExtract("/microformats/hcard/19-object-data-data-uri.html");
+ assertDefaultVCard();
+ assertJohn();
+
+ assertContains(vVCARD.photo, (Resource) null);
+ assertContains(vVCARD.logo, (Resource) null);
+ }
+
+ @Test
+ public void testImgAlt() throws Exception {
+ assertExtract("/microformats/hcard/20-image-alt.html");
+ assertDefaultVCard();
+ Resource uri = RDFUtils.iri("http://example.com/foo.png");
+ assertContains(vVCARD.photo, uri);
+ assertContains(vVCARD.logo, uri);
+ assertJohn();
+ }
+
+ @Test
+ public void testAdr() throws Exception {
+ assertExtract("/microformats/hcard/22-adr.html");
+ assertDefaultVCard();
+ assertJohn();
+ assertStatementsSize(RDF.TYPE, vVCARD.Address, 0);
+ }
+
+ @Test
+ public void testBirthDayDate() throws Exception {
+ assertExtract("/microformats/hcard/27-bday-date.html");
+ assertModelNotEmpty();
+ assertContains(vVCARD.fn, "john doe");
+ assertContains(vVCARD.given_name, "john");
+ assertContains(vVCARD.family_name, "doe");
+ assertContains(vVCARD.bday, "2000-01-01");
+ }
+
+ @Test
+ public void testBirthDayDateTime() throws Exception {
+ assertExtract("/microformats/hcard/28-bday-datetime.html");
+ assertModelNotEmpty();
+ assertContains(vVCARD.fn, "john doe");
+ assertContains(vVCARD.given_name, "john");
+ assertContains(vVCARD.family_name, "doe");
+ assertContains(vVCARD.bday, "2000-01-01T00:00:00");
+ }
+
+ @Test
+ public void testBirthDayDateTimeTimeZone() throws Exception {
+ assertExtract("/microformats/hcard/29-bday-datetime-timezone.html");
+ assertModelNotEmpty();
+ assertContains(vVCARD.fn, "john doe");
+ assertContains(vVCARD.given_name, "john");
+ assertContains(vVCARD.family_name, "doe");
+ assertContains(vVCARD.bday, "2000-01-01T00:00:00-0800");
+ }
+
+ @Test
+ public void testArea() throws Exception {
+ assertExtract("/microformats/hcard/33-area.html");
+ assertModelNotEmpty();
+ assertStatementsSize(RDF.TYPE, vVCARD.VCard, 5);
+ RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+ vVCARD.VCard);
+ try {
+ while (statements.hasNext()) {
+ Resource vcard = statements.next().getSubject();
+
+ assertNotNull(findObject(vcard, vVCARD.fn));
+ assertEquals("Joe Public",
+ findObjectAsLiteral(vcard, vVCARD.fn));
+ assertNotNull(findObject(vcard, vVCARD.url));
+ String url = findObjectAsLiteral(vcard, vVCARD.url);
+ assertNotNull(findObject(vcard, vVCARD.email));
+ String mail = findObjectAsLiteral(vcard, vVCARD.email);
+ assertEquals("http://example.com/", url);
+ assertEquals("mailto:joe@example.com", mail);
+ }
+ } finally {
+ statements.close();
+ }
+
+ // Check that there are 4 organizations.
+ assertStatementsSize(RDF.TYPE, vVCARD.Organization, 4);
+ statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
+ try {
+ while (statements.hasNext()) {
+ Resource org = statements.next().getSubject();
+ assertContains(null, vVCARD.org, org);
+ assertNotNull(findObject(org, vVCARD.organization_name));
+ assertEquals("Joe Public",
+ findObjectAsLiteral(org, vVCARD.organization_name));
+ }
+ } finally {
+ statements.close();
+ }
+ }
+
+ @Test
+ public void testNotes() throws Exception {
+ final String[] NOTES = { "Note 1", "Note 3",
+ "Note 4 with a ; and a , to be escaped" };
+
+ assertExtract("/microformats/hcard/34-notes.html");
+ assertModelNotEmpty();
+ RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+ vVCARD.VCard);
+ try {
+ while (statements.hasNext()) {
+ Resource vcard = statements.next().getSubject();
+ String fn = findObjectAsLiteral(vcard, vVCARD.fn);
+ String mail = findObjectAsLiteral(vcard, vVCARD.email);
+ assertEquals("Joe Public", fn);
+ assertEquals("mailto:joe@example.com", mail);
+ }
+ } finally {
+ statements.close();
+ }
+ for (String note : NOTES) {
+ assertContains(vVCARD.note, note);
+ }
+ }
+
+ @Test
+ public void testIncludePattern() throws Exception {
+ assertExtract("/microformats/hcard/35-include-pattern.html");
+ assertModelNotEmpty();
+ assertStatementsSize(RDF.TYPE, vVCARD.VCard, 3);
+
+ RepositoryResult<Statement> statements = getStatements(null, RDF.TYPE,
+ vVCARD.Name);
+ try {
+ while (statements.hasNext()) {
+ Resource name = statements.next().getSubject();
+ assertNotNull(findObject(name, vVCARD.given_name));
+ String gn = findObjectAsLiteral(name, vVCARD.given_name);
+ assertEquals("James", gn);
+ assertNotNull(findObject(name, vVCARD.family_name));
+ String fn = findObjectAsLiteral(name, vVCARD.family_name);
+ assertEquals("Levine", fn);
+ }
+ } finally {
+ statements.close();
+ }
+
+ assertStatementsSize(RDF.TYPE, vVCARD.Organization, 2);
+ statements = getStatements(null, RDF.TYPE, vVCARD.Organization);
+ try {
+ while (statements.hasNext()) {
+ Resource org = statements.next().getSubject();
+ assertNotNull(findObject(org, vVCARD.organization_name));
+ assertEquals("SimplyHired",
+ findObjectAsLiteral(org, vVCARD.organization_name));
+
+ RepositoryResult<Statement> statements2 = getStatements(null,
+ vVCARD.org, org);
+ try {
+ while (statements2.hasNext()) {
+ Resource vcard = statements2.next().getSubject();
+ assertNotNull(findObject(vcard, vVCARD.title));
+ assertEquals("Microformat Brainstormer",
+ findObjectAsLiteral(vcard, vVCARD.title));
+ }
+ } finally {
+ statements2.close();
+ }
+ }
+ } finally {
+ statements.close();
+ }
+ }
+
+ @Test
+ public void testUid() throws Exception {
+ assertExtract("/microformats/hcard/38-uid.html");
+ assertModelNotEmpty();
+ assertStatementsSize(RDF.TYPE, vVCARD.VCard, 4);
+ RepositoryResult<Statement> iter = getStatements(null, RDF.TYPE,
+ vVCARD.VCard);
+ while (iter.hasNext()) {
+ Resource vcard = iter.next().getSubject();
+ assertNotNull(findObject(vcard, vVCARD.fn));
+ String fn = findObjectAsLiteral(vcard, vVCARD.fn);
+ assertNotNull(findObject(vcard, vVCARD.url));
+ String url = findObjectAsLiteral(vcard, vVCARD.url);
+ assertNotNull(findObject(vcard, vVCARD.uid));
+ String uid = findObjectAsLiteral(vcard, vVCARD.uid);
+ assertEquals("Ryan King", fn);
+ assertEquals("http://theryanking.com/contact/", url);
+ assertEquals("http://theryanking.com/contact/", uid);
+
+ }
+ }
+
+ @Test
+ public void testIgnoreChildren() throws Exception {
+ assertExtract("/microformats/hcard/41-ignore-children.html");
+ assertModelNotEmpty();
+ assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
+ assertContains(vVCARD.fn, "Melanie Kl\u00f6\u00df");
+ assertContains(vVCARD.email, RDFUtils.iri("mailto:mkloes@gmail.com"));
+ assertContains(vVCARD.adr, (Resource) null);
+ assertNotContains(null, vVCARD.postal_code, "53127");
+ assertNotContains(null, vVCARD.locality, "Bonn");
+ assertNotContains(null, vVCARD.street_address, "Ippendorfer Weg. 24");
+ assertNotContains(null, vVCARD.country_name, "Germany");
+ }
+
+ /**
+ * Tests that the HCardName data is not cumulative and is cleaned up at each
+ * extraction.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testCumulativeHNames() throws Exception {
+ assertExtract("/microformats/hcard/linkedin-michelemostarda.html");
+ assertModelNotEmpty();
+ assertStatementsSize(vVCARD.given_name, "Michele", 7);
+ assertStatementsSize(vVCARD.family_name, "Mostarda", 7);
+ }
+
+ /**
+ * Tests the detection and prevention of the inclusion of an ancestor by a
+ * sibling node. This test is related to issue <a
+ * href="https://issues.apache.org/jira/browse/ANY23-58">ANY23-58</a>.
+ *
+ * @throws IOException
+ * @throws ExtractionException
+ */
+ @Test
+ public void testInfiniteLoop() throws IOException, ExtractionException {
+ assertExtract("/microformats/hcard/infinite-loop.html", false);
+ assertIssue(IssueReport.IssueLevel.WARNING,
+ ".*Current node tries to include an ancestor node.*");
+ }
+
+ /**
+ * Tests extractor performances. This test is related to issue <a
+ * href="https://issues.apache.org/jira/browse/ANY23-76">ANY23-76</a>.
+ */
+ @Test(timeout = 30 * 1000)
+ public void testExtractionPerformance() {
+ assertExtract("/microformats/hcard/performance.html");
+ }
+
+ private void assertDefaultVCard() throws Exception {
+ assertModelNotEmpty();
+ assertStatementsSize(RDF.TYPE, vVCARD.VCard, 1);
+ }
+
+ private void assertJohn() throws Exception {
+ assertContains(vVCARD.fn, "John Doe");
+ assertContains(vVCARD.given_name, "John");
+ assertContains(vVCARD.family_name, "Doe");
+ }
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
index 815d220..5f9ee7d 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
@@ -23,7 +23,6 @@ import org.apache.any23.vocab.FOAF;
import org.apache.any23.vocab.HListing;
import org.apache.any23.vocab.SINDICE;
import org.junit.Test;
-import org.junit.Ignore;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.slf4j.Logger;
@@ -151,7 +150,6 @@ public class HListingExtractorTest extends AbstractExtractorTestCase {
RDFUtils.iri(baseIRI.stringValue() + "pic.jpg"));
}
- @Ignore("ANY23-159: Error with nodes and markup extracted from HListingExtractorTest.testKelkoo & testKelkooFull")
@Test
public void testKelkoo() throws Exception {
assertExtract("/microformats/hlisting/kelkoo.html");
@@ -187,7 +185,6 @@ public class HListingExtractorTest extends AbstractExtractorTestCase {
assertContains(vHLISTING.price, "\u00A3480.17");
}
- @Ignore("ANY23-159: Error with nodes and markup extracted from HListingExtractorTest.testKelkoo & testKelkooFull")
@Test
public void testKelkooFull() throws Exception {
assertExtract("/microformats/hlisting/kelkoo-full.html");
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
index ffd4e26..4fa237e 100644
--- a/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataParserTest.java
@@ -37,7 +37,6 @@ import org.apache.any23.extractor.html.TagSoupParser;
import org.apache.any23.util.StreamUtils;
import org.apache.commons.io.IOUtils;
import org.junit.Assert;
-import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
@@ -54,12 +53,11 @@ import static org.junit.Assert.assertFalse;
*/
public class MicrodataParserTest {
- @Rule
- public final Timeout timeout = new Timeout(100, TimeUnit.SECONDS);
-
+// @Rule
+// public final Timeout timeout = new Timeout(100, TimeUnit.SECONDS);
+
private static final Logger logger = LoggerFactory.getLogger(MicrodataParserTest.class);
- @Ignore("TODO: Determine the cause of this")
@Test
public void testBasicFeatures() throws IOException {
extractItemsAndVerifyJSONSerialization(
@@ -68,7 +66,6 @@ public class MicrodataParserTest {
);
}
- @Ignore("TODO: Determine the cause of this")
@Test
public void testNestedMicrodata() throws IOException {
extractItemsAndVerifyJSONSerialization(
@@ -77,7 +74,6 @@ public class MicrodataParserTest {
);
}
- @Ignore("TODO: Determine the cause of this")
@Test
public void testAdvancedItemrefManagement() throws IOException {
extractItemsAndVerifyJSONSerialization(
@@ -86,7 +82,6 @@ public class MicrodataParserTest {
);
}
- @Ignore("TODO: Determine the cause of this")
@Test
public void testMicrodataJSONSerialization() throws IOException {
final Document document = getMicrodataDom("microdata-nested");
@@ -97,6 +92,7 @@ public class MicrodataParserTest {
final String expected = StreamUtils.asString(
this.getClass().getResourceAsStream("/microdata/microdata-json-serialization.json")
);
+
Assert.assertEquals("Unexpected serialization for Microdata file.", expected, baos.toString());
}
@@ -122,29 +118,29 @@ public class MicrodataParserTest {
final AtomicBoolean foundFailure = new AtomicBoolean(false);
for (int i = 0; i < threadCount; i++) {
threads.add(new Thread("Test-thread-" + i) {
- @Override
- public void run() {
- try {
- beforeLatch.await();
- int counter = 0;
- while (counter++ < attemptCount && !foundFailure.get()) {
- final Document document = getDom(content);
- final MicrodataParserReport report = MicrodataParser.getMicrodata(document);
- final ItemScope target = report.getDetectedItemScopes()[4];
- Date actualDate = target.getProperties().get("birthday").get(0).getValue().getAsDate();
- if (!expectedDate.equals(actualDate)) {
- foundFailure.set(true);
- }
- }
- }
- catch (Exception ex) {
- ex.printStackTrace();
- foundFailure.set(true);
- }
- finally {
- afterLatch.countDown();
- }
- }
+ @Override
+ public void run() {
+ try {
+ beforeLatch.await();
+ int counter = 0;
+ while (counter++ < attemptCount && !foundFailure.get()) {
+ final Document document = getDom(content);
+ final MicrodataParserReport report = MicrodataParser.getMicrodata(document);
+ final ItemScope target = report.getDetectedItemScopes()[4];
+ Date actualDate = target.getProperties().get("birthday").get(0).getValue().getAsDate();
+ if (!expectedDate.equals(actualDate)) {
+ foundFailure.set(true);
+ }
+ }
+ }
+ catch (Exception ex) {
+ ex.printStackTrace();
+ foundFailure.set(true);
+ }
+ finally {
+ afterLatch.countDown();
+ }
+ }
});
}
for (Thread thread : threads) {
@@ -167,8 +163,8 @@ public class MicrodataParserTest {
public void testDeferProperties() throws IOException, MicrodataParserException {
final Document document = getMicrodataDom("microdata-itemref");
final MicrodataParser parser = new MicrodataParser(document);
- final ItemProp[] deferred = parser.deferProperties("ip5", "ip4", "ip3", "unexisting");
- Assert.assertEquals(3, deferred.length);
+ final ItemProp[] deferred = parser.deferProperties(document.getElementById("is2"), "ip5", "ip4", "ip3", "unexisting");
+ Assert.assertEquals(2, deferred.length);
}
/**
@@ -181,8 +177,8 @@ public class MicrodataParserTest {
public void testDeferPropertiesLoopDetection1() throws IOException, MicrodataParserException {
final Document document = getMicrodataDom("microdata-itemref");
final MicrodataParser parser = new MicrodataParser(document);
- parser.setErrorMode(MicrodataParser.ErrorMode.StopAtFirstError);
- parser.deferProperties("loop0");
+ parser.setErrorMode(MicrodataParser.ErrorMode.STOP_AT_FIRST_ERROR);
+ parser.deferProperties(null, "loop0");
}
/**
@@ -195,8 +191,8 @@ public class MicrodataParserTest {
public void testDeferPropertiesLoopDetection2() throws IOException, MicrodataParserException {
final Document document = getMicrodataDom("microdata-itemref");
final MicrodataParser parser = new MicrodataParser(document);
- parser.setErrorMode(MicrodataParser.ErrorMode.StopAtFirstError);
- parser.deferProperties("loop2");
+ parser.setErrorMode(MicrodataParser.ErrorMode.STOP_AT_FIRST_ERROR);
+ parser.deferProperties(null, "loop2");
}
/**
@@ -210,9 +206,10 @@ public class MicrodataParserTest {
public void testDeferPropertiesStateManagement() throws IOException, MicrodataParserException {
final Document document = getMicrodataDom("microdata-itemref");
final MicrodataParser parser = new MicrodataParser(document);
- Assert.assertEquals(1, parser.deferProperties("ip1").length);
- Assert.assertEquals(1, parser.deferProperties("ip1").length);
- Assert.assertEquals(1, parser.deferProperties("ip1").length);
+ String ip1 = "ip1";
+ Assert.assertEquals(1, parser.deferProperties(document.getElementById(ip1), ip1).length);
+ Assert.assertEquals(1, parser.deferProperties(document.getElementById(ip1), ip1).length);
+ Assert.assertEquals(1, parser.deferProperties(document.getElementById(ip1), ip1).length);
}
private Document getDom(String document) throws IOException {
@@ -247,7 +244,7 @@ public class MicrodataParserTest {
private void extractItemsAndVerifyJSONSerialization(String htmlFile, String expectedResult)
throws IOException {
final MicrodataParserReport report = extractItems(htmlFile);
- final ItemScope[] items = report.getDetectedItemScopes();
+ final ItemScope[] items = report.getDetectedItemScopes();
final MicrodataParserException[] errors = report.getErrors();
logger.debug("begin itemScopes");
@@ -267,7 +264,7 @@ public class MicrodataParserTest {
final int expectedResults = getExpectedResultCount(resultContent);
final int expectedErrors = getExpectedErrorsCount(resultContent);
Assert.assertEquals("Unexpected number of detect items.", expectedResults, items.length);
- Assert.assertEquals("Unexpected number of errors." , expectedErrors, errors.length);
+ Assert.assertEquals("Unexpected number of errors.", expectedErrors, errors.length);
for (int i = 0; i < items.length; i++) {
Assert.assertEquals(
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
index f52cd59..680fe47 100644
--- a/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
@@ -19,7 +19,6 @@ package org.apache.any23.extractor.rdfa;
import org.apache.any23.extractor.ExtractorFactory;
import org.junit.Assert;
-import org.junit.Ignore;
import org.junit.Test;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.repository.RepositoryException;
@@ -65,7 +64,6 @@ public class RDFaExtractorTest extends AbstractRDFaExtractorTestCase {
}
}
- @Ignore("RDFa1 parser not able to parse RDFa11 CURIES in this case")
@Test
public void testRDFa11CURIEs() throws Exception {
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java b/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java
index f1f2d88..85f33b5 100644
--- a/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/xpath/TemplateXPathExtractorRuleImplTest.java
@@ -75,15 +75,15 @@ public class TemplateXPathExtractorRuleImplTest {
@Test
public void testAddRemoveTemplates() {
final QuadTemplate template1 = new QuadTemplate(
- new TemplateSubject(TemplateSubject.Type.uri, "http://sub1", false),
+ new TemplateSubject(TemplateSubject.Type.URI, "http://sub1", false),
new TemplatePredicate("http://pred1", false),
- new TemplateObject(TemplateObject.Type.uri, "http://obj1", false),
+ new TemplateObject(TemplateObject.Type.URI, "http://obj1", false),
new TemplateGraph("http://graph1", false)
);
final QuadTemplate template2 = new QuadTemplate(
- new TemplateSubject(TemplateSubject.Type.uri, "http://sub2", false),
+ new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
new TemplatePredicate("http://pred2", false),
- new TemplateObject(TemplateObject.Type.uri, "http://obj2", false),
+ new TemplateObject(TemplateObject.Type.URI, "http://obj2", false),
new TemplateGraph("http://graph2", false)
);
@@ -94,9 +94,9 @@ public class TemplateXPathExtractorRuleImplTest {
xPathExtractionRule.add(new Variable("v1", "//"));
final QuadTemplate template3 = new QuadTemplate(
- new TemplateSubject(TemplateSubject.Type.uri, "http://sub2", false),
+ new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
new TemplatePredicate("http://pred2", false),
- new TemplateObject(TemplateObject.Type.uri, "v1", true),
+ new TemplateObject(TemplateObject.Type.URI, "v1", true),
new TemplateGraph("http://graph2", false)
);
xPathExtractionRule.add(template3);
@@ -106,9 +106,9 @@ public class TemplateXPathExtractorRuleImplTest {
public void testAddTemplateWithNoDeclaredVarCheck() {
xPathExtractionRule.add(
new QuadTemplate(
- new TemplateSubject(TemplateSubject.Type.uri, "http://sub2", false),
+ new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
new TemplatePredicate("http://pred2", false),
- new TemplateObject(TemplateObject.Type.uri, "v1", true),
+ new TemplateObject(TemplateObject.Type.URI, "v1", true),
new TemplateGraph("http://graph2", false)
)
);
@@ -127,15 +127,15 @@ public class TemplateXPathExtractorRuleImplTest {
@Test
public void testProcess() throws IOException {
final QuadTemplate template1 = new QuadTemplate(
- new TemplateSubject(TemplateSubject.Type.uri, "http://sub1", false),
+ new TemplateSubject(TemplateSubject.Type.URI, "http://sub1", false),
new TemplatePredicate("http://pred1", false),
- new TemplateObject(TemplateObject.Type.literal, "v1", true),
+ new TemplateObject(TemplateObject.Type.LITERAL, "v1", true),
new TemplateGraph("http://graph1", false)
);
final QuadTemplate template2 = new QuadTemplate(
- new TemplateSubject(TemplateSubject.Type.uri, "http://sub2", false),
+ new TemplateSubject(TemplateSubject.Type.URI, "http://sub2", false),
new TemplatePredicate("v2", true),
- new TemplateObject(TemplateObject.Type.uri, "http://obj2", false),
+ new TemplateObject(TemplateObject.Type.URI, "http://obj2", false),
new TemplateGraph("http://graph2", false)
);
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java b/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java
index ce5b1d6..de6d980 100644
--- a/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java
+++ b/core/src/test/java/org/apache/any23/filter/IgnoreAccidentalRDFaTest.java
@@ -28,7 +28,7 @@ import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
-import static org.mockito.Matchers.any;
+import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java b/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java
index ebe6ba3..92ae30f 100644
--- a/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java
+++ b/core/src/test/java/org/apache/any23/writer/JSONWriterTest.java
@@ -20,11 +20,7 @@ package org.apache.any23.writer;
import org.junit.Assert;
import org.junit.Test;
import org.eclipse.rdf4j.model.IRI;
-import org.eclipse.rdf4j.model.impl.BNodeImpl;
-import org.eclipse.rdf4j.model.impl.LiteralImpl;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
-import org.eclipse.rdf4j.model.vocabulary.RDF;
-
import java.io.ByteArrayOutputStream;
/**
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/service/src/test/java/org/apache/any23/servlet/ServletTest.java
----------------------------------------------------------------------
diff --git a/service/src/test/java/org/apache/any23/servlet/ServletTest.java b/service/src/test/java/org/apache/any23/servlet/ServletTest.java
index bb168a1..bc87737 100644
--- a/service/src/test/java/org/apache/any23/servlet/ServletTest.java
+++ b/service/src/test/java/org/apache/any23/servlet/ServletTest.java
@@ -25,7 +25,6 @@ import org.apache.any23.util.StringUtils;
import org.junit.Assert;
import org.junit.After;
import org.junit.Before;
-import org.junit.Ignore;
import org.junit.Test;
import org.mortbay.jetty.testing.HttpTester;
import org.mortbay.jetty.testing.ServletTester;
@@ -229,7 +228,6 @@ public class ServletTest {
* @throws Exception
*/
@Test
- @Ignore
public void testGETwithURLEncoding() throws Exception {
content = null;
HttpTester response = doGetRequest("/best/http://semanticweb.org/wiki/Knud_M%C3%B6ller");
@@ -241,7 +239,6 @@ public class ServletTest {
* @throws Exception
*/
@Test
- @Ignore
public void testGETwithURLEncodingWithQuery() throws Exception {
content = null;
HttpTester response = doGetRequest("/best/http://semanticweb.org/wiki/Knud_M%C3%B6ller?appo=xxx");
@@ -253,7 +250,6 @@ public class ServletTest {
* @throws Exception
*/
@Test
- @Ignore
public void testGETwithURLEncodingWithFragment() throws Exception {
content = null;
HttpTester response = doGetRequest("/best/http://semanticweb.org/wiki/Knud_M%C3%B6ller#abcde");
[6/6] any23 git commit: Merge branch 'ANY23-320'
Posted by le...@apache.org.
Merge branch 'ANY23-320'
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6d0606f9
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6d0606f9
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6d0606f9
Branch: refs/heads/master
Commit: 6d0606f9b2ff3fa05d26b454e9a8fb24d3bfa24d
Parents: 97e364a 4640860
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Jan 8 08:16:00 2018 -0500
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Jan 8 08:16:00 2018 -0500
----------------------------------------------------------------------
.../any23/cli/ExtractorDocumentationTest.java | 2 -
.../java/org/apache/any23/cli/RoverTest.java | 2 -
.../org/apache/any23/cli/SimpleRoverTest.java | 2 -
.../any23/extractor/csv/CSVExtractor.java | 23 +-
.../extractor/html/EmbeddedJSONLDExtractor.java | 363 ++--
.../any23/extractor/html/HTMLMetaExtractor.java | 58 +-
.../apache/any23/extractor/microdata/Item.java | 10 +-
.../extractor/microdata/ItemPropValue.java | 31 +-
.../any23/extractor/microdata/ItemScope.java | 29 +-
.../extractor/microdata/MicrodataExtractor.java | 35 +-
.../extractor/microdata/MicrodataParser.java | 136 +-
.../any23/extractor/xpath/QuadTemplate.java | 1 +
.../any23/extractor/xpath/TemplateObject.java | 39 +-
.../any23/extractor/xpath/TemplateSubject.java | 13 +-
.../any23/extractor/yaml/ElementsProcessor.java | 24 +-
.../any23/rdf/Any23ValueFactoryWrapper.java | 61 +-
.../java/org/apache/any23/rdf/RDFUtils.java | 92 +-
.../XMLValidationReportSerializer.java | 21 +-
.../any23/validator/rule/AboutNotURIRule.java | 1 +
.../validator/rule/MetaNameMisuseRule.java | 1 +
.../org/apache/any23/vocab/RDFSchemaUtils.java | 24 +-
.../any23/extractor/csv/CSVExtractorTest.java | 178 +-
.../html/AbstractExtractorTestCase.java | 1592 ++++++++-------
.../extractor/html/HCardExtractorTest.java | 1852 +++++++++---------
.../extractor/html/HListingExtractorTest.java | 3 -
.../microdata/MicrodataParserTest.java | 81 +-
.../any23/extractor/rdfa/RDFaExtractorTest.java | 2 -
.../TemplateXPathExtractorRuleImplTest.java | 24 +-
.../any23/filter/IgnoreAccidentalRDFaTest.java | 2 +-
.../org/apache/any23/writer/JSONWriterTest.java | 4 -
.../org/apache/any23/servlet/ServletTest.java | 4 -
....2.1-non-normative-example-1-expected.nquads | 8 +-
.../5.2.1-non-normative-example-1.html | 48 +-
....2.1-non-normative-example-2-expected.nquads | 33 +-
.../5.2.1-non-normative-example-2.html | 16 +-
.../microdata-basic-expected.properties | 6 +-
.../resources/microdata/microdata-basic.html | 15 +-
.../microdata-itemref-expected.properties | 20 +-
.../resources/microdata/microdata-itemref.html | 46 +-
.../microdata/microdata-json-serialization.json | 2 +-
.../microdata/microdata-nested-expected.nquads | 19 +-
.../microdata-nested-expected.properties | 4 +-
.../resources/microdata/microdata-nested.html | 33 +-
.../microdata-richsnippet-expected.nquads | 27 +-
44 files changed, 2468 insertions(+), 2519 deletions(-)
----------------------------------------------------------------------
[3/6] any23 git commit: ANY23-320 Address @Ignore tests in Any23 and
ANY23-131 Nested Microdata are not extracted
Posted by le...@apache.org.
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
index 8886e31..ed300af 100644
--- a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
@@ -21,11 +21,9 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.html.AbstractExtractorTestCase;
import org.apache.any23.vocab.CSV;
import org.junit.Test;
-import org.eclipse.rdf4j.model.impl.LiteralImpl;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
-import org.eclipse.rdf4j.repository.RepositoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -36,93 +34,93 @@ import org.slf4j.LoggerFactory;
*/
public class CSVExtractorTest extends AbstractExtractorTestCase {
- private static final Logger logger = LoggerFactory
- .getLogger(CSVExtractorTest.class);
-
- @Override
- protected ExtractorFactory<?> getExtractorFactory() {
- return new CSVExtractorFactory();
- }
-
- @Test
- public void testExtractionCommaSeparated() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 28);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- }
-
- @Test
- public void testExtractionSemicolonSeparated() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 28);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- }
-
- @Test
- public void testExtractionTabSeparated() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 28);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- }
-
- @Test
- public void testTypeManagement() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 21);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XMLSchema.FLOAT));
- assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XMLSchema.FLOAT));
- assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XMLSchema.INTEGER));
- }
-
- @Test
- public void testExtractionEmptyValue() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 25);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XMLSchema.STRING));
- assertContains(null, null,
- SimpleValueFactory.getInstance().createLiteral("Giovanni", XMLSchema.STRING));
- }
+ private static final Logger logger = LoggerFactory
+ .getLogger(CSVExtractorTest.class);
+
+ @Override
+ protected ExtractorFactory<?> getExtractorFactory() {
+ return new CSVExtractorFactory();
+ }
+
+ @Test
+ public void testExtractionCommaSeparated() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 28);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testExtractionSemicolonSeparated() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 28);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testExtractionTabSeparated() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 28);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testTypeManagement() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 21);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XMLSchema.FLOAT));
+ assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XMLSchema.FLOAT));
+ assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testExtractionEmptyValue() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 25);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XMLSchema.STRING));
+ assertContains(null, null,
+ SimpleValueFactory.getInstance().createLiteral("Giovanni", XMLSchema.STRING));
+ }
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
index 855a88c..5354924 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
@@ -31,6 +31,7 @@ import org.apache.any23.writer.RepositoryWriter;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
+import org.eclipse.rdf4j.common.iteration.Iterations;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
@@ -62,802 +63,799 @@ import java.util.Map;
*/
public abstract class AbstractExtractorTestCase extends AbstractAny23TestBase {
- /**
- * Base test document.
- */
- protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/"); // TODO:
- // change
- // base
- // IRI
- // string.
-
- /**
- * Internal connection used to collect extraction results.
- */
- protected RepositoryConnection conn;
-
- /**
- * The latest generated report.
- */
- private SingleDocumentExtractionReport report;
-
- private Sail store;
-
- private SailRepository repository;
-
- /**
- * Constructor.
- */
- public AbstractExtractorTestCase() {
- super();
- }
-
- /**
- * @return the factory of the extractor to be tested.
- */
- protected abstract ExtractorFactory<?> getExtractorFactory();
-
- /**
- * Test case initialization.
- *
- * @throws Exception
- */
- @Before
- public void setUp() throws Exception {
- super.setUp();
- store = new MemoryStore();
- repository = new SailRepository(store);
- repository.initialize();
- conn = repository.getConnection();
- }
-
- /**
- * Test case resources release.
- *
- * @throws RepositoryException
- */
- @After
- public void tearDown() throws RepositoryException {
- try {
- conn.close();
- } finally {
- repository.shutDown();
- }
- conn = null;
- report = null;
- store = null;
- repository = null;
- }
-
- /**
- * @return the connection to the memory repository.
- */
- protected RepositoryConnection getConnection() {
- return conn;
- }
-
- /**
- * @return the last generated report.
- */
- protected SingleDocumentExtractionReport getReport() {
- return report;
- }
-
- /**
- * Returns the list of issues raised by a given extractor.
- *
- * @param extractorName
- * name of the extractor.
- * @return collection of issues.
- */
- protected Collection<IssueReport.Issue> getIssues(String extractorName) {
- for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report
- .getExtractorToIssues().entrySet()) {
- if (issueEntry.getKey().equals(extractorName)) {
- return issueEntry.getValue();
- }
- }
- return Collections.emptyList();
- }
-
- /**
- * Returns the list of issues raised by the extractor under testing.
- *
- * @return collection of issues.
- */
- protected Collection<IssueReport.Issue> getIssues() {
- return getIssues(getExtractorFactory().getExtractorName());
- }
-
- /**
- * Applies the extractor provided by the {@link #getExtractorFactory()} to
- * the specified resource.
- *
- * @param resource
- * resource name.
- * @throws org.apache.any23.extractor.ExtractionException
- * @throws IOException
- */
- // TODO: MimeType detector to null forces the execution of all extractors,
- // but extraction
- // tests should be based on mimetype detection.
- protected void extract(String resource) throws ExtractionException,
- IOException {
- SingleDocumentExtraction ex = new SingleDocumentExtraction(
- new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI
- .toString()), getExtractorFactory(),
- new RepositoryWriter(conn));
- ex.setMIMETypeDetector(null);
- report = ex.run();
- }
-
- /**
- * Performs data extraction over the content of a resource and assert that
- * the extraction was fine.
- *
- * @param resource
- * resource name.
- * @param assertNoIssues
- * if <code>true</code>invokes {@link #assertNoIssues()} after
- * the extraction.
- */
- protected void assertExtract(String resource, boolean assertNoIssues) {
- try {
- extract(resource);
- if (assertNoIssues)
- assertNoIssues();
- } catch (ExtractionException ex) {
- throw new RuntimeException(ex);
- } catch (IOException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- /**
- * Performs data extraction over the content of a resource and assert that
- * the extraction was fine and raised no issues.
- *
- * @param resource
- */
- protected void assertExtract(String resource) {
- assertExtract(resource, true);
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(_ p o)</code>.
- *
- * @param p
- * predicate
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertContains(IRI p, Resource o) throws RepositoryException {
- assertContains(null, p, o);
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(_ p o)</code>.
- *
- * @param p
- * predicate
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertContains(IRI p, String o) throws RepositoryException {
- assertContains(null, p, RDFUtils.literal(o));
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(_ p o)</code>.
- *
- * @param p
- * predicate
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertNotContains(IRI p, Resource o)
- throws RepositoryException {
- assertNotContains(null, p, o);
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertContains(Resource s, IRI p, Value o)
- throws RepositoryException {
- Assert.assertTrue(
- getFailedExtractionMessage()
- + String.format("Cannot find triple (%s %s %s)", s, p,
- o), conn.hasStatement(s, p, o, false));
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertNotContains(Resource s, IRI p, String o)
- throws RepositoryException {
- Assert.assertFalse(getFailedExtractionMessage(),
- conn.hasStatement(s, p, RDFUtils.literal(o), false));
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertNotContains(Resource s, IRI p, Resource o)
- throws RepositoryException {
- Assert.assertFalse(getFailedExtractionMessage(),
- conn.hasStatement(s, p, o, false));
- }
-
- /**
- * Asserts that the model contains at least a statement.
- *
- * @throws RepositoryException
- */
- protected void assertModelNotEmpty() throws RepositoryException {
- Assert.assertFalse("The model is expected to not be empty."
- + getFailedExtractionMessage(), conn.isEmpty());
- }
-
- /**
- * Asserts that the model doesn't contain the pattern <code>(s p o)</code>
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertNotContains(Resource s, IRI p, Literal o)
- throws RepositoryException {
- Assert.assertFalse(getFailedExtractionMessage(),
- conn.hasStatement(s, p, o, false));
- }
-
- /**
- * Asserts that the model is expected to contains no statements.
- *
- * @throws RepositoryException
- */
- protected void assertModelEmpty() throws RepositoryException {
- Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty());
- }
-
- /**
- * Asserts that the extraction generated no issues.
- */
- protected void assertNoIssues() {
- for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report
- .getExtractorToIssues().entrySet()) {
- if (entry.getValue().size() > 0) {
- System.out.println("Unexpected issue for extractor " + entry.getKey()
- + " : " + entry.getValue());
- }
- for(Issue nextIssue : entry.getValue()) {
- if(nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) {
- Assert.fail("Unexpected issue for extractor " + entry.getKey()
- + " : " + entry.getValue());
- }
- }
- }
- }
-
- /**
- * Asserts that an issue has been produced by the processed
- * {@link org.apache.any23.extractor.Extractor}.
- *
- * @param level
- * expected issue level
- * @param issueRegex
- * regex matching the expected human readable issue message.
- */
- protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
- final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory()
- .getExtractorName());
- boolean found = false;
- for (IssueReport.Issue issue : issues) {
- if (issue.getLevel() == level
- && issue.getMessage().matches(issueRegex)) {
- found = true;
- break;
- }
- }
- Assert.assertTrue(String.format(
- "Cannot find issue with level %s matching expression '%s'",
- level, issueRegex), found);
- }
-
- /**
- * Verifies that the current model contains all the given statements.
- *
- * @param statements
- * list of statements to be verified.
- * @throws RepositoryException
- */
- public void assertContainsModel(Statement[] statements)
- throws RepositoryException {
- for (Statement statement : statements) {
- assertContains(statement);
- }
- }
-
- /**
- * Verifies that the current model contains all the statements declared in
- * the specified <code>modelFile</code>.
- *
- * @param modelResource
- * the resource containing the model.
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- * @throws RepositoryException
- */
- public void assertContainsModel(String modelResource)
- throws RDFHandlerException, IOException, RDFParseException,
- RepositoryException {
- getConnection().remove(null, SINDICE.getInstance().date, (Value) null,
- (Resource) null);
- getConnection().remove(null, SINDICE.getInstance().size, (Value) null,
- (Resource) null);
- assertContainsModel(RDFUtils.parseRDF(modelResource));
- }
-
- /**
- * Asserts that the given pattern <code>(s p o)</code> satisfies the
- * expected number of statements.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @param expected
- * expected matches.
- * @throws RepositoryException
- */
- protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
- throws RDFHandlerException, RepositoryException {
- int statementsSize = getStatementsSize(s, p, o);
- if (statementsSize != expected) {
- getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, System.out));
- }
-
- Assert.assertEquals("Unexpected number of matching statements.",
- expected, statementsSize);
- }
-
- /**
- * Asserts that the given pattern <code>(_ p o)</code> satisfies the
- * expected number of statements.
- *
- * @param p
- * predicate.
- * @param o
- * object.
- * @param expected
- * expected matches.
- * @throws RepositoryException
- */
- protected void assertStatementsSize(IRI p, Value o, int expected)
- throws RDFHandlerException, RepositoryException {
- assertStatementsSize(null, p, o, expected);
- }
-
- /**
- * Asserts that the given pattern <code>(_ p o)</code> satisfies the
- * expected number of statements.
- *
- * @param p
- * predicate.
- * @param o
- * object.
- * @param expected
- * expected matches.
- * @throws RepositoryException
- */
- protected void assertStatementsSize(IRI p, String o, int expected)
- throws RDFHandlerException, RepositoryException {
- assertStatementsSize(p, o == null ? null : RDFUtils.literal(o),
- expected);
- }
-
- /**
- * Asserts that the given pattern <code>(s p _)</code> is not present.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @throws RepositoryException
- */
- protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
- RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
- true);
- try {
- Assert.assertFalse("Expected no statements.", statements.hasNext());
- } finally {
- statements.close();
- }
- }
-
- /**
- * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it
- * is expected to exists and be just one.
- *
- * @param p
- * predicate.
- * @param o
- * object.
- * @return the matching blank subject.
- * @throws RepositoryException
- */
- protected Resource findExactlyOneBlankSubject(IRI p, Value o)
- throws RepositoryException {
- RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
- try {
- Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
- Statement stmt = it.next();
- Resource result = stmt.getSubject();
- Assert.assertTrue(getFailedExtractionMessage(),
- result instanceof BNode);
- Assert.assertFalse(getFailedExtractionMessage(), it.hasNext());
- return result;
- } finally {
- it.close();
- }
- }
-
- /**
- * Returns the object matching the pattern <code>(s p o)</code>, it is
- * expected to exists and be just one.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @return the matching object.
- * @throws RepositoryException
- */
- protected Value findExactlyOneObject(Resource s, IRI p)
- throws RepositoryException {
- RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
- try {
- Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
- return it.next().getObject();
- } finally {
- it.close();
- }
- }
-
- /**
- * Returns all the subjects matching the pattern <code>(s? p o)</code>.
- *
- * @param p
- * predicate.
- * @param o
- * object.
- * @return list of matching subjects.
- * @throws RepositoryException
- */
- protected List<Resource> findSubjects(IRI p, Value o)
- throws RepositoryException {
- RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
- List<Resource> subjects = new ArrayList<Resource>();
- try {
- Statement statement;
- while (it.hasNext()) {
- statement = it.next();
- subjects.add(statement.getSubject());
- }
- } finally {
- it.close();
- }
- return subjects;
- }
-
- /**
- * Returns all the objects matching the pattern <code>(s p _)</code>.
- *
- * @param s
- * predicate.
- * @param p
- * predicate.
- * @return list of matching objects.
- * @throws RepositoryException
- */
- protected List<Value> findObjects(Resource s, IRI p)
- throws RepositoryException {
- RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
- List<Value> objects = new ArrayList<Value>();
- try {
- Statement statement;
- while (it.hasNext()) {
- statement = it.next();
- objects.add(statement.getObject());
- }
- } finally {
- it.close();
- }
- return objects;
- }
-
- /**
- * Finds the object matching the pattern <code>(s p _)</code>, asserts to
- * find exactly one result.
- *
- * @param s
- * subject.
- * @param p
- * predicate
- * @return matching object.
- * @throws org.openrdf.repository.RepositoryException
- */
- protected Value findObject(Resource s, IRI p) throws RepositoryException {
- RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
- true);
- try {
- Assert.assertTrue("Expected at least a statement.",
- statements.hasNext());
- return (statements.next().getObject());
- } finally {
- statements.close();
- }
- }
-
- /**
- * Finds the resource object matching the pattern <code>(s p _)</code>,
- * asserts to find exactly one result.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @return matching object.
- * @throws RepositoryException
- */
- protected Resource findObjectAsResource(Resource s, IRI p)
- throws RepositoryException {
- final Value v = findObject(s, p);
- try {
- return (Resource) v;
- } catch (ClassCastException cce) {
- Assert.fail("Expected resource object, found: "
- + v.getClass().getSimpleName());
- throw new IllegalStateException();
- }
- }
-
- /**
- * Finds the literal object matching the pattern <code>(s p _)</code>,
- * asserts to find exactly one result.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @return matching object.
- * @throws RepositoryException
- */
- protected String findObjectAsLiteral(Resource s, IRI p)
- throws RepositoryException {
- return findObject(s, p).stringValue();
- }
-
- /**
- * Dumps the extracted model in <i>Turtle</i> format.
- *
- * @return a string containing the model in Turtle.
- * @throws RepositoryException
- */
- protected String dumpModelToTurtle() throws RepositoryException {
- StringWriter w = new StringWriter();
- try {
- conn.export(Rio.createWriter(RDFFormat.TURTLE, w));
- return w.toString();
- } catch (RDFHandlerException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- /**
- * Dumps the extracted model in <i>NQuads</i> format.
- *
- * @return a string containing the model in NQuads.
- * @throws RepositoryException
- */
- protected String dumpModelToNQuads() throws RepositoryException {
- StringWriter w = new StringWriter();
- try {
- conn.export(Rio.createWriter(RDFFormat.NQUADS, w));
- return w.toString();
- } catch (RDFHandlerException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- /**
- * Dumps the extracted model in <i>RDFXML</i> format.
- *
- * @return a string containing the model in RDFXML.
- * @throws RepositoryException
- */
- protected String dumpModelToRDFXML() throws RepositoryException {
- StringWriter w = new StringWriter();
- try {
- conn.export(Rio.createWriter(RDFFormat.RDFXML, w));
- return w.toString();
- } catch (RDFHandlerException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- /**
- * Dumps the list of statements contained in the extracted model.
- *
- * @return list of extracted statements.
- * @throws RepositoryException
- */
- protected List<Statement> dumpAsListOfStatements()
- throws RepositoryException {
- return conn.getStatements(null, null, null, false).asList();
- }
-
- /**
- * @return string containing human readable statements.
- * @throws RepositoryException
- */
- protected String dumpHumanReadableTriples() throws RepositoryException {
- StringBuilder sb = new StringBuilder();
- RepositoryResult<Statement> result = conn.getStatements(null, null,
- null, false);
- while (result.hasNext()) {
- Statement statement = result.next();
- sb.append(String.format("%s %s %s %s\n", statement.getSubject(),
- statement.getPredicate(), statement.getObject(),
- statement.getContext()));
-
- }
- return sb.toString();
- }
-
- /**
- * Checks that a statement is contained in the extracted model. If the
- * statement declares bnodes, they are replaced with <code>_</code>
- * patterns.
- *
- * @param statement
- * @throws RepositoryException
- */
- // TODO: bnode check is too weak, introduce graph omomorphism check.
- protected void assertContains(Statement statement)
- throws RepositoryException {
- Assert.assertTrue("Cannot find statement " + statement + " in model.",
- conn.hasStatement(
- statement.getSubject() instanceof BNode ? null
- : statement.getSubject(), statement
- .getPredicate(),
- statement.getObject() instanceof BNode ? null
- : statement.getObject(), false));
- }
-
- /**
- * Assert that the model contains the statement <code>(s p l)</code> where
- * <code>l</code> is a literal.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param l
- * literal content.
- * @throws RepositoryException
- */
- protected void assertContains(Resource s, IRI p, String l)
- throws RepositoryException {
- assertContains(s, p, RDFUtils.literal(l));
- }
-
- /**
- * Assert that the model contains the statement <code>(s p l)</code> where
- * <code>l</code> is a language literal.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param l
- * literal content.
- * @param lang
- * literal language.
- * @throws RepositoryException
- */
- protected void assertContains(Resource s, IRI p, String l, String lang)
- throws RepositoryException {
- assertContains(s, p, RDFUtils.literal(l, lang));
- }
-
- /**
- * Returns all statements matching the pattern <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @return list of statements.
- * @throws RepositoryException
- */
- protected RepositoryResult<Statement> getStatements(Resource s, IRI p,
- Value o) throws RepositoryException {
- return conn.getStatements(s, p, o, false);
- }
-
- /**
- * Counts all statements matching the pattern <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @return number of matches.
- * @throws RepositoryException
- */
- protected int getStatementsSize(Resource s, IRI p, Value o)
- throws RepositoryException {
- RepositoryResult<Statement> result = getStatements(s, p, o);
- int count = 0;
- try {
- while (result.hasNext()) {
- result.next();
- count++;
- }
- } finally {
- result.close();
- }
- return count;
- }
-
- private String getFailedExtractionMessage() throws RepositoryException {
- return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads();
- }
+ /**
+ * Base test document.
+ */
+ //TODO: change base IRI string.
+ protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/");
+
+ /**
+ * Internal connection used to collect extraction results.
+ */
+ protected RepositoryConnection conn;
+
+ /**
+ * The latest generated report.
+ */
+ private SingleDocumentExtractionReport report;
+
+ private Sail store;
+
+ private SailRepository repository;
+
+ /**
+  * Creates the test case. The repository and its connection are not created
+  * here but in {@link #setUp()}.
+  */
+ public AbstractExtractorTestCase() {
+     // Nothing to initialise: the implicit call to the superclass constructor suffices.
+ }
+
+ /**
+  * Supplies the factory of the extractor under test. It is passed to the
+  * extraction in {@link #extract(String)} and its extractor name is used to
+  * look up reported issues.
+  *
+  * @return the factory of the extractor to be tested.
+  */
+ protected abstract ExtractorFactory<?> getExtractorFactory();
+
+ /**
+  * Prepares a fresh in-memory repository and opens the connection that
+  * collects the extraction output of the test.
+  *
+  * @throws Exception
+  *             if the superclass set-up or the repository initialisation fails.
+  */
+ @Before
+ public void setUp() throws Exception {
+     super.setUp();
+     this.store = new MemoryStore();
+     this.repository = new SailRepository(this.store);
+     this.repository.initialize();
+     this.conn = this.repository.getConnection();
+ }
+
+ /**
+  * Releases the resources acquired in {@link #setUp()}: closes the
+  * connection, shuts the repository down and clears all per-test fields.
+  * Resources that were never created (for example because
+  * {@link #setUp()} failed half-way) are skipped, and the fields are cleared
+  * even when closing fails.
+  *
+  * @throws RepositoryException
+  *             if closing the connection or shutting down the repository
+  *             fails.
+  */
+ @After
+ public void tearDown() throws RepositoryException {
+     try {
+         if (conn != null) {
+             conn.close();
+         }
+     } finally {
+         try {
+             if (repository != null) {
+                 repository.shutDown();
+             }
+         } finally {
+             // Always drop the references so a failed close cannot leak state into the next test.
+             conn = null;
+             report = null;
+             store = null;
+             repository = null;
+         }
+     }
+ }
+
+ /**
+  * Gives access to the connection opened in {@link #setUp()}.
+  *
+  * @return the connection to the memory repository.
+  */
+ protected RepositoryConnection getConnection() {
+     return this.conn;
+ }
+
+ /**
+  * Gives access to the report produced by the most recent call to
+  * {@link #extract(String)}.
+  *
+  * @return the last generated report, or <code>null</code> if no extraction
+  *         has run yet in this test.
+  */
+ protected SingleDocumentExtractionReport getReport() {
+     return this.report;
+ }
+
+ /**
+  * Returns the issues raised by a given extractor, as recorded in the last
+  * extraction report.
+  *
+  * @param extractorName
+  *            name of the extractor.
+  * @return the issues registered under that name, or an empty collection if
+  *         the report has no entry for it.
+  */
+ protected Collection<IssueReport.Issue> getIssues(String extractorName) {
+     // Direct key lookup instead of scanning every entry of the map.
+     final Collection<IssueReport.Issue> issues = report.getExtractorToIssues().get(extractorName);
+     if (issues == null) {
+         return Collections.emptyList();
+     }
+     return issues;
+ }
+
+ /**
+  * Returns the issues raised by the extractor under test, identified by the
+  * extractor name of {@link #getExtractorFactory()}.
+  *
+  * @return collection of issues.
+  */
+ protected Collection<IssueReport.Issue> getIssues() {
+     final String extractorName = getExtractorFactory().getExtractorName();
+     return getIssues(extractorName);
+ }
+
+ /**
+  * Runs the extractor provided by {@link #getExtractorFactory()} over the
+  * given resource, writing the produced statements into the test repository
+  * and storing the resulting report (see {@link #getReport()}).
+  *
+  * @param resource
+  *            resource name.
+  * @throws org.apache.any23.extractor.ExtractionException
+  *             if the extraction fails.
+  * @throws IOException
+  *             if the resource cannot be read.
+  */
+ // TODO: MimeType detector to null forces the execution of all extractors,
+ // but extraction tests should be based on mimetype detection.
+ protected void extract(String resource) throws ExtractionException,
+         IOException {
+     final HTMLFixture fixture = new HTMLFixture(copyResourceToTempFile(resource));
+     final SingleDocumentExtraction extraction = new SingleDocumentExtraction(
+             fixture.getOpener(baseIRI.toString()),
+             getExtractorFactory(),
+             new RepositoryWriter(conn));
+     extraction.setMIMETypeDetector(null);
+     report = extraction.run();
+ }
+
+ /**
+  * Extracts the content of a resource, converting any checked extraction or
+  * I/O failure into a {@link RuntimeException}, and optionally verifies the
+  * reported issues.
+  *
+  * @param resource
+  *            resource name.
+  * @param assertNoIssues
+  *            if <code>true</code>, invokes {@link #assertNoIssues()} after
+  *            the extraction.
+  */
+ protected void assertExtract(String resource, boolean assertNoIssues) {
+     try {
+         extract(resource);
+         if (assertNoIssues) {
+             assertNoIssues();
+         }
+     } catch (ExtractionException extractionFailure) {
+         throw new RuntimeException(extractionFailure);
+     } catch (IOException ioFailure) {
+         throw new RuntimeException(ioFailure);
+     }
+ }
+
+ /**
+  * Extracts the content of a resource and then invokes
+  * {@link #assertNoIssues()}; shorthand for
+  * <code>assertExtract(resource, true)</code>.
+  *
+  * @param resource
+  *            resource name.
+  */
+ protected void assertExtract(String resource) {
+     final boolean checkIssues = true;
+     assertExtract(resource, checkIssues);
+ }
+
+ /**
+  * Asserts that the extracted triples contain the pattern
+  * <code>(_ p o)</code>, i.e. any subject with the given predicate and
+  * resource object.
+  *
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertContains(IRI p, Resource o) throws RepositoryException {
+     final Resource anySubject = null;
+     assertContains(anySubject, p, o);
+ }
+
+ /**
+  * Asserts that the extracted triples contain the pattern
+  * <code>(_ p o)</code>, where <code>o</code> is turned into a literal via
+  * <code>RDFUtils.literal(o)</code>.
+  *
+  * @param p
+  *            predicate.
+  * @param o
+  *            literal content of the object.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertContains(IRI p, String o) throws RepositoryException {
+     final Resource anySubject = null;
+     final Value literalObject = RDFUtils.literal(o);
+     assertContains(anySubject, p, literalObject);
+ }
+
+ /**
+  * Asserts that the extracted triples do <b>not</b> contain the pattern
+  * <code>(_ p o)</code>.
+  *
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertNotContains(IRI p, Resource o)
+         throws RepositoryException {
+     final Resource anySubject = null;
+     assertNotContains(anySubject, p, o);
+ }
+
+ /**
+  * Asserts that the extracted triples contain the pattern
+  * <code>(s p o)</code>. On failure the message includes a dump of the
+  * extracted model followed by the triple that was looked for.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertContains(Resource s, IRI p, Value o)
+         throws RepositoryException {
+     final String failureMessage = getFailedExtractionMessage()
+             + String.format("Cannot find triple (%s %s %s)", s, p, o);
+     final boolean present = conn.hasStatement(s, p, o, false);
+     Assert.assertTrue(failureMessage, present);
+ }
+
+ /**
+  * Asserts that the extracted triples do <b>not</b> contain the pattern
+  * <code>(s p o)</code>, where <code>o</code> is turned into a literal via
+  * <code>RDFUtils.literal(o)</code>.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @param o
+  *            literal content of the object.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertNotContains(Resource s, IRI p, String o)
+         throws RepositoryException {
+     final String failureMessage = getFailedExtractionMessage();
+     final boolean present = conn.hasStatement(s, p, RDFUtils.literal(o), false);
+     Assert.assertFalse(failureMessage, present);
+ }
+
+ /**
+  * Asserts that the extracted triples do <b>not</b> contain the pattern
+  * <code>(s p o)</code>, where <code>o</code> is a resource.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertNotContains(Resource s, IRI p, Resource o)
+         throws RepositoryException {
+     final String failureMessage = getFailedExtractionMessage();
+     final boolean present = conn.hasStatement(s, p, o, false);
+     Assert.assertFalse(failureMessage, present);
+ }
+
+ /**
+  * Asserts that the model contains at least one statement.
+  *
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertModelNotEmpty() throws RepositoryException {
+     final String failureMessage = "The model is expected to not be empty."
+             + getFailedExtractionMessage();
+     final boolean empty = conn.isEmpty();
+     Assert.assertFalse(failureMessage, empty);
+ }
+
+ /**
+  * Asserts that the model does not contain the pattern
+  * <code>(s p o)</code>, where <code>o</code> is a literal.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertNotContains(Resource s, IRI p, Literal o)
+         throws RepositoryException {
+     final String failureMessage = getFailedExtractionMessage();
+     final boolean present = conn.hasStatement(s, p, o, false);
+     Assert.assertFalse(failureMessage, present);
+ }
+
+ /**
+  * Asserts that the model contains no statements.
+  *
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertModelEmpty() throws RepositoryException {
+     final String failureMessage = getFailedExtractionMessage();
+     final boolean empty = conn.isEmpty();
+     Assert.assertTrue(failureMessage, empty);
+ }
+
+ /**
+  * Asserts that the extraction generated no severe issues. Every extractor
+  * with at least one issue is printed to standard output; the test fails
+  * only when an issue of level <code>ERROR</code> or <code>FATAL</code> is
+  * found.
+  */
+ protected void assertNoIssues() {
+     for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report
+             .getExtractorToIssues().entrySet()) {
+         final boolean hasIssues = entry.getValue().size() > 0;
+         if (hasIssues) {
+             System.out.println("Unexpected issue for extractor " + entry.getKey()
+                     + " : " + entry.getValue());
+         }
+         for (Issue candidate : entry.getValue()) {
+             final boolean severe = candidate.getLevel() == IssueLevel.ERROR
+                     || candidate.getLevel() == IssueLevel.FATAL;
+             if (severe) {
+                 Assert.fail("Unexpected issue for extractor " + entry.getKey()
+                         + " : " + entry.getValue());
+             }
+         }
+     }
+ }
+
+ /**
+  * Asserts that the extractor under test (see
+  * {@link #getExtractorFactory()}) reported at least one issue with the
+  * given level whose message satisfies <code>matches(issueRegex)</code>.
+  *
+  * @param level
+  *            expected issue level.
+  * @param issueRegex
+  *            regex matching the expected human readable issue message.
+  */
+ protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
+     final String extractorName = getExtractorFactory().getExtractorName();
+     boolean matched = false;
+     for (IssueReport.Issue candidate : getIssues(extractorName)) {
+         matched = candidate.getLevel() == level
+                 && candidate.getMessage().matches(issueRegex);
+         if (matched) {
+             break;
+         }
+     }
+     final String failureMessage = String.format(
+             "Cannot find issue with level %s matching expression '%s'",
+             level, issueRegex);
+     Assert.assertTrue(failureMessage, matched);
+ }
+
+ /**
+  * Verifies that the current model contains all the given statements, by
+  * applying {@link #assertContains(Statement)} to each of them in order.
+  *
+  * @param statements
+  *            statements to be verified.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ public void assertContainsModel(Statement[] statements)
+         throws RepositoryException {
+     for (int index = 0; index < statements.length; index++) {
+         assertContains(statements[index]);
+     }
+ }
+
+ /**
+  * Verifies that the current model contains all the statements declared in
+  * the specified <code>modelResource</code>. Before the comparison, every
+  * statement using the <code>SINDICE</code> <code>date</code> or
+  * <code>size</code> predicate is removed from the current model.
+  *
+  * @param modelResource
+  *            the resource containing the expected model.
+  * @throws RDFHandlerException
+  *             declared by the signature.
+  * @throws IOException
+  *             declared by the signature.
+  * @throws RDFParseException
+  *             declared by the signature.
+  * @throws RepositoryException
+  *             if the repository cannot be modified or queried.
+  */
+ public void assertContainsModel(String modelResource)
+         throws RDFHandlerException, IOException, RDFParseException,
+         RepositoryException {
+     final Value anyObject = null;
+     final Resource anyContext = null;
+     getConnection().remove(null, SINDICE.getInstance().date, anyObject, anyContext);
+     getConnection().remove(null, SINDICE.getInstance().size, anyObject, anyContext);
+     assertContainsModel(RDFUtils.parseRDF(modelResource));
+ }
+
+ /**
+  * Asserts that the pattern <code>(s p o)</code> matches exactly the
+  * expected number of statements. On a mismatch the matching statements are
+  * first written to standard output in NQuads format.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @param expected
+  *            expected matches.
+  * @throws RDFHandlerException
+  *             if the diagnostic export fails.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
+         throws RDFHandlerException, RepositoryException {
+     final int actual = getStatementsSize(s, p, o);
+     final boolean mismatch = actual != expected;
+     if (mismatch) {
+         // Dump what was actually found to make the failure easier to diagnose.
+         getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, System.out));
+     }
+     Assert.assertEquals("Unexpected number of matching statements.",
+             expected, actual);
+ }
+
+ /**
+  * Asserts that the pattern <code>(_ p o)</code> matches exactly the
+  * expected number of statements, whatever the subject.
+  *
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @param expected
+  *            expected matches.
+  * @throws RDFHandlerException
+  *             if the diagnostic export fails.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertStatementsSize(IRI p, Value o, int expected)
+         throws RDFHandlerException, RepositoryException {
+     final Resource anySubject = null;
+     assertStatementsSize(anySubject, p, o, expected);
+ }
+
+ /**
+  * Asserts that the pattern <code>(_ p o)</code> matches exactly the
+  * expected number of statements, where a non-null <code>o</code> is turned
+  * into a literal via <code>RDFUtils.literal(o)</code> and a
+  * <code>null</code> <code>o</code> is passed on as <code>null</code>.
+  *
+  * @param p
+  *            predicate.
+  * @param o
+  *            literal content of the object, or <code>null</code>.
+  * @param expected
+  *            expected matches.
+  * @throws RDFHandlerException
+  *             if the diagnostic export fails.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertStatementsSize(IRI p, String o, int expected)
+         throws RDFHandlerException, RepositoryException {
+     Value objectPattern = null;
+     if (o != null) {
+         objectPattern = RDFUtils.literal(o);
+     }
+     assertStatementsSize(p, objectPattern, expected);
+ }
+
+ /**
+  * Asserts that no statement matches the pattern <code>(s p _)</code>. The
+  * query is issued with the inferred-statements flag set to
+  * <code>true</code>.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
+     final RepositoryResult<Statement> matches = conn.getStatements(s, p, null,
+             true);
+     try {
+         final boolean anyMatch = matches.hasNext();
+         Assert.assertFalse("Expected no statements.", anyMatch);
+     } finally {
+         matches.close();
+     }
+ }
+
+ /**
+  * Returns the blank subject matching the pattern <code>(_:b p o)</code>.
+  * Asserts that a match exists, that its subject is a blank node and that
+  * there is no second match.
+  *
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @return the matching blank subject.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected Resource findExactlyOneBlankSubject(IRI p, Value o)
+         throws RepositoryException {
+     final RepositoryResult<Statement> matches = conn.getStatements(null, p, o, false);
+     try {
+         Assert.assertTrue(getFailedExtractionMessage(), matches.hasNext());
+         final Resource subject = matches.next().getSubject();
+         Assert.assertTrue(getFailedExtractionMessage(),
+                 subject instanceof BNode);
+         // A second match would violate the "exactly one" contract.
+         Assert.assertFalse(getFailedExtractionMessage(), matches.hasNext());
+         return subject;
+     } finally {
+         matches.close();
+     }
+ }
+
+ /**
+  * Returns the object of the first statement matching the pattern
+  * <code>(s p _)</code>, asserting that at least one match exists.
+  * NOTE(review): despite the method name, the code does not verify that the
+  * match is unique; confirm whether callers rely on that before tightening
+  * it.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @return the object of the first matching statement.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected Value findExactlyOneObject(Resource s, IRI p)
+         throws RepositoryException {
+     final RepositoryResult<Statement> matches = conn.getStatements(s, p, null, false);
+     try {
+         final String failureMessage = getFailedExtractionMessage();
+         Assert.assertTrue(failureMessage, matches.hasNext());
+         final Statement first = matches.next();
+         return first.getObject();
+     } finally {
+         matches.close();
+     }
+ }
+
+ /**
+  * Collects the subjects of all statements matching the pattern
+  * <code>(s? p o)</code>, in iteration order. A subject appears once per
+  * matching statement.
+  *
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @return list of matching subjects, possibly empty.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected List<Resource> findSubjects(IRI p, Value o)
+         throws RepositoryException {
+     final RepositoryResult<Statement> matches = conn.getStatements(null, p, o, false);
+     final List<Resource> collected = new ArrayList<Resource>();
+     try {
+         while (matches.hasNext()) {
+             collected.add(matches.next().getSubject());
+         }
+     } finally {
+         matches.close();
+     }
+     return collected;
+ }
+
+ /**
+  * Collects the objects of all statements matching the pattern
+  * <code>(s p _)</code>, in iteration order. An object appears once per
+  * matching statement.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @return list of matching objects, possibly empty.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected List<Value> findObjects(Resource s, IRI p)
+         throws RepositoryException {
+     final RepositoryResult<Statement> matches = conn.getStatements(s, p, null, false);
+     final List<Value> collected = new ArrayList<Value>();
+     try {
+         while (matches.hasNext()) {
+             collected.add(matches.next().getObject());
+         }
+     } finally {
+         matches.close();
+     }
+     return collected;
+ }
+
+ /**
+  * Returns the object of the first statement matching the pattern
+  * <code>(s p _)</code>, asserting that at least one match exists. The
+  * query is issued with the inferred-statements flag set to
+  * <code>true</code>; additional matches are ignored.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @return the object of the first matching statement.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected Value findObject(Resource s, IRI p) throws RepositoryException {
+     final RepositoryResult<Statement> matches = conn.getStatements(s, p, null,
+             true);
+     try {
+         final boolean anyMatch = matches.hasNext();
+         Assert.assertTrue("Expected at least a statement.", anyMatch);
+         final Statement first = matches.next();
+         return first.getObject();
+     } finally {
+         matches.close();
+     }
+ }
+
+ /**
+  * Finds the object of the first statement matching the pattern
+  * <code>(s p _)</code> (see {@link #findObject(Resource, IRI)}) and
+  * returns it as a resource. The test fails if the object is not a
+  * {@link Resource}.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @return matching object.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected Resource findObjectAsResource(Resource s, IRI p)
+         throws RepositoryException {
+     final Value v = findObject(s, p);
+     // Check the type explicitly instead of catching ClassCastException as control flow.
+     if (v != null && !(v instanceof Resource)) {
+         Assert.fail("Expected resource object, found: "
+                 + v.getClass().getSimpleName());
+     }
+     return (Resource) v;
+ }
+
+ /**
+  * Finds the object of the first statement matching the pattern
+  * <code>(s p _)</code> (see {@link #findObject(Resource, IRI)}) and
+  * returns its string value. The object's kind is not checked.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @return string value of the matching object.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected String findObjectAsLiteral(Resource s, IRI p)
+         throws RepositoryException {
+     final Value object = findObject(s, p);
+     return object.stringValue();
+ }
+
+ /**
+  * Serialises the extracted model in <i>Turtle</i> format. An
+  * <code>RDFHandlerException</code> raised while writing is rethrown
+  * wrapped in a {@link RuntimeException}.
+  *
+  * @return a string containing the model in Turtle.
+  * @throws RepositoryException
+  *             if the repository cannot be read.
+  */
+ protected String dumpModelToTurtle() throws RepositoryException {
+     final StringWriter buffer = new StringWriter();
+     try {
+         conn.export(Rio.createWriter(RDFFormat.TURTLE, buffer));
+     } catch (RDFHandlerException ex) {
+         throw new RuntimeException(ex);
+     }
+     return buffer.toString();
+ }
+
+ /**
+  * Serialises the extracted model in <i>NQuads</i> format. An
+  * <code>RDFHandlerException</code> raised while writing is rethrown
+  * wrapped in a {@link RuntimeException}.
+  *
+  * @return a string containing the model in NQuads.
+  * @throws RepositoryException
+  *             if the repository cannot be read.
+  */
+ protected String dumpModelToNQuads() throws RepositoryException {
+     final StringWriter buffer = new StringWriter();
+     try {
+         conn.export(Rio.createWriter(RDFFormat.NQUADS, buffer));
+     } catch (RDFHandlerException ex) {
+         throw new RuntimeException(ex);
+     }
+     return buffer.toString();
+ }
+
+ /**
+  * Serialises the extracted model in <i>RDFXML</i> format. An
+  * <code>RDFHandlerException</code> raised while writing is rethrown
+  * wrapped in a {@link RuntimeException}.
+  *
+  * @return a string containing the model in RDFXML.
+  * @throws RepositoryException
+  *             if the repository cannot be read.
+  */
+ protected String dumpModelToRDFXML() throws RepositoryException {
+     final StringWriter buffer = new StringWriter();
+     try {
+         conn.export(Rio.createWriter(RDFFormat.RDFXML, buffer));
+     } catch (RDFHandlerException ex) {
+         throw new RuntimeException(ex);
+     }
+     return buffer.toString();
+ }
+
+ /**
+  * Returns all statements of the extracted model as a list, obtained by
+  * passing the full query result to <code>Iterations.asList</code>.
+  *
+  * @return list of extracted statements.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected List<Statement> dumpAsListOfStatements()
+         throws RepositoryException {
+     final RepositoryResult<Statement> everything = conn.getStatements(null, null, null, false);
+     return Iterations.asList(everything);
+ }
+
+ /**
+  * Renders every statement of the extracted model as one line of the form
+  * <code>subject predicate object context</code>.
+  *
+  * @return string containing human readable statements, one per line.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected String dumpHumanReadableTriples() throws RepositoryException {
+     final StringBuilder sb = new StringBuilder();
+     final RepositoryResult<Statement> result = conn.getStatements(null, null,
+             null, false);
+     try {
+         while (result.hasNext()) {
+             final Statement statement = result.next();
+             sb.append(String.format("%s %s %s %s\n", statement.getSubject(),
+                     statement.getPredicate(), statement.getObject(),
+                     statement.getContext()));
+         }
+     } finally {
+         // Release the query result even if iteration fails.
+         result.close();
+     }
+     return sb.toString();
+ }
+
+ /**
+  * Checks that a statement is contained in the extracted model. A blank
+  * node in subject or object position is replaced with a wildcard, so it
+  * matches any value. The statement's context is not part of the check.
+  *
+  * @param statement
+  *            the statement to look for.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ // TODO: bnode check is too weak, introduce graph homomorphism check.
+ protected void assertContains(Statement statement)
+         throws RepositoryException {
+     final String failureMessage = "Cannot find statement " + statement + " in model.";
+     final Resource subjectPattern = statement.getSubject() instanceof BNode
+             ? null
+             : statement.getSubject();
+     final IRI predicate = statement.getPredicate();
+     final Value objectPattern = statement.getObject() instanceof BNode
+             ? null
+             : statement.getObject();
+     Assert.assertTrue(failureMessage,
+             conn.hasStatement(subjectPattern, predicate, objectPattern, false));
+ }
+
+ /**
+  * Asserts that the model contains the statement <code>(s p l)</code>,
+  * where <code>l</code> is turned into a literal via
+  * <code>RDFUtils.literal(l)</code>.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @param l
+  *            literal content.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertContains(Resource s, IRI p, String l)
+         throws RepositoryException {
+     final Value literalObject = RDFUtils.literal(l);
+     assertContains(s, p, literalObject);
+ }
+
+ /**
+  * Asserts that the model contains the statement <code>(s p l)</code>,
+  * where <code>l</code> is turned into a language literal via
+  * <code>RDFUtils.literal(l, lang)</code>.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @param l
+  *            literal content.
+  * @param lang
+  *            literal language.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected void assertContains(Resource s, IRI p, String l, String lang)
+         throws RepositoryException {
+     final Value languageLiteral = RDFUtils.literal(l, lang);
+     assertContains(s, p, languageLiteral);
+ }
+
+ /**
+  * Queries the repository for all statements matching the pattern
+  * <code>(s p o)</code>, with the inferred-statements flag set to
+  * <code>false</code>. The caller is responsible for closing the returned
+  * result.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @return the matching statements.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected RepositoryResult<Statement> getStatements(Resource s, IRI p,
+         Value o) throws RepositoryException {
+     final boolean includeInferred = false;
+     return conn.getStatements(s, p, o, includeInferred);
+ }
+
+ /**
+  * Counts the statements matching the pattern <code>(s p o)</code> by
+  * iterating over the result of
+  * {@link #getStatements(Resource, IRI, Value)}, which is closed
+  * afterwards.
+  *
+  * @param s
+  *            subject.
+  * @param p
+  *            predicate.
+  * @param o
+  *            object.
+  * @return number of matches.
+  * @throws RepositoryException
+  *             if the repository cannot be queried.
+  */
+ protected int getStatementsSize(Resource s, IRI p, Value o)
+         throws RepositoryException {
+     final RepositoryResult<Statement> matches = getStatements(s, p, o);
+     int total = 0;
+     try {
+         for (; matches.hasNext(); total++) {
+             matches.next();
+         }
+     } finally {
+         matches.close();
+     }
+     return total;
+ }
+
+ // Builds the common assertion-failure message: a fixed header followed by the NQuads dump of the model.
+ private String getFailedExtractionMessage() throws RepositoryException {
+     final String extractedTriples = dumpModelToNQuads();
+     return "Assertion failed! Extracted triples:\n" + extractedTriples;
+ }
}
\ No newline at end of file
[5/6] any23 git commit: Resolve merge conflict between master and
ANY23-320
Posted by le...@apache.org.
Resolve merge conflict between master and ANY23-320
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/46408604
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/46408604
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/46408604
Branch: refs/heads/master
Commit: 46408604a18ec289e252093a67f5ea010eed4488
Parents: 60e93a7 d2ace9c
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Wed Jan 3 00:19:05 2018 +0000
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Wed Jan 3 00:19:05 2018 +0000
----------------------------------------------------------------------
api/src/main/java/org/apache/any23/vocab/YAML.java | 7 +++++--
.../org/apache/any23/extractor/yaml/ElementsProcessor.java | 6 ++----
.../apache/any23/extractor/yaml/ElementsProcessorTest.java | 1 -
.../org/apache/any23/extractor/yaml/YAMLExtractorTest.java | 7 +------
.../test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java | 4 ++--
.../resources/org/apache/any23/extractor/yaml/test-null.yml | 4 +++-
6 files changed, 13 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/46408604/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
----------------------------------------------------------------------
diff --cc core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
index 90863ac,a4604e4..75c6611
--- a/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
+++ b/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
@@@ -104,18 -100,16 +104,17 @@@ public class ElementsProcessor
* created.
* @return instance of {@link ModelHolder},
*/
+ @SuppressWarnings("unchecked")
public ModelHolder asModel(IRI namespace, final Object t, Value rootNode) {
- if (t == null) {
- return null;
- }
if (t instanceof List) {
- return processList(namespace, (List) t);
+ return processList(namespace, (List<Object>) t);
} else if (t instanceof Map) {
- return processMap(namespace, (Map) t, rootNode);
+ return processMap(namespace, (Map<String, Object>) t, rootNode);
} else if (t instanceof String) {
return asModelHolder(RDFUtils.makeIRI(t.toString()), modelFactory.createEmptyModel());
+ } else if (t == null) {
+ return asModelHolder(vocab.nullValue, modelFactory.createEmptyModel());
} else {
return asModelHolder(Literals.createLiteral(vf, t), modelFactory.createEmptyModel());
}
[4/6] any23 git commit: ANY23-320 Address @Ignore tests in Any23 and
ANY23-131 Nested Microdata are not extracted
Posted by le...@apache.org.
ANY23-320 Address @Ignore tests in Any23 and ANY23-131 Nested Microdata are not extracted
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/60e93a76
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/60e93a76
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/60e93a76
Branch: refs/heads/master
Commit: 60e93a76748e53c413529409fb545e2245013639
Parents: 0613280
Author: Lewis John McGibbney <le...@gmail.com>
Authored: Mon Jan 1 02:58:36 2018 +0000
Committer: Lewis John McGibbney <le...@gmail.com>
Committed: Mon Jan 1 02:58:36 2018 +0000
----------------------------------------------------------------------
.../any23/cli/ExtractorDocumentationTest.java | 2 -
.../java/org/apache/any23/cli/RoverTest.java | 2 -
.../org/apache/any23/cli/SimpleRoverTest.java | 2 -
.../any23/extractor/csv/CSVExtractor.java | 23 +-
.../extractor/html/EmbeddedJSONLDExtractor.java | 363 ++--
.../any23/extractor/html/HTMLMetaExtractor.java | 58 +-
.../apache/any23/extractor/microdata/Item.java | 10 +-
.../extractor/microdata/ItemPropValue.java | 31 +-
.../any23/extractor/microdata/ItemScope.java | 29 +-
.../extractor/microdata/MicrodataExtractor.java | 35 +-
.../extractor/microdata/MicrodataParser.java | 136 +-
.../any23/extractor/xpath/QuadTemplate.java | 1 +
.../any23/extractor/xpath/TemplateObject.java | 39 +-
.../any23/extractor/xpath/TemplateSubject.java | 13 +-
.../any23/extractor/yaml/ElementsProcessor.java | 24 +-
.../any23/rdf/Any23ValueFactoryWrapper.java | 61 +-
.../java/org/apache/any23/rdf/RDFUtils.java | 82 +-
.../XMLValidationReportSerializer.java | 21 +-
.../any23/validator/rule/AboutNotURIRule.java | 1 +
.../validator/rule/MetaNameMisuseRule.java | 1 +
.../org/apache/any23/vocab/RDFSchemaUtils.java | 24 +-
.../any23/extractor/csv/CSVExtractorTest.java | 178 +-
.../html/AbstractExtractorTestCase.java | 1592 ++++++++-------
.../extractor/html/HCardExtractorTest.java | 1852 +++++++++---------
.../extractor/html/HListingExtractorTest.java | 3 -
.../microdata/MicrodataParserTest.java | 81 +-
.../any23/extractor/rdfa/RDFaExtractorTest.java | 2 -
.../TemplateXPathExtractorRuleImplTest.java | 24 +-
.../any23/filter/IgnoreAccidentalRDFaTest.java | 2 +-
.../org/apache/any23/writer/JSONWriterTest.java | 4 -
.../org/apache/any23/servlet/ServletTest.java | 4 -
....2.1-non-normative-example-1-expected.nquads | 8 +-
.../5.2.1-non-normative-example-1.html | 48 +-
....2.1-non-normative-example-2-expected.nquads | 33 +-
.../5.2.1-non-normative-example-2.html | 16 +-
.../microdata-basic-expected.properties | 6 +-
.../resources/microdata/microdata-basic.html | 15 +-
.../microdata-itemref-expected.properties | 20 +-
.../resources/microdata/microdata-itemref.html | 46 +-
.../microdata/microdata-json-serialization.json | 2 +-
.../microdata/microdata-nested-expected.nquads | 19 +-
.../microdata-nested-expected.properties | 4 +-
.../resources/microdata/microdata-nested.html | 33 +-
.../microdata-richsnippet-expected.nquads | 27 +-
44 files changed, 2462 insertions(+), 2515 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java b/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
index 98616ba..97bf4f2 100644
--- a/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
+++ b/cli/src/test/java/org/apache/any23/cli/ExtractorDocumentationTest.java
@@ -42,13 +42,11 @@ public class ExtractorDocumentationTest extends ToolTestBase {
runToolCheckExit0("--all");
}
- //@Ignore("no available example")
@Test
public void testExampleInput() throws Exception {
runToolCheckExit0("-i", TARGET_EXTRACTOR);
}
- //@Ignore("no available example")
@Test
public void testExampleOutput() throws Exception {
runToolCheckExit0("-o", TARGET_EXTRACTOR);
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/cli/src/test/java/org/apache/any23/cli/RoverTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/RoverTest.java b/cli/src/test/java/org/apache/any23/cli/RoverTest.java
index 893220a..7bab314 100644
--- a/cli/src/test/java/org/apache/any23/cli/RoverTest.java
+++ b/cli/src/test/java/org/apache/any23/cli/RoverTest.java
@@ -23,7 +23,6 @@ import org.apache.any23.util.StringUtils;
import org.apache.any23.util.URLUtils;
import org.junit.Assert;
import org.junit.Assume;
-import org.junit.Ignore;
import org.junit.Test;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.rio.RDFFormat;
@@ -36,7 +35,6 @@ import java.util.Arrays;
*
* @author Michele Mostarda (mostarda@fbk.eu)
*/
-@Ignore("Twitter microdata not parsing correctly right now")
public class RoverTest extends ToolTestBase {
private static final String[] TARGET_FILES = {
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java
----------------------------------------------------------------------
diff --git a/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java b/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java
index f659539..b4c10ad 100644
--- a/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java
+++ b/cli/src/test/java/org/apache/any23/cli/SimpleRoverTest.java
@@ -22,7 +22,6 @@ import java.util.Collection;
import org.apache.any23.util.FileUtils;
import org.apache.pdfbox.util.Charsets;
import org.junit.Assert;
-import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -104,7 +103,6 @@ public class SimpleRoverTest extends ToolTestBase {
* @throws Exception
*/
@Test
- @Ignore
public void ref310ExtendedTest()
throws Exception {
File outputFile = File.createTempFile("rover-test", ".ttl", tempDirectory);
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
index 7536304..e72162b 100644
--- a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
@@ -58,12 +58,15 @@ public class CSVExtractor implements Extractor.ContentExtractor {
/**
* {@inheritDoc}
*/
+ @Override
public void setStopAtFirstError(boolean f) {
+ //not implemented
}
/**
* {@inheritDoc}
*/
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -85,7 +88,7 @@ public class CSVExtractor implements Extractor.ContentExtractor {
String[] nextLine;
int index = 0;
while ((nextLine = csvParser.getLine()) != null) {
- IRI rowSubject = RDFUtils.uri(
+ IRI rowSubject = RDFUtils.iri(
documentIRI.toString(),
"row/" + index
);
@@ -194,11 +197,11 @@ public class CSVExtractor implements Extractor.ContentExtractor {
}
private IRI normalize(String toBeNormalized, IRI documentIRI) {
- toBeNormalized = toBeNormalized.trim().toLowerCase().replace("?", "").replace("&", "");
+ String newToBeNormalized = toBeNormalized.trim().toLowerCase().replace("?", "").replace("&", "");
StringBuilder result = new StringBuilder(documentIRI.toString());
- StringTokenizer tokenizer = new StringTokenizer(toBeNormalized, " ");
+ StringTokenizer tokenizer = new StringTokenizer(newToBeNormalized, " ");
while (tokenizer.hasMoreTokens()) {
String current = tokenizer.nextToken();
@@ -228,7 +231,7 @@ public class CSVExtractor implements Extractor.ContentExtractor {
// there are some row cells that don't have an associated column name
break;
}
- if (cell.equals("")) {
+ if ("".equals(cell)) {
index++;
continue;
}
@@ -241,17 +244,17 @@ public class CSVExtractor implements Extractor.ContentExtractor {
private Value getObjectFromCell(String cell) {
Value object;
- cell = cell.trim();
- if (RDFUtils.isAbsoluteIRI(cell)) {
- object = SimpleValueFactory.getInstance().createIRI(cell);
+ String newCell = cell.trim();
+ if (RDFUtils.isAbsoluteIRI(newCell)) {
+ object = SimpleValueFactory.getInstance().createIRI(newCell);
} else {
IRI datatype = XMLSchema.STRING;
- if (isInteger(cell)) {
+ if (isInteger(newCell)) {
datatype = XMLSchema.INTEGER;
- } else if(isFloat(cell)) {
+ } else if(isFloat(newCell)) {
datatype = XMLSchema.FLOAT;
}
- object = SimpleValueFactory.getInstance().createLiteral(cell, datatype);
+ object = SimpleValueFactory.getInstance().createLiteral(newCell, datatype);
}
return object;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
index db58586..34728e5 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
@@ -28,7 +28,6 @@ import org.apache.any23.extractor.rdf.JSONLDExtractorFactory;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.SINDICE;
import org.eclipse.rdf4j.model.IRI;
-import org.eclipse.rdf4j.model.impl.LiteralImpl;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
@@ -52,205 +51,167 @@ import java.util.Set;
*/
public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
- private static final SINDICE vSINDICE = SINDICE.getInstance();
-
- private IRI profile;
-
- private Map<String, IRI> prefixes = new HashMap<>();
-
- private String documentLang;
-
- private JSONLDExtractor extractor;
-
- /**
- * {@inheritDoc}
- */
- @Override
- public void run(ExtractionParameters extractionParameters,
- ExtractionContext extractionContext, Document in,
- ExtractionResult out) throws IOException, ExtractionException {
- profile = extractProfile(in);
- documentLang = getDocumentLanguage(in);
- extractLinkDefinedPrefixes(in);
-
- String baseProfile = vSINDICE.NS;
- if (profile != null) {
- baseProfile = profile.toString();
- }
-
- final IRI documentIRI = extractionContext.getDocumentIRI();
- Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile,
- extractionParameters, extractionContext, out);
- for (JSONLDScript jsonldScript : jsonldScripts) {
- //String lang = documentLang;
- //if (jsonldScript.getLang() != null) {
- // lang = jsonldScript.getLang();
- //}
- //out.writeTriple(documentIRI, jsonldScript.getName(),
- // SimpleValueFactory.getInstance().createLiteral(jsonldScript.getContent(), lang));
- }
- }
-
- /**
- * Returns the {@link Document} language if declared, <code>null</code>
- * otherwise.
- *
- * @param in
- * a instance of {@link Document}.
- * @return the language declared, could be <code>null</code>.
- */
- private String getDocumentLanguage(Document in) {
- String lang = DomUtils.find(in, "string(/HTML/@lang)");
- if (lang.equals("")) {
- return null;
- }
- return lang;
- }
-
- private IRI extractProfile(Document in) {
- String profile = DomUtils.find(in, "string(/HTML/@profile)");
- if (profile.equals("")) {
- return null;
- }
- return SimpleValueFactory.getInstance().createIRI(profile);
- }
-
- /**
- * It extracts prefixes defined in the <i>LINK</i> meta tags.
- *
- * @param in
- */
- private void extractLinkDefinedPrefixes(Document in) {
- List<Node> linkNodes = DomUtils.findAll(in, "/HTML/HEAD/LINK");
- for (Node linkNode : linkNodes) {
- NamedNodeMap attributes = linkNode.getAttributes();
- String rel = attributes.getNamedItem("rel").getTextContent();
- String href = attributes.getNamedItem("href").getTextContent();
- if (rel != null && href != null && RDFUtils.isAbsoluteIRI(href)) {
- prefixes.put(rel, SimpleValueFactory.getInstance().createIRI(href));
- }
- }
- }
-
- private Set<JSONLDScript> extractJSONLDScript(Document in,
- String baseProfile, ExtractionParameters extractionParameters,
- ExtractionContext extractionContext, ExtractionResult out)
- throws IOException, ExtractionException {
- List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
- Set<JSONLDScript> result = new HashSet<>();
- extractor = new JSONLDExtractorFactory().createExtractor();
- for (Node jsonldNode : scriptNodes) {
- NamedNodeMap attributes = jsonldNode.getAttributes();
- for (int i = 0; i < attributes.getLength(); i++) {
- if (attributes.item(i).getTextContent()
- .equalsIgnoreCase("application/ld+json")) {
- extractor.run(extractionParameters, extractionContext,
- DomUtils.nodeToInputStream(jsonldNode
- .getFirstChild()), out);
- }
- }
- Node nameAttribute = attributes.getNamedItem("name");
- Node contentAttribute = attributes.getNamedItem("content");
- if (nameAttribute == null || contentAttribute == null) {
- continue;
- }
- String name = nameAttribute.getTextContent();
- String content = contentAttribute.getTextContent();
- String xpath = DomUtils.getXPathForNode(jsonldNode);
- IRI nameAsIRI = getPrefixIfExists(name);
- if (nameAsIRI == null) {
- nameAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + name);
- }
- JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsIRI,
- content);
- result.add(jsonldScript);
- }
- return result;
- }
-
- private IRI getPrefixIfExists(String name) {
- String[] split = name.split("\\.");
- if (split.length == 2 && prefixes.containsKey(split[0])) {
- return SimpleValueFactory.getInstance().createIRI(prefixes.get(split[0]) + split[1]);
- }
- return null;
- }
-
- @Override
- public ExtractorDescription getDescription() {
- return EmbeddedJSONLDExtractorFactory.getDescriptionInstance();
- }
-
- private class JSONLDScript {
-
- private String xpath;
-
- private IRI name;
-
- private String lang;
-
- private String content;
-
- public JSONLDScript(String xpath, IRI name, String content) {
- this.xpath = xpath;
- this.name = name;
- this.content = content;
- }
-
- public JSONLDScript(String xpath, IRI name, String content, String lang) {
- this(xpath, name, content);
- this.lang = lang;
- }
-
- public IRI getName() {
- return name;
- }
-
- public void setName(IRI name) {
- this.name = name;
- }
-
- public String getLang() {
- return lang;
- }
-
- public void setLang(String lang) {
- this.lang = lang;
- }
-
- public String getContent() {
- return content;
- }
-
- public void setContent(String content) {
- this.content = content;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null) {
- return false;
- }
- if (!(o instanceof JSONLDScript)) {
- return false;
- }
-
- JSONLDScript meta = (JSONLDScript) o;
-
- if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) {
- return false;
- }
-
- return true;
- }
-
- @Override
- public int hashCode() {
- return xpath != null ? xpath.hashCode() : 0;
- }
- }
+ private static final SINDICE vSINDICE = SINDICE.getInstance();
+
+ private IRI profile;
+
+ private Map<String, IRI> prefixes = new HashMap<>();
+
+ private String documentLang;
+
+ private JSONLDExtractor extractor;
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void run(ExtractionParameters extractionParameters,
+ ExtractionContext extractionContext, Document in,
+ ExtractionResult out) throws IOException, ExtractionException {
+ profile = extractProfile(in);
+ documentLang = getDocumentLanguage(in);
+ extractLinkDefinedPrefixes(in);
+
+ String baseProfile = vSINDICE.NS;
+ if (profile != null) {
+ baseProfile = profile.toString();
+ }
+
+ extractionContext.getDocumentIRI();
+ Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile,
+ extractionParameters, extractionContext, out);
+ for (JSONLDScript jsonldScript : jsonldScripts) {
+ //String lang = documentLang;
+ //if (jsonldScript.getLang() != null) {
+ // lang = jsonldScript.getLang();
+ //}
+ //out.writeTriple(documentIRI, jsonldScript.getName(),
+ // SimpleValueFactory.getInstance().createLiteral(jsonldScript.getContent(), lang));
+ }
+ }
+
+ /**
+ * Returns the {@link Document} language if declared, <code>null</code>
+ * otherwise.
+ *
+ * @param in
+ * a instance of {@link Document}.
+ * @return the language declared, could be <code>null</code>.
+ */
+ private String getDocumentLanguage(Document in) {
+ String lang = DomUtils.find(in, "string(/HTML/@lang)");
+ if ("".equals(lang)) {
+ return null;
+ }
+ return lang;
+ }
+
+ private IRI extractProfile(Document in) {
+ String profile = DomUtils.find(in, "string(/HTML/@profile)");
+ if ("".equals(profile)) {
+ return null;
+ }
+ return SimpleValueFactory.getInstance().createIRI(profile);
+ }
+
+ /**
+ * It extracts prefixes defined in the <i>LINK</i> meta tags.
+ *
+ * @param in
+ */
+ private void extractLinkDefinedPrefixes(Document in) {
+ List<Node> linkNodes = DomUtils.findAll(in, "/HTML/HEAD/LINK");
+ for (Node linkNode : linkNodes) {
+ NamedNodeMap attributes = linkNode.getAttributes();
+ String rel = attributes.getNamedItem("rel").getTextContent();
+ String href = attributes.getNamedItem("href").getTextContent();
+ if (rel != null && href != null && RDFUtils.isAbsoluteIRI(href)) {
+ prefixes.put(rel, SimpleValueFactory.getInstance().createIRI(href));
+ }
+ }
+ }
+
+ private Set<JSONLDScript> extractJSONLDScript(Document in,
+ String baseProfile, ExtractionParameters extractionParameters,
+ ExtractionContext extractionContext, ExtractionResult out)
+ throws IOException, ExtractionException {
+ List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
+ Set<JSONLDScript> result = new HashSet<>();
+ extractor = new JSONLDExtractorFactory().createExtractor();
+ for (Node jsonldNode : scriptNodes) {
+ NamedNodeMap attributes = jsonldNode.getAttributes();
+ for (int i = 0; i < attributes.getLength(); i++) {
+ if ("application/ld+json".equalsIgnoreCase(attributes.item(i).getTextContent())) {
+ extractor.run(extractionParameters, extractionContext,
+ DomUtils.nodeToInputStream(jsonldNode
+ .getFirstChild()), out);
+ }
+ }
+ Node nameAttribute = attributes.getNamedItem("name");
+ Node contentAttribute = attributes.getNamedItem("content");
+ if (nameAttribute == null || contentAttribute == null) {
+ continue;
+ }
+ String name = nameAttribute.getTextContent();
+ String content = contentAttribute.getTextContent();
+ String xpath = DomUtils.getXPathForNode(jsonldNode);
+ IRI nameAsIRI = getPrefixIfExists(name);
+ if (nameAsIRI == null) {
+ nameAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + name);
+ }
+ JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsIRI,
+ content);
+ result.add(jsonldScript);
+ }
+ return result;
+ }
+
+ private IRI getPrefixIfExists(String name) {
+ String[] split = name.split("\\.");
+ if (split.length == 2 && prefixes.containsKey(split[0])) {
+ return SimpleValueFactory.getInstance().createIRI(prefixes.get(split[0]) + split[1]);
+ }
+ return null;
+ }
+
+ @Override
+ public ExtractorDescription getDescription() {
+ return EmbeddedJSONLDExtractorFactory.getDescriptionInstance();
+ }
+
+ private class JSONLDScript {
+
+ private String xpath;
+
+ public JSONLDScript(String xpath, IRI name, String content) {
+ this.xpath = xpath;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null) {
+ return false;
+ }
+ if (!(o instanceof JSONLDScript)) {
+ return false;
+ }
+
+ JSONLDScript meta = (JSONLDScript) o;
+
+ if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return xpath != null ? xpath.hashCode() : 0;
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
index e67ec42..a3c6550 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
@@ -26,7 +26,6 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.SINDICE;
import org.eclipse.rdf4j.model.IRI;
-import org.eclipse.rdf4j.model.impl.LiteralImpl;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
@@ -51,7 +50,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
private IRI profile;
- private Map<String, IRI> prefixes = new HashMap<String, IRI>();
+ private Map<String, IRI> prefixes = new HashMap<>();
private String documentLang;
@@ -82,25 +81,25 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
lang = meta.getLang();
}
if(meta.isPragmaDirective){
- if(lang != null) {
- out.writeTriple(
+ if(lang != null) {
+ out.writeTriple(
documentIRI,
meta.getHttpEquiv(),
SimpleValueFactory.getInstance().createLiteral(meta.getContent(), lang));
- } else {
+ } else {
out.writeTriple(
documentIRI,
meta.getHttpEquiv(),
SimpleValueFactory.getInstance().createLiteral(meta.getContent()));
- }
- }else {
- if(lang != null) {
- out.writeTriple(
+ }
+ } else {
+ if(lang != null) {
+ out.writeTriple(
documentIRI,
meta.getName(),
SimpleValueFactory.getInstance().createLiteral(meta.getContent(), lang));
- } else {
- out.writeTriple(
+ } else {
+ out.writeTriple(
documentIRI,
meta.getName(),
SimpleValueFactory.getInstance().createLiteral(meta.getContent()));
@@ -117,7 +116,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
*/
private String getDocumentLanguage(Document in) {
String lang = DomUtils.find(in, "string(/HTML/@lang)");
- if (lang.equals("")) {
+ if ("".equals(lang)) {
return null;
}
return lang;
@@ -125,7 +124,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
private IRI extractProfile(Document in) {
String profile = DomUtils.find(in, "string(/HTML/@profile)");
- if (profile.equals("")) {
+ if ("".equals(profile)) {
return null;
}
return SimpleValueFactory.getInstance().createIRI(profile);
@@ -150,7 +149,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
private Set<Meta> extractMetaElement(Document in, String baseProfile) {
List<Node> metaNodes = DomUtils.findAll(in, "/HTML/HEAD/META");
- Set<Meta> result = new HashSet<Meta>();
+ Set<Meta> result = new HashSet<>();
for (Node metaNode : metaNodes) {
NamedNodeMap attributes = metaNode.getAttributes();
Node nameAttribute = attributes.getNamedItem("name");
@@ -223,6 +222,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
this.setPragmaDirective(true);
}
+ @SuppressWarnings("unused")
public Meta(String xpath, String content, IRI httpEquiv, String lang) {
this(xpath,content,httpEquiv);
this.lang = lang;
@@ -234,15 +234,12 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
this.content = content;
}
+ @SuppressWarnings("unused")
public Meta(String xpath, IRI name, String content, String lang) {
this(xpath, name, content);
this.lang = lang;
}
- public boolean isPragmaDirective(){
- return isPragmaDirective;
- }
-
private void setPragmaDirective(boolean value){
this.isPragmaDirective=value;
}
@@ -251,42 +248,29 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
return httpEquiv;
}
- public void setHttpEquiv(IRI httpEquiv){
- this.httpEquiv=httpEquiv;
- }
-
public IRI getName() {
return name;
}
- public void setName(IRI name) {
- this.name = name;
- }
-
public String getLang() {
return lang;
}
- public void setLang(String lang) {
- this.lang = lang;
- }
-
public String getContent() {
return content;
}
- public void setContent(String content) {
- this.content = content;
- }
-
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
+ if (this == o)
+ return true;
+ if (o == null || getClass() != o.getClass())
+ return false;
Meta meta = (Meta) o;
- if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) return false;
+ if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null)
+ return false;
return true;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/Item.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/Item.java b/core/src/main/java/org/apache/any23/extractor/microdata/Item.java
index 04ffa66..769b0d2 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/Item.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/Item.java
@@ -30,11 +30,6 @@ public abstract class Item {
private final String xpath;
/**
- * @return the <b>JSON</b> representation for this item.
- */
- public abstract String toJSON();
-
- /**
* Constructor.
*
* @param xpath xpath to this item in container document.
@@ -47,6 +42,11 @@ public abstract class Item {
}
/**
+ * @return the <b>JSON</b> representation for this item.
+ */
+ public abstract String toJSON();
+
+ /**
* @return the item location in container document.
*/
public String getXpath() {
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
index 0688fb8..f32b468 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemPropValue.java
@@ -31,7 +31,17 @@ import org.apache.any23.util.StringUtils;
*/
public class ItemPropValue {
- private static final ThreadLocal<SimpleDateFormat> sdf = new ThreadLocal<SimpleDateFormat>();
+ /**
+ * Internal content value.
+ */
+ private final Object content;
+
+ /**
+ * Content type.
+ */
+ private final Type type;
+
+ private static final ThreadLocal<SimpleDateFormat> sdf = new ThreadLocal<>();
/**
* Supported types.
@@ -61,16 +71,6 @@ public class ItemPropValue {
}
/**
- * Internal content value.
- */
- private final Object content;
-
- /**
- * Content type.
- */
- private final Type type;
-
- /**
* Constructor.
*
* @param content content object.
@@ -94,12 +94,11 @@ public class ItemPropValue {
);
}
if(content instanceof String && ((String) content).trim().length() == 0) {
- content = "Null";
// ANY23-115 Empty spans seem to break ANY23
// instead of throwing the exception and in effect failing the entire
// parse job we wish to be lenient on web content publishers and add
// Null (String) as content.
- //throw new IllegalArgumentException("Invalid content '" + content + "'");
+ content = "Null";
}
this.content = content;
this.type = type;
@@ -151,7 +150,8 @@ public class ItemPropValue {
* @return <code>true</code> if type is an integer.
*/
public boolean isInteger() {
- if(type != Type.Plain) return false;
+ if(type != Type.Plain)
+ return false;
try {
Integer.parseInt((String) content);
return true;
@@ -164,7 +164,8 @@ public class ItemPropValue {
* @return <code>true</code> if type is a float.
*/
public boolean isFloat() {
- if(type != Type.Plain) return false;
+ if(type != Type.Plain)
+ return false;
try {
Float.parseFloat((String) content);
return true;
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java b/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
index 5f817f5..f36828a 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/ItemScope.java
@@ -87,17 +87,17 @@ public class ItemScope extends Item {
this.refs = refs;
this.itemId = itemId;
- final Map<String, List<ItemProp>> tmpProperties = new HashMap<String, List<ItemProp>>();
+ final Map<String, List<ItemProp>> tmpProperties = new HashMap<>();
for (ItemProp itemProp : itemProps) {
final String propName = itemProp.getName();
List<ItemProp> propList = tmpProperties.get(propName);
if (propList == null) {
- propList = new ArrayList<ItemProp>();
+ propList = new ArrayList<>();
tmpProperties.put(propName, propList);
}
propList.add(itemProp);
}
- final Map<String, List<ItemProp>> properties = new HashMap<String, List<ItemProp>>();
+ final Map<String, List<ItemProp>> properties = new HashMap<>();
for (Map.Entry<String, List<ItemProp>> propertiesEntry : tmpProperties.entrySet()) {
properties.put(
propertiesEntry.getKey(),
@@ -147,7 +147,8 @@ public class ItemScope extends Item {
@Override
public String toJSON() {
StringBuilder sb = new StringBuilder();
- int i, j;
+ int i;
+ int j;
final Collection<List<ItemProp>> itemPropsList = properties.values();
j = 0;
for (List<ItemProp> itemProps : itemPropsList) {
@@ -184,12 +185,12 @@ public class ItemScope extends Item {
@Override
public int hashCode() {
- int i = properties == null ? 0 : properties.hashCode();
- i += id == null ? 0 : id.hashCode();
- i += refs == null ? 0 : refs.hashCode();
- i += type == null ? 0 : type.hashCode();
- i += itemId == null ? 0 : itemId.hashCode();
- return i;
+ int i = properties == null ? 0 : properties.hashCode();
+ i += id == null ? 0 : id.hashCode();
+ i += refs == null ? 0 : Arrays.hashCode(refs);
+ i += type == null ? 0 : type.hashCode();
+ i += itemId == null ? 0 : itemId.hashCode();
+ return i;
}
@Override
@@ -221,15 +222,17 @@ public class ItemScope extends Item {
protected void acquireProperty(ItemProp itemProp) {
List<ItemProp> itemProps = properties.get(itemProp.getName());
if (itemProps == null) {
- itemProps = new ArrayList<ItemProp>();
+ itemProps = new ArrayList<>();
properties.put(itemProp.getName(), itemProps);
}
- if (!itemProps.contains(itemProp)) itemProps.add(itemProp);
+ if (!itemProps.contains(itemProp))
+ itemProps.add(itemProp);
}
protected void disownProperty(ItemProp itemProp) {
List<ItemProp> propList = properties.get(itemProp.getName());
- if (propList != null) propList.remove(itemProp);
+ if (propList != null)
+ propList.remove(itemProp);
}
private String toJSON(String[] in) {
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
index 636f4d2..aa01dfe 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
@@ -106,7 +106,7 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
* 5.2.6
*/
final IRI documentIRI = extractionContext.getDocumentIRI();
- final Map<ItemScope, Resource> mappings = new HashMap<ItemScope, Resource>();
+ final Map<ItemScope, Resource> mappings = new HashMap<>();
for (ItemScope itemScope : itemScopes) {
Resource subject = processType(itemScope, documentIRI, out, mappings);
out.writeTriple(
@@ -143,7 +143,7 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
*/
private String getDocumentLanguage(Document in) {
String lang = DomUtils.find(in, "string(/HTML/@lang)");
- if (lang.equals("")) {
+ if ("".equals(lang)) {
return null;
}
return lang;
@@ -256,13 +256,13 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
}
}
String[] relTokens = rel.getTextContent().split(" ");
- Set<String> tokensWithNoDuplicates = new HashSet<String>();
+ Set<String> tokensWithNoDuplicates = new HashSet<>();
for (String relToken : relTokens) {
if (relToken.contains(":")) {
// if contain semi-colon, skip
continue;
}
- if (relToken.equals("alternate") || relToken.equals("stylesheet")) {
+ if ("alternate".equals(relToken) || "stylesheet".equals(relToken)) {
tokensWithNoDuplicates.add("ALTERNATE-STYLESHEET");
continue;
}
@@ -295,7 +295,7 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
NodeList metas = in.getElementsByTagName("meta");
for (int i = 0; i < metas.getLength(); i++) {
Node meta = metas.item(i);
- String name = DomUtils.readAttribute(meta, "name" , null);
+ String name = DomUtils.readAttribute(meta, "name", null);
String content = DomUtils.readAttribute(meta, "content", null);
if (name != null && content != null) {
if (isAbsoluteURL(name)) {
@@ -482,20 +482,18 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
ExtractionResult out
) throws MalformedURLException, ExtractionException {
IRI predicate;
- if (!isAbsoluteURL(propName) && itemScopeType.equals("") && isStrict) {
+ if (!isAbsoluteURL(propName) && "".equals(itemScopeType) && isStrict) {
return;
- } else if (!isAbsoluteURL(propName) && itemScopeType.equals("") && !isStrict) {
+ } else if (!isAbsoluteURL(propName) && "".equals(itemScopeType) && !isStrict) {
predicate = RDFUtils.iri(toAbsoluteURL(
- defaultNamespace,
- propName,
- '/'
- ).toString());
+ defaultNamespace,
+ propName,
+ '/').toString());
} else {
predicate = RDFUtils.iri(toAbsoluteURL(
- itemScopeType,
- propName,
- '/'
- ).toString());
+ itemScopeType,
+ propName,
+ '/').toString());
}
Value value;
Object propValue = itemProp.getValue().getContent();
@@ -506,10 +504,9 @@ public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {
value = RDFUtils.literal((String) propValue, documentLanguage);
} else if (propType.equals(ItemPropValue.Type.Link)) {
value = RDFUtils.iri(toAbsoluteURL(
- documentIRI.toString(),
- (String) propValue,
- '/'
- ).toString());
+ documentIRI.toString(),
+ (String) propValue,
+ '/').toString());
} else if (propType.equals(ItemPropValue.Type.Date)) {
value = RDFUtils.literal(ItemPropValue.formatDateTime((Date) propValue), XMLSchema.DATE);
} else {
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
index 147fd18..cf05e35 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataParser.java
@@ -48,54 +48,36 @@ public class MicrodataParser {
enum ErrorMode {
/** This mode raises an exception at first encountered error. */
- StopAtFirstError,
+ STOP_AT_FIRST_ERROR,
/** This mode produces a full error report. */
- FullReport
+ FULL_REPORT
}
- public static final String ITEMSCOPE_ATTRIBUTE = "itemscope";
- public static final String ITEMPROP_ATTRIBUTE = "itemprop";
-
- /**
- * List of tags providing the <code>src</code> property.
- */
- public static final Set<String> SRC_TAGS = Collections.unmodifiableSet(
- new HashSet<String>( Arrays.asList("audio", "embed", "frame", "iframe", "img",
- "source", "track", "video", "input", "layer", "script", "textarea") )
- );
-
- /**
- * List of tags providing the <code>href</code> property.
- */
- public static final Set<String> HREF_TAGS = Collections.unmodifiableSet(
- new HashSet<String>( Arrays.asList("a", "area", "link") )
- );
-
private final Document document;
/**
* This set holds the name of properties being dereferenced.
- * The {@link #deferProperties(String...)} checks first if the
+ * The {@link #deferProperties(Node, String...)} checks first if the
* required dereference has been already asked, if so raises
* a loop detection error. This map works in coordination
* with {@link #dereferenceRecursionCounter}, so that at the end of
- * {@link #deferProperties(String...)} call recursion the
+ * {@link #deferProperties(Node, String...)} call recursion the
* {@link #loopDetectorSet} can be cleaned up.
*/
- private final Set<String> loopDetectorSet = new HashSet<String>();
+ private final Set<String> loopDetectorSet = new HashSet<>();
/**
* {@link ItemScope} cache.
*/
- private final Map<Node,ItemScope> itemScopes = new HashMap<Node,ItemScope>();
+ private final Map<Node,ItemScope> itemScopes = new HashMap<>();
/**
* {@link ItemPropValue} cache.
*/
- private final Map<Node, ItemPropValue> itemPropValues = new HashMap<Node, ItemPropValue>();
+ private final Map<Node, ItemPropValue> itemPropValues = new HashMap<>();
/**
- * Counts the recursive call of {@link #deferProperties(String...)}.
+ * Counts the recursive call of {@link #deferProperties(Node, String...)}.
* It helps to cleanup the {@link #loopDetectorSet} when recursion ends.
*/
private int dereferenceRecursionCounter = 0;
@@ -103,12 +85,37 @@ public class MicrodataParser {
/**
* Current error mode.
*/
- private ErrorMode errorMode = ErrorMode.FullReport;
+ private ErrorMode errorMode = ErrorMode.FULL_REPORT;
/**
* List of collected errors. Used when {@link #errorMode} <code>==</code> {@link ErrorMode#FULL_REPORT}.
*/
- private List<MicrodataParserException> errors = new ArrayList<MicrodataParserException>();
+ private List<MicrodataParserException> errors = new ArrayList<>();
+
+ public static final String ITEMSCOPE_ATTRIBUTE = "itemscope";
+ public static final String ITEMPROP_ATTRIBUTE = "itemprop";
+
+ /**
+ * List of tags providing the <code>src</code> property.
+ */
+ public static final Set<String> SRC_TAGS = Collections.unmodifiableSet(
+ new HashSet<String>( Arrays.asList("audio", "embed", "frame", "iframe", "img",
+ "source", "track", "video", "input", "layer", "script", "textarea") )
+ );
+
+ /**
+ * List of tags providing the <code>href</code> property.
+ */
+ public static final Set<String> HREF_TAGS = Collections.unmodifiableSet(
+ new HashSet<String>( Arrays.asList("a", "area", "link") )
+ );
+
+ public MicrodataParser(Document document) {
+ if(document == null) {
+ throw new NullPointerException("Document cannot be null.");
+ }
+ this.document = document;
+ }
/**
* Returns all the <i>itemScope</i>s detected within the given root node.
@@ -158,7 +165,7 @@ public class MicrodataParser {
*/
public static List<Node> getTopLevelItemScopeNodes(Node node) {
final List<Node> itemScopes = getItemScopeNodes(node);
- final List<Node> topLevelItemScopes = new ArrayList<Node>();
+ final List<Node> topLevelItemScopes = new ArrayList<>();
for(Node itemScope : itemScopes) {
if( ! isItemProp(itemScope) ) {
topLevelItemScopes.add(itemScope);
@@ -176,13 +183,13 @@ public class MicrodataParser {
* @param errorMode error management policy.
* @return list of <b>itemscope</b> items.
* @throws MicrodataParserException if
- * <code>errorMode == {@link org.apache.any23.extractor.microdata.MicrodataParser.ErrorMode#StopAtFirstError}</code>
+ * <code>errorMode == {@link org.apache.any23.extractor.microdata.MicrodataParser.ErrorMode#STOP_AT_FIRST_ERROR}</code>
* and an error occurs.
*/
public static MicrodataParserReport getMicrodata(Document document, ErrorMode errorMode)
throws MicrodataParserException {
final List<Node> itemNodes = getTopLevelItemScopeNodes(document);
- final List<ItemScope> items = new ArrayList<ItemScope>();
+ final List<ItemScope> items = new ArrayList<>();
final MicrodataParser microdataParser = new MicrodataParser(document);
microdataParser.setErrorMode(errorMode);
for(Node itemNode : itemNodes) {
@@ -203,7 +210,7 @@ public class MicrodataParser {
*/
public static MicrodataParserReport getMicrodata(Document document) {
try {
- return getMicrodata(document, ErrorMode.FullReport);
+ return getMicrodata(document, ErrorMode.FULL_REPORT);
} catch (MicrodataParserException mpe) {
throw new IllegalStateException("Unexpected exception.", mpe);
}
@@ -255,12 +262,14 @@ public class MicrodataParser {
* @param candidates list of candidate nodes.
* @return list of unnested nodes.
*/
+ @SuppressWarnings("unused")
private static List<Node> getUnnestedNodes(List<Node> candidates) {
- final List<Node> unnesteds = new ArrayList<Node>();
+ final List<Node> unnesteds = new ArrayList<>();
for(int i = 0; i < candidates.size(); i++) {
boolean skip = false;
for(int j = 0; j < candidates.size(); j++) {
- if(i == j) continue;
+ if(i == j)
+ continue;
if( DomUtils.isAncestorOf(candidates.get(j), candidates.get(i), true) ) {
skip = true;
break;
@@ -273,15 +282,9 @@ public class MicrodataParser {
return unnesteds;
}
- public MicrodataParser(Document document) {
- if(document == null) {
- throw new NullPointerException("Document cannot be null.");
- }
- this.document = document;
- }
-
public void setErrorMode(ErrorMode errorMode) {
- if(errorMode == null) throw new IllegalArgumentException("errorMode must be not null.");
+ if(errorMode == null)
+ throw new IllegalArgumentException("errorMode must be not null.");
this.errorMode = errorMode;
}
@@ -306,7 +309,8 @@ public class MicrodataParser {
*/
public ItemPropValue getPropertyValue(Node node) throws MicrodataParserException {
final ItemPropValue itemPropValue = itemPropValues.get(node);
- if(itemPropValue != null) return itemPropValue;
+ if(itemPropValue != null)
+ return itemPropValue;
final String nodeName = node.getNodeName().toLowerCase();
if (DomUtils.hasAttribute(node, "content")) {
@@ -338,7 +342,7 @@ public class MicrodataParser {
}
if( isItemScope(node) ) {
- return new ItemPropValue( getItemScope(node), ItemPropValue.Type.Nested );
+ return new ItemPropValue( getItemScope(node), ItemPropValue.Type.Nested);
}
final ItemPropValue newItemPropValue = new ItemPropValue( node.getTextContent(), ItemPropValue.Type.Plain);
@@ -356,7 +360,7 @@ public class MicrodataParser {
* @throws MicrodataParserException if an error occurs while retrieving a property value.
*/
public List<ItemProp> getItemProps(final Node scopeNode, boolean skipRoot) throws MicrodataParserException {
- final Set<Node> accepted = new LinkedHashSet<Node>();
+ final Set<Node> accepted = new LinkedHashSet<>();
if (!skipRoot) {
NamedNodeMap attributes = scopeNode.getAttributes();
@@ -375,19 +379,20 @@ public class MicrodataParser {
if (attributes.getNamedItem(ITEMPROP_ATTRIBUTE) != null && !scopeNode.equals(node)) {
accepted.add(node);
}
- if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
- // Don't visit descendants of nodes that define a new scope
- return FILTER_REJECT;
- }
+// ANY23-131 Nested Microdata are not extracted
+// if (attributes.getNamedItem(ITEMSCOPE_ATTRIBUTE) != null) {
+// // Don't visit descendants of nodes that define a new scope
+// return FILTER_REJECT;
+// }
}
return FILTER_ACCEPT;
}
}, false);
// To populate accepted we only need to walk the tree.
- while (treeWalker.nextNode() != null);
+ while (treeWalker.nextNode() != null);
- final List<ItemProp> result = new ArrayList<ItemProp>();
+ final List<ItemProp> result = new ArrayList<>();
for(Node itemPropNode : accepted) {
final String itemProp = DomUtils.readAttribute(itemPropNode, ITEMPROP_ATTRIBUTE, null);
final String[] propertyNames = itemProp.split(" ");
@@ -414,14 +419,15 @@ public class MicrodataParser {
/**
* Given a document and a list of <b>itemprop</b> names this method will return
* such <b>itemprops</b>.
- *
+ *
+ * @param node a {@link org.w3c.dom.Node} to which the refs belong
* @param refs list of references.
* @return list of retrieved <b>itemprop</b>s.
* @throws MicrodataParserException if a loop is detected or a property name is missing.
*/
- public ItemProp[] deferProperties(String... refs) throws MicrodataParserException {
+ public ItemProp[] deferProperties(Node node, String... refs) throws MicrodataParserException {
dereferenceRecursionCounter++;
- final List<ItemProp> result = new ArrayList<ItemProp>();
+ final List<ItemProp> result = new ArrayList<>();
try {
for (String ref : refs) {
if (loopDetectorSet.contains(ref)) {
@@ -434,18 +440,22 @@ public class MicrodataParser {
);
}
loopDetectorSet.add(ref);
- final Element element = document.getElementById(ref);
+ Element element = (Element) node;
if (element == null) {
manageError(
new MicrodataParserException( String.format("Unknown itemProp id '%s'", ref ), null )
);
continue;
}
- result.addAll(getItemProps(element, false));
+ List<ItemProp> propList = getItemProps(element, false);
+ if (!result.containsAll(propList)) {
+ result.addAll(propList);
+ }
}
} catch (MicrodataParserException mpe) {
if(dereferenceRecursionCounter == 1)
- manageError(mpe); else throw mpe; // Recursion end, this the the top call.
+ manageError(mpe);
+ else throw mpe; // Recursion end, this the the top call.
} finally {
dereferenceRecursionCounter--;
if(dereferenceRecursionCounter == 0) { // Recursion end, this is the top call.
@@ -464,7 +474,8 @@ public class MicrodataParser {
*/
public ItemScope getItemScope(Node node) throws MicrodataParserException {
final ItemScope itemScope = itemScopes.get(node);
- if(itemScope != null) return itemScope;
+ if(itemScope != null)
+ return itemScope;
final String id = DomUtils.readAttribute(node, "id" , null);
final String itemref = DomUtils.readAttribute(node, "itemref" , null);
@@ -475,7 +486,7 @@ public class MicrodataParser {
final String[] itemrefIDs = itemref == null ? new String[0] : itemref.split(" ");
final ItemProp[] deferredProperties;
try {
- deferredProperties = deferProperties(itemrefIDs);
+ deferredProperties = deferProperties(node, itemrefIDs);
} catch (MicrodataParserException mpe) {
mpe.setErrorNode(node);
throw mpe;
@@ -506,12 +517,13 @@ public class MicrodataParser {
}
private void manageError(MicrodataParserException mpe) throws MicrodataParserException {
- if(errorMode == ErrorMode.StopAtFirstError) {
+ if(errorMode == ErrorMode.STOP_AT_FIRST_ERROR) {
throw mpe;
}
- if(errorMode != ErrorMode.FullReport) throw new IllegalStateException("Unsupported mode " + errorMode);
+ if(errorMode != ErrorMode.FULL_REPORT)
+ throw new IllegalStateException("Unsupported mode " + errorMode);
if(errors == null) {
- errors = new ArrayList<MicrodataParserException>();
+ errors = new ArrayList<>();
}
errors.add(mpe);
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java b/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
index 8fef9b1..dc0eaeb 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/QuadTemplate.java
@@ -117,6 +117,7 @@ public class QuadTemplate {
public void printOut(ExtractionResult er, Map<String,String> variableAssignment) {
final Resource s = subject.getValue(variableAssignment);
final IRI p = predicate.getValue(variableAssignment);
+ @SuppressWarnings("unchecked")
final Value o = object.getValue(variableAssignment);
if(graph != null) {
final IRI g = graph.getValue(variableAssignment);
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
index d9156a5..95d7b0d 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateObject.java
@@ -18,7 +18,6 @@
package org.apache.any23.extractor.xpath;
import org.eclipse.rdf4j.model.Value;
-import org.eclipse.rdf4j.model.impl.BNodeImpl;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
/**
@@ -32,9 +31,9 @@ public class TemplateObject extends Term {
* Supported object types.
*/
public enum Type {
- uri,
- bnode,
- literal
+ URI,
+ BNODE,
+ LITERAL
}
/**
@@ -61,33 +60,37 @@ public class TemplateObject extends Term {
@Override
protected Value getValueInternal(String value) {
switch (type) {
- case uri:
- try {
- return SimpleValueFactory.getInstance().createIRI(value);
- } catch (IllegalArgumentException iae) {
- throw new IllegalArgumentException(
- String.format("Expected a valid IRI for object template, found '%s'", value),
- iae
- );
- }
- case bnode:
+ case URI:
+ return createIRI(value);
+ case BNODE:
return SimpleValueFactory.getInstance().createBNode(value);
- case literal:
+ case LITERAL:
return SimpleValueFactory.getInstance().createLiteral(value);
default:
throw new IllegalStateException();
}
}
+ private Value createIRI(String value) {
+ try {
+ return SimpleValueFactory.getInstance().createIRI(value);
+ } catch (IllegalArgumentException iae) {
+ throw new IllegalArgumentException(
+ String.format("Expected a valid IRI for object template, found '%s'", value),
+ iae
+ );
+ }
+ }
+
@Override
public String toString() {
final String superStr = super.toString();
switch (type) {
- case uri:
+ case URI:
return "<" + superStr + ">";
- case bnode:
+ case BNODE:
return "_:" + superStr;
- case literal:
+ case LITERAL:
return "'" + superStr + "'";
default:
throw new IllegalStateException();
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java
index a4ce270..80c8e57 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/TemplateSubject.java
@@ -18,7 +18,6 @@
package org.apache.any23.extractor.xpath;
import org.eclipse.rdf4j.model.Resource;
-import org.eclipse.rdf4j.model.impl.BNodeImpl;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
/**
@@ -32,8 +31,8 @@ public class TemplateSubject extends Term<Resource> {
* Supported subject types.
*/
public enum Type {
- uri,
- bnode
+ URI,
+ BNODE
}
/**
@@ -60,9 +59,9 @@ public class TemplateSubject extends Term<Resource> {
@Override
protected Resource getValueInternal(String value) {
switch (type) {
- case uri:
+ case URI:
return SimpleValueFactory.getInstance().createIRI(value);
- case bnode:
+ case BNODE:
return SimpleValueFactory.getInstance().createBNode(value);
default:
throw new IllegalStateException();
@@ -73,9 +72,9 @@ public class TemplateSubject extends Term<Resource> {
public String toString() {
final String superStr = super.toString();
switch (type) {
- case uri:
+ case URI:
return "<" + superStr + ">";
- case bnode:
+ case BNODE:
return "_:" + superStr;
default:
throw new IllegalStateException();
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java b/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
index bacb90c..90863ac 100644
--- a/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
+++ b/core/src/main/java/org/apache/any23/extractor/yaml/ElementsProcessor.java
@@ -58,6 +58,10 @@ public class ElementsProcessor {
private static final ElementsProcessor _ep = new ElementsProcessor();
+ // hide constructor
+ private ElementsProcessor() {
+ }
+
/**
* A model holder describes the two required parameters which makes a model useful
* in further processing: a root node and model itself.
@@ -66,6 +70,11 @@ public class ElementsProcessor {
private final Value root;
private final Model model;
+ public ModelHolder(Value root, Model model) {
+ this.root = root;
+ this.model = model;
+ }
+
public Value getRoot() {
return root;
}
@@ -73,11 +82,6 @@ public class ElementsProcessor {
public Model getModel() {
return model;
}
-
- public ModelHolder(Value root, Model model) {
- this.root = root;
- this.model = model;
- }
}
@@ -100,15 +104,16 @@ public class ElementsProcessor {
* created.
* @return instance of {@link ModelHolder},
*/
+ @SuppressWarnings("unchecked")
public ModelHolder asModel(IRI namespace, final Object t, Value rootNode) {
if (t == null) {
return null;
}
if (t instanceof List) {
- return processList(namespace, (List) t);
+ return processList(namespace, (List<Object>) t);
} else if (t instanceof Map) {
- return processMap(namespace, (Map) t, rootNode);
+ return processMap(namespace, (Map<String, Object>) t, rootNode);
} else if (t instanceof String) {
return asModelHolder(RDFUtils.makeIRI(t.toString()), modelFactory.createEmptyModel());
} else {
@@ -174,7 +179,6 @@ public class ElementsProcessor {
return asModelHolder(nodeURI, model);
}
- @SuppressWarnings("UnusedAssignment")
protected ModelHolder processList(IRI ns, List<Object> object) {
if (object.isEmpty() || object.stream().noneMatch((i) -> {
@@ -217,10 +221,6 @@ public class ElementsProcessor {
return asModelHolder(listRoot, finalModel);
}
- // hide constructor
- private ElementsProcessor() {
- }
-
public static final ElementsProcessor getInstance() {
return _ep;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java b/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java
index c6efba8..0cbc78c 100644
--- a/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java
+++ b/core/src/main/java/org/apache/any23/rdf/Any23ValueFactoryWrapper.java
@@ -29,7 +29,6 @@ import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
-import org.eclipse.rdf4j.model.impl.ValueFactoryBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -90,73 +89,90 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
return defaultLiteralLanguage;
}
+ @Override
public BNode createBNode() {
return wrappedFactory.createBNode();
}
+ @Override
public BNode createBNode(String id) {
- if (id == null) return null;
+ if (id == null)
+ return null;
return wrappedFactory.createBNode(id);
}
+ @Override
public Literal createLiteral(String content) {
- if (content == null) return null;
+ if (content == null)
+ return null;
if (defaultLiteralLanguage == null) {
- return wrappedFactory.createLiteral(content);
+ return wrappedFactory.createLiteral(content);
} else {
- return wrappedFactory.createLiteral(content, defaultLiteralLanguage);
+ return wrappedFactory.createLiteral(content, defaultLiteralLanguage);
}
}
+ @Override
public Literal createLiteral(boolean b) {
return wrappedFactory.createLiteral(b);
}
+ @Override
public Literal createLiteral(byte b) {
return wrappedFactory.createLiteral(b);
}
+ @Override
public Literal createLiteral(short i) {
return wrappedFactory.createLiteral(i);
}
+ @Override
public Literal createLiteral(int i) {
return wrappedFactory.createLiteral(i);
}
+ @Override
public Literal createLiteral(long l) {
return wrappedFactory.createLiteral(l);
}
+ @Override
public Literal createLiteral(float v) {
return wrappedFactory.createLiteral(v);
}
+ @Override
public Literal createLiteral(double v) {
return wrappedFactory.createLiteral(v);
}
- @Override
- public Literal createLiteral(BigDecimal v) {
+ @Override
+ public Literal createLiteral(BigDecimal v) {
return wrappedFactory.createLiteral(v);
- }
+ }
- @Override
- public Literal createLiteral(BigInteger v) {
+ @Override
+ public Literal createLiteral(BigInteger v) {
return wrappedFactory.createLiteral(v);
- }
+ }
+ @Override
public Literal createLiteral(XMLGregorianCalendar calendar) {
return wrappedFactory.createLiteral(calendar);
}
+ @Override
public Literal createLiteral(String label, String language) {
- if (label == null) return null;
+ if (label == null)
+ return null;
return wrappedFactory.createLiteral(label, language);
}
+ @Override
public Literal createLiteral(String pref, IRI value) {
- if (pref == null) return null;
+ if (pref == null)
+ return null;
return wrappedFactory.createLiteral(pref, value);
}
@@ -165,6 +181,7 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
return wrappedFactory.createLiteral(date);
}
+ @Override
public Statement createStatement(Resource sub, IRI pre, Value obj) {
if (sub == null || pre == null || obj == null) {
return null;
@@ -172,8 +189,10 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
return wrappedFactory.createStatement(sub, pre, obj);
}
+ @Override
public Statement createStatement(Resource sub, IRI pre, Value obj, Resource context) {
- if (sub == null || pre == null || obj == null) return null;
+ if (sub == null || pre == null || obj == null)
+ return null;
return wrappedFactory.createStatement(sub, pre, obj, context);
}
@@ -181,8 +200,10 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
* @param uriStr input string to create URI from.
* @return a valid sesame IRI or null if any exception occurred
*/
+ @Override
public IRI createIRI(String uriStr) {
- if (uriStr == null) return null;
+ if (uriStr == null)
+ return null;
try {
return wrappedFactory.createIRI(RDFUtils.fixIRIWithException(uriStr));
} catch (Exception e) {
@@ -194,8 +215,10 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
/**
* @return a valid sesame IRI or null if any exception occurred
*/
+ @Override
public IRI createIRI(String namespace, String localName) {
- if (namespace == null || localName == null) return null;
+ if (namespace == null || localName == null)
+ return null;
return wrappedFactory.createIRI(RDFUtils.fixIRIWithException(namespace), localName);
}
@@ -235,9 +258,11 @@ public class Any23ValueFactoryWrapper implements ValueFactory {
* @return a valid sesame IRI or null if any exception occurred
*/
public IRI fixLink(String link, String defaultSchema) {
- if (link == null) return null;
+ if (link == null)
+ return null;
link = fixWhiteSpace(link);
- if ("".equals(link)) return null;
+ if ("".equals(link))
+ return null;
if (defaultSchema != null && !link.startsWith(defaultSchema + ":")) {
link = defaultSchema + ":" + link;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
index b8f143b..aee9e14 100644
--- a/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
+++ b/core/src/main/java/org/apache/any23/rdf/RDFUtils.java
@@ -24,19 +24,19 @@ import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
-import org.eclipse.rdf4j.model.URI;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.rio.RDFFormat;
-import org.eclipse.rdf4j.rio.RDFHandlerException;
-import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.RDFParserRegistry;
import org.eclipse.rdf4j.rio.RDFWriter;
import org.eclipse.rdf4j.rio.Rio;
+import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
import org.eclipse.rdf4j.rio.helpers.StatementCollector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
@@ -67,6 +67,10 @@ public class RDFUtils {
private static final ValueFactory valueFactory = SimpleValueFactory.getInstance();
+ private static final Logger LOG = LoggerFactory.getLogger(RDFUtils.class);
+
+ private RDFUtils() {}
+
/**
* Fixes typical errors in an absolute org.eclipse.rdf4j.model.IRI, such as unescaped spaces.
*
@@ -177,17 +181,6 @@ public class RDFUtils {
/**
* Creates a {@link org.eclipse.rdf4j.model.IRI}.
- * @param uri string representation of the {@link org.eclipse.rdf4j.model.IRI}
- * @return a valid {@link org.eclipse.rdf4j.model.IRI}
- * @deprecated Use {@link #iri(String)} instead.
- */
- @Deprecated
- public static org.eclipse.rdf4j.model.IRI uri(String uri) {
- return iri(uri);
- }
-
- /**
- * Creates a {@link org.eclipse.rdf4j.model.IRI}.
* @param iri a base string for the {@link org.eclipse.rdf4j.model.IRI}
* @return a valid {@link org.eclipse.rdf4j.model.IRI}
*/
@@ -197,18 +190,6 @@ public class RDFUtils {
/**
* Creates a {@link org.eclipse.rdf4j.model.IRI}.
- * @deprecated Use {@link #iri(String, String)} instead.
- * @param namespace a base namespace for the {@link org.eclipse.rdf4j.model.IRI}
- * @param localName a local name to associate with the namespace
- * @return a valid {@link org.eclipse.rdf4j.model.IRI}
- */
- @Deprecated
- public static org.eclipse.rdf4j.model.IRI uri(String namespace, String localName) {
- return valueFactory.createIRI(namespace, localName);
- }
-
- /**
- * Creates a {@link org.eclipse.rdf4j.model.IRI}.
* @param namespace a base namespace for the {@link org.eclipse.rdf4j.model.IRI}
* @param localName a local name to associate with the namespace
* @return a valid {@link org.eclipse.rdf4j.model.IRI}
@@ -297,25 +278,12 @@ public class RDFUtils {
* @return valid {@link org.eclipse.rdf4j.model.Literal}
*/
public static Literal literal(String s, String l) {
- if(l == null) {
- // HACK: Workaround for ANY23 code that passes null in for language tag
- return valueFactory.createLiteral(s);
- } else {
- return valueFactory.createLiteral(s, l);
- }
- }
-
- /**
- * Creates a {@link Literal}.
- * @param s string representation of the base namespace for the
- * {@link org.eclipse.rdf4j.model.Literal}
- * @param datatype the datatype to associate with the namespace.
- * @return valid {@link org.eclipse.rdf4j.model.Literal}
- * @deprecated Use {@link #literal(String, org.eclipse.rdf4j.model.IRI)} instead.
- */
- @Deprecated
- public static Literal literal(String s, URI datatype) {
- return valueFactory.createLiteral(s, datatype);
+ if(l == null) {
+ // HACK: Workaround for ANY23 code that passes null in for language tag
+ return valueFactory.createLiteral(s);
+ } else {
+ return valueFactory.createLiteral(s, l);
+ }
}
/**
@@ -488,15 +456,13 @@ public class RDFUtils {
* @param is input stream containing <code>RDF</code>.
* @param baseIRI base uri.
* @return list of statements detected within the input stream.
- * @throws RDFHandlerException if there is an error handling the RDF
* @throws IOException if there is an error reading the {@link java.io.InputStream}
- * @throws RDFParseException if there is an error handling the RDF
*/
public static Statement[] parseRDF(RDFFormat format, InputStream is, String baseIRI)
- throws RDFHandlerException, IOException, RDFParseException {
+ throws IOException {
final StatementCollector handler = new StatementCollector();
final RDFParser parser = getParser(format);
- parser.setVerifyData(true);
+ parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
parser.setStopAtFirstError(true);
parser.setPreserveBNodeIDs(true);
parser.setRDFHandler(handler);
@@ -511,12 +477,10 @@ public class RDFUtils {
* @param format input format type.
* @param is input stream containing <code>RDF</code>.
* @return list of statements detected within the input stream.
- * @throws RDFHandlerException if there is an error handling the RDF
* @throws IOException if there is an error reading the {@link java.io.InputStream}
- * @throws RDFParseException if there is an error handling the RDF
*/
public static Statement[] parseRDF(RDFFormat format, InputStream is)
- throws RDFHandlerException, IOException, RDFParseException {
+ throws IOException {
return parseRDF(format, is, "");
}
@@ -527,12 +491,10 @@ public class RDFUtils {
* @param format input format type.
* @param in input string containing <code>RDF</code>.
* @return list of statements detected within the input string.
- * @throws RDFHandlerException if there is an error handling the RDF
* @throws IOException if there is an error reading the {@link java.io.InputStream}
- * @throws RDFParseException if there is an error handling the RDF
*/
public static Statement[] parseRDF(RDFFormat format, String in)
- throws RDFHandlerException, IOException, RDFParseException {
+ throws IOException {
return parseRDF(format, new ByteArrayInputStream(in.getBytes()));
}
@@ -543,11 +505,9 @@ public class RDFUtils {
* @param resource resource name.
* @return the statements declared within the resource file.
* @throws java.io.IOException if an error occurs while reading file.
- * @throws org.eclipse.rdf4j.rio.RDFHandlerException if an error occurs while parsing file.
- * @throws org.eclipse.rdf4j.rio.RDFParseException if an error occurs while parsing file.
*/
- public static Statement[] parseRDF(String resource) throws RDFHandlerException, IOException, RDFParseException {
- final int extIndex = resource.lastIndexOf(".");
+ public static Statement[] parseRDF(String resource) throws IOException {
+ final int extIndex = resource.lastIndexOf('.');
if(extIndex == -1)
throw new IllegalArgumentException("Error while detecting the extension in resource name " + resource);
final String extension = resource.substring(extIndex + 1);
@@ -568,8 +528,10 @@ public class RDFUtils {
new java.net.URI(href.trim());
return true;
} catch (IllegalArgumentException e) {
+ LOG.error("Error processing href: {}", href, e);
return false;
} catch (URISyntaxException e) {
+ LOG.error("Error interpreting href: {} as URI.", href, e);
return false;
}
}
@@ -649,7 +611,5 @@ public class RDFUtils {
nodeId++;
return bnode;
}
-
- private RDFUtils() {}
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java b/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
index 648db28..2e591d0 100644
--- a/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
+++ b/core/src/main/java/org/apache/any23/validator/XMLValidationReportSerializer.java
@@ -41,6 +41,7 @@ import java.util.List;
*/
public class XMLValidationReportSerializer implements ValidationReportSerializer {
+ @Override
public void serialize(ValidationReport vr, OutputStream os) throws SerializationException {
final PrintStream ps = new PrintStream(os);
try {
@@ -54,9 +55,9 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
if(o == null) {
return;
}
- final Class oClass = o.getClass();
+ final Class<? extends Object> oClass = o.getClass();
final String oClassName = getClassName(oClass);
- ps.printf("<%s>\n", oClassName);
+ ps.printf("<%s>%n", oClassName);
List<Method> getters = filterGetters(o.getClass());
if(getters.isEmpty()) {
ps.print( o.toString() );
@@ -65,11 +66,11 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
for (Method getter : getters) {
serializeGetterValue(o, getter, ps);
}
- ps.printf("</%s>\n", oClassName);
+ ps.printf("</%s>%n", oClassName);
}
- private String getClassName(Class oClass) {
- final NodeName nodeName = (NodeName) oClass.getAnnotation(NodeName.class);
+ private String getClassName(Class<? extends Object> oClass) {
+ final NodeName nodeName = oClass.getAnnotation(NodeName.class);
if(nodeName != null) {
return nodeName.value();
}
@@ -77,9 +78,9 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
return Character.toLowerCase(simpleName.charAt(0)) + simpleName.substring(1);
}
- private List<Method> filterGetters(Class c) {
+ private List<Method> filterGetters(Class<? extends Object> c) {
Method[] methods = c.getDeclaredMethods();
- List<Method> filtered = new ArrayList<Method>();
+ List<Method> filtered = new ArrayList<>();
for(Method method : methods) {
if(Modifier.isStatic(method.getModifiers())) {
continue;
@@ -110,9 +111,9 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
}
final String property = getPropertyFromMethodName(methodName);
if( isManaged(value) ) {
- ps.printf("<%s>\n", property);
+ ps.printf("<%s>%n", property);
printObject(value, ps);
- ps.printf("</%s>\n", property);
+ ps.printf("</%s>%n", property);
} else {
List<Method> getters = filterGetters(value.getClass());
for (Method getter : getters) {
@@ -148,7 +149,7 @@ public class XMLValidationReportSerializer implements ValidationReportSerializer
return;
}
if(o instanceof Collection) {
- Collection collection = (Collection) o;
+ Collection<?> collection = (Collection<?>) o;
if(collection.isEmpty()) {
return;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java b/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java
index 0275c4e..cb3e668 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/AboutNotURIRule.java
@@ -45,6 +45,7 @@ public class AboutNotURIRule implements Rule {
return "about-not-uri-rule";
}
+ @SuppressWarnings("unchecked")
@Override
public boolean applyOn(
DOMDocument document,
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
index 1b965ec..757b6de 100644
--- a/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
+++ b/core/src/main/java/org/apache/any23/validator/rule/MetaNameMisuseRule.java
@@ -43,6 +43,7 @@ public class MetaNameMisuseRule implements Rule {
return "meta-name-misuse-rule";
}
+ @SuppressWarnings("unchecked")
@Override
public boolean applyOn(
DOMDocument document,
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java b/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
index 70d241a..f678913 100644
--- a/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
+++ b/core/src/main/java/org/apache/any23/vocab/RDFSchemaUtils.java
@@ -43,7 +43,9 @@ import java.util.Map;
public class RDFSchemaUtils {
private static final String RDF_XML_SEPARATOR = StringUtils.multiply('=', 100);
-
+
+ private RDFSchemaUtils() {}
+
/**
* Serializes a vocabulary composed of the given <code>namespace</code>,
* <code>resources</code> and <code>properties</code>.
@@ -61,7 +63,7 @@ public class RDFSchemaUtils {
IRI[] properties,
Map<IRI,String> comments,
RDFWriter writer
- ) throws RDFHandlerException {
+ ) {
writer.startRDF();
for(IRI clazz : classes) {
writer.handleStatement( RDFUtils.quad(clazz, RDF.TYPE, RDFS.CLASS, namespace) );
@@ -87,8 +89,7 @@ public class RDFSchemaUtils {
* @param writer output writer.
* @throws RDFHandlerException if there is an error handling the RDF
*/
- public static void serializeVocabulary(Vocabulary vocabulary, RDFWriter writer)
- throws RDFHandlerException {
+ public static void serializeVocabulary(Vocabulary vocabulary, RDFWriter writer) {
serializeVocabulary(
vocabulary.getNamespace(),
vocabulary.getClasses(),
@@ -111,8 +112,7 @@ public class RDFSchemaUtils {
Vocabulary vocabulary,
RDFFormat format,
boolean willFollowAnother,
- PrintStream ps
- ) throws RDFHandlerException {
+ PrintStream ps) {
final RDFWriter rdfWriter;
if(format == RDFFormat.RDFXML) {
rdfWriter = Rio.createWriter(RDFFormat.RDFXML, ps);
@@ -134,8 +134,7 @@ public class RDFSchemaUtils {
* @return string contained serialization.
* @throws RDFHandlerException if there is an error handling the RDF
*/
- public static String serializeVocabulary(Vocabulary vocabulary, RDFFormat format)
- throws RDFHandlerException {
+ public static String serializeVocabulary(Vocabulary vocabulary, RDFFormat format) {
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
final PrintStream ps = new PrintStream(baos);
serializeVocabulary(vocabulary, format, false, ps);
@@ -150,16 +149,17 @@ public class RDFSchemaUtils {
* @param ps output print stream.
*/
public static void serializeVocabularies(RDFFormat format, PrintStream ps) {
- final Class vocabularyClass = Vocabulary.class;
+ final Class<Vocabulary> vocabularyClass = Vocabulary.class;
+ @SuppressWarnings("rawtypes")
final List<Class> vocabularies = DiscoveryUtils.getClassesInPackage(
vocabularyClass.getPackage().getName(),
vocabularyClass
);
int currentIndex = 0;
- for (Class vocabClazz : vocabularies) {
+ for (Class<?> vocabClazz : vocabularies) {
final Vocabulary instance;
try {
- final Constructor constructor = vocabClazz.getDeclaredConstructor();
+ final Constructor<?> constructor = vocabClazz.getDeclaredConstructor();
constructor.setAccessible(true);
instance = (Vocabulary) constructor.newInstance();
} catch (Exception e) {
@@ -173,6 +173,4 @@ public class RDFSchemaUtils {
}
}
- private RDFSchemaUtils() {}
-
}