You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2015/09/10 09:50:45 UTC

[01/13] any23 git commit: Reformat some of README for improved readability

Repository: any23
Updated Branches:
  refs/heads/master 0d106d4f2 -> 9bcc9ea3c


Reformat some of README for improved readability


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/34f0d24a
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/34f0d24a
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/34f0d24a

Branch: refs/heads/master
Commit: 34f0d24adcb7d5544055619a1c10b4718d28c30c
Parents: a03bafa
Author: Lewis John McGibbney <le...@jpl.nasa.gov>
Authored: Tue Apr 14 15:15:52 2015 -0500
Committer: Lewis John McGibbney <le...@jpl.nasa.gov>
Committed: Tue Apr 14 15:15:52 2015 -0500

----------------------------------------------------------------------
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/34f0d24a/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index c5476ce..837c5de 100644
--- a/README.md
+++ b/README.md
@@ -67,11 +67,13 @@ tar -zxvf $ANY23_HOME/core/target/apache-any23-core-${version-SNAPSHOT}.tar.gz
 Any23 comes with some command line tools. Within the directory you just extracted, you can invoke:
 Linux
 ```  
-$ANY23_HOME/core/target/apache-any23-core-${version-SNAPSHOT}/bin/any23       # Provides the main Any23 use case: metadata extraction on a file or URL source.
+$ANY23_HOME/core/target/apache-any23-core-${version-SNAPSHOT}/bin/any23       
+# Provides the main Any23 use case: metadata extraction on a file or URL source.
 ```
 Windows
 ```
-$ANY23_HOME/core/target/apache-any23-core-${version-SNAPSHOT}/bin/any23.bat      # Provides the main Any23 use case: metadata extraction on a file or URL source.
+$ANY23_HOME/core/target/apache-any23-core-${version-SNAPSHOT}/bin/any23.bat      
+# Provides the main Any23 use case: metadata extraction on a file or URL source.
 ```
 The complete documentation about these tools can be found [here](http://any23.apache.org/getting-started.html)
 


[06/13] any23 git commit: correction to comments

Posted by le...@apache.org.
correction to comments


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/817029a8
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/817029a8
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/817029a8

Branch: refs/heads/master
Commit: 817029a862a2beeec06c30a4194963c3efb331d3
Parents: cc0dfbe
Author: Nisala <ni...@gmail.com>
Authored: Mon Jul 20 01:19:21 2015 +0530
Committer: Nisala <ni...@gmail.com>
Committed: Mon Jul 20 01:19:21 2015 +0530

----------------------------------------------------------------------
 api/src/main/java/org/apache/any23/vocab/HProduct.java             | 2 +-
 .../apache/any23/extractor/html/microformats2/HEventExtractor.java | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/817029a8/api/src/main/java/org/apache/any23/vocab/HProduct.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HProduct.java b/api/src/main/java/org/apache/any23/vocab/HProduct.java
index 9630db3..bbbaf32 100644
--- a/api/src/main/java/org/apache/any23/vocab/HProduct.java
+++ b/api/src/main/java/org/apache/any23/vocab/HProduct.java
@@ -20,7 +20,7 @@ package org.apache.any23.vocab;
 import org.openrdf.model.URI;
 
 /**
- * Vocabulary to map the <a href="http://microformats.org/wiki/hitem">h-item</a> microformat.
+ * Vocabulary to map the <a href="http://microformats.org/wiki/h-product">h-product</a> microformat.
  *
  * @author Nisala Nirmana
  */

http://git-wip-us.apache.org/repos/asf/any23/blob/817029a8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
index 8ce70a6..ce67d86 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
@@ -33,7 +33,7 @@ import static org.apache.any23.extractor.html.HTMLDocument.TextField;
 
 
 /**
- * Extractor for the <a href="http://microformats.org/wiki/hcalendar">hCalendar</a>
+ * Extractor for the <a href="http://microformats.org/wiki/h-event">h-event</a>
  * microformat.
  *
  * @author Nisala Nirmana


[10/13] any23 git commit: non commited HCard embedded properties

Posted by le...@apache.org.
non commited HCard embedded properties


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/6ad6d875
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/6ad6d875
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/6ad6d875

Branch: refs/heads/master
Commit: 6ad6d87585a566765b34a6fc5c226d54f55d56eb
Parents: cf48a5b
Author: Nisala <ni...@gmail.com>
Authored: Thu Aug 27 01:06:57 2015 +0530
Committer: Nisala <ni...@gmail.com>
Committed: Thu Aug 27 01:06:57 2015 +0530

----------------------------------------------------------------------
 .../apache/any23/extractor/html/microformats2/HEventExtractor.java | 2 +-
 .../any23/extractor/html/microformats2/HResumeExtractor.java       | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/6ad6d875/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
index 3f4d817..67a476f 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
@@ -96,7 +96,7 @@ public class HEventExtractor extends EntityBasedMicroformatExtractor {
         addURLs(fragment, event);
         addCategories(fragment, event);
         addLocations(fragment, event);
-        
+        addAttendees(fragment,event);
         return true;
     }
 

http://git-wip-us.apache.org/repos/asf/any23/blob/6ad6d875/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
index 2026219..06f4f3c 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
@@ -80,6 +80,8 @@ public class HResumeExtractor extends EntityBasedMicroformatExtractor {
         addExperiences(fragment, person);
         addEducations(fragment, person);
 
+        addAffiliations(fragment, person);
+        addContacts(fragment,person);
 
         final TagSoupExtractionResult tser = (TagSoupExtractionResult) out;
         tser.addResourceRoot(


[13/13] any23 git commit: Merge branch 'gsoc_2015' of https://github.com/nisalanirmana/any23

Posted by le...@apache.org.
Merge branch 'gsoc_2015' of https://github.com/nisalanirmana/any23


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/9bcc9ea3
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/9bcc9ea3
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/9bcc9ea3

Branch: refs/heads/master
Commit: 9bcc9ea3c7a007b1b1993421ad701667314f5dc9
Parents: ffcecb3 47571dd
Author: Lewis John McGibbney <le...@jpl.nasa.gov>
Authored: Thu Sep 10 00:48:00 2015 -0700
Committer: Lewis John McGibbney <le...@jpl.nasa.gov>
Committed: Thu Sep 10 00:48:00 2015 -0700

----------------------------------------------------------------------
 .../main/java/org/apache/any23/vocab/HCard.java |  86 ++++
 .../java/org/apache/any23/vocab/HEntry.java     |  60 +++
 .../java/org/apache/any23/vocab/HEvent.java     |  57 +++
 .../main/java/org/apache/any23/vocab/HItem.java |  47 ++
 .../java/org/apache/any23/vocab/HProduct.java   |  58 +++
 .../java/org/apache/any23/vocab/HResume.java    |  54 +++
 .../main/java/org/apache/any23/vocab/VCard.java |   5 +
 .../extractor/html/MicroformatExtractor.java    |   5 +
 .../html/microformats2/HAdrExtractor.java       | 113 +++++
 .../microformats2/HAdrExtractorFactory.java     |  57 +++
 .../html/microformats2/HCardExtractor.java      | 450 +++++++++++++++++++
 .../microformats2/HCardExtractorFactory.java    |  57 +++
 .../html/microformats2/HEntryExtractor.java     | 250 +++++++++++
 .../microformats2/HEntryExtractorFactory.java   |  60 +++
 .../html/microformats2/HEventExtractor.java     | 262 +++++++++++
 .../microformats2/HEventExtractorFactory.java   |  57 +++
 .../html/microformats2/HGeoExtractor.java       |  94 ++++
 .../microformats2/HGeoExtractorFactory.java     |  57 +++
 .../html/microformats2/HItemExtractor.java      | 102 +++++
 .../microformats2/HItemExtractorFactory.java    |  57 +++
 .../html/microformats2/HProductExtractor.java   | 172 +++++++
 .../microformats2/HProductExtractorFactory.java |  56 +++
 .../html/microformats2/HRecipeExtractor.java    | 189 ++++++++
 .../microformats2/HRecipeExtractorFactory.java  |  57 +++
 .../html/microformats2/HResumeExtractor.java    | 191 ++++++++
 .../microformats2/HResumeExtractorFactory.java  |  57 +++
 .../microformats2/Microformats2Prefixes.java    |  27 ++
 .../microformats2/annotations/Includes.java     |  41 ++
 .../microformats2/annotations/package-info.java |  24 +
 .../html/microformats2/package-info.java        |  24 +
 .../html/microformats2/example-mf2-h-adr.html   |  27 ++
 .../html/microformats2/example-mf2-h-geo.html   |  22 +
 .../apache/any23/prefixes/prefixes.properties   |   5 +
 .../html/microformats2/HAdrExtractorTest.java   |  37 ++
 .../html/microformats2/HCardExtractorTest.java  |  37 ++
 .../html/microformats2/HEntryExtractorTest.java |  37 ++
 .../html/microformats2/HEventExtractorTest.java |  37 ++
 .../html/microformats2/HGeoExtractorTest.java   |  47 ++
 .../html/microformats2/HItemExtractorTest.java  |  38 ++
 .../microformats2/HProductExtractorTest.java    |  37 ++
 .../microformats2/HRecipeExtractorTest.java     |  39 ++
 .../microformats2/HResumeExtractorTest.java     |  37 ++
 .../apache/any23/vocab/RDFSchemaUtilsTest.java  |   4 +-
 .../microformats2/h-adr/h-adr-test.html         |  34 ++
 .../microformats2/h-card/h-card-test.html       |  45 ++
 .../microformats2/h-entry/h-entry-test.html     |  74 +++
 .../microformats2/h-event/h-event-test.html     |  36 ++
 .../microformats2/h-geo/h-geo-test.html         |  33 ++
 .../microformats2/h-item/h-item-test.html       |  27 ++
 .../microformats2/h-product/h-product-test.html |  36 ++
 .../microformats2/h-recipe/h-recipe-test.html   |  71 +++
 .../microformats2/h-resume/h-resume-test.html   |  49 ++
 52 files changed, 3633 insertions(+), 2 deletions(-)
----------------------------------------------------------------------



[04/13] any23 git commit: added extractors HRecipe and HItem

Posted by le...@apache.org.
added extractors HRecipe and HItem


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/1616c17c
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/1616c17c
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/1616c17c

Branch: refs/heads/master
Commit: 1616c17cb6497bcdf7947ee1048027f1b6d83a9f
Parents: ff81602
Author: Nisala Nirmana <ni...@gmail.com>
Authored: Mon Jul 6 01:50:42 2015 +0530
Committer: Nisala Nirmana <ni...@gmail.com>
Committed: Mon Jul 6 01:50:42 2015 +0530

----------------------------------------------------------------------
 .../main/java/org/apache/any23/vocab/HItem.java |  30 +++
 .../html/microformats2/HItemExtractor.java      |  85 +++++++++
 .../microformats2/HItemExtractorFactory.java    |  40 ++++
 .../html/microformats2/HRecipeExtractor.java    | 189 +++++++++++++++++++
 .../microformats2/HRecipeExtractorFactory.java  |  57 ++++++
 .../html/microformats2/HItemExtractorTest.java  |  38 ++++
 .../microformats2/HRecipeExtractorTest.java     |  39 ++++
 .../microformats2/h-item/h-item-test.html       |  27 +++
 .../microformats2/h-recipe/h-recipe-test.html   |  71 +++++++
 9 files changed, 576 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/1616c17c/api/src/main/java/org/apache/any23/vocab/HItem.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HItem.java b/api/src/main/java/org/apache/any23/vocab/HItem.java
new file mode 100644
index 0000000..db54e65
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HItem.java
@@ -0,0 +1,30 @@
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary to map the <a href="http://microformats.org/wiki/hitem">h-item</a> microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HItem extends Vocabulary {
+
+    public static final String NS = SINDICE.NS + "hitem/";
+
+    private static HItem instance;
+
+    public static HItem getInstance() {
+        if(instance == null) {
+            instance = new HItem();
+        }
+        return instance;
+    }
+
+    public URI Item  = createClass(NS, "Item");
+    public URI name  = createProperty(NS, "name");
+    public URI url   = createProperty(NS, "url");
+    public URI photo = createProperty(NS, "photo");
+    private HItem() {
+        super(NS);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/1616c17c/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractor.java
new file mode 100644
index 0000000..19ed757
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractor.java
@@ -0,0 +1,85 @@
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.vocab.HItem;
+import org.openrdf.model.BNode;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-item">h-item</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HItemExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HItem vHITEM = HItem.getInstance();
+
+    private static final String[] itemFields = {
+            "name",
+            "url",
+            "photo"
+    };
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HItemExtractorFactory.getDescriptionInstance();
+    }
+
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX+"item";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException{
+        if (null == node) return false;
+        final HTMLDocument document = new HTMLDocument(node);
+        BNode item = getBlankNodeFor(node);
+        out.writeTriple(item, RDF.TYPE, vHITEM.Item);
+        final String extractorName = getDescription().getExtractorName();
+        addName(document,item);
+        addPhotos(document,item);
+        addUrls(document,item);
+        final TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
+        tser.addResourceRoot(document.getPathToLocalRoot(), item, this.getClass());
+        return true;
+    }
+
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode item, String fieldClass, URI property) {
+        HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(
+                title.source(),item, property, title.value()
+        );
+    }
+
+    private void addName(HTMLDocument fragment, BNode item) {
+        mapFieldWithProperty(fragment, item, Microformats2Prefixes.PROPERTY_PREFIX+itemFields[0], vHITEM.name);
+    }
+
+    private void addPhotos(HTMLDocument fragment, BNode item) throws ExtractionException {
+        final HTMLDocument.TextField[] photos = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX+itemFields[2]);
+        for(HTMLDocument.TextField photo : photos) {
+            addURIProperty(item, vHITEM.photo, fragment.resolveURI(photo.value()));
+        }
+    }
+
+    private void addUrls(HTMLDocument fragment, BNode item) throws ExtractionException {
+        HTMLDocument.TextField[] links = fragment.getPluralUrlField(Microformats2Prefixes.URL_PROPERTY_PREFIX+
+                itemFields[1]);
+        for (HTMLDocument.TextField link : links) {
+            conditionallyAddResourceProperty(item, vHITEM.url, getHTMLDocument().resolveURI(link.value()));
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/1616c17c/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
new file mode 100644
index 0000000..8423686
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
@@ -0,0 +1,40 @@
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author Nisala Nirmana
+ *
+ */
+public class HItemExtractorFactory extends SimpleExtractorFactory<HItemExtractor> implements
+        ExtractorFactory<HItemExtractor> {
+
+    public static final String NAME = "html-mf2-h-item";
+
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard");
+
+    private static final ExtractorDescription descriptionInstance = new HItemExtractorFactory();
+
+    public HItemExtractorFactory() {
+        super(
+                HItemExtractorFactory.NAME,
+                HItemExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-item.html");
+    }
+
+    @Override
+    public HItemExtractor createExtractor() {
+        return new HItemExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/1616c17c/core/src/main/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractor.java
new file mode 100644
index 0000000..d4bf12e
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractor.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.vocab.HRecipe;
+import org.openrdf.model.BNode;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/hrecipe">hRecipe</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HRecipeExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HRecipe vHRECIPE = HRecipe.getInstance();
+
+    private static final String[] recipeFields = {
+            "name",
+            "ingredient",
+            "yield",
+            "instructions",
+            "duration",
+            "photo",
+            "summary",
+            "author",
+            "published",
+            "nutrition"
+    };
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HRecipeExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX+"recipe";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode recipe = getBlankNodeFor(node);
+        conditionallyAddResourceProperty(recipe, RDF.TYPE, vHRECIPE.Recipe);
+        final HTMLDocument fragment = new HTMLDocument(node);
+        addName(fragment, recipe);
+        addIngredients(fragment, recipe);
+        addYield(fragment, recipe);
+        addInstructions(fragment, recipe);
+        addDurations(fragment, recipe);
+        addPhoto(fragment, recipe);
+        addSummary(fragment, recipe);
+        addAuthors(fragment, recipe);
+        addPublished(fragment, recipe);
+        addNutritions(fragment, recipe);
+        return true;
+    }
+
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, URI property) {
+        HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(
+                title.source(), recipe, property, title.value()
+        );
+    }
+
+    private void addName(HTMLDocument fragment, BNode recipe) {
+        mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[0], vHRECIPE.fn);
+    }
+
+    private void addIngredients(HTMLDocument fragment, BNode recipe) {
+        final HTMLDocument.TextField[] ingredients = fragment.getPluralTextField
+                (Microformats2Prefixes.PROPERTY_PREFIX+recipeFields[1]);
+        for(HTMLDocument.TextField ingredient : ingredients) {
+            conditionallyAddStringProperty(
+                    ingredient.source(), recipe, vHRECIPE.ingredient, ingredient.value()
+            );
+        }
+    }
+
+    private void addInstructions(HTMLDocument fragment, BNode recipe) {
+        mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.EMBEDDED_PROPERTY_PREFIX+recipeFields[2],
+                                                                                               vHRECIPE.instructions);
+    }
+
+    private void addYield(HTMLDocument fragment, BNode recipe) {
+        mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.PROPERTY_PREFIX+recipeFields[3], vHRECIPE.yield);
+    }
+
+    private void addDurations(HTMLDocument fragment, BNode recipe) {
+        final HTMLDocument.TextField[] durations = fragment.getPluralTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + recipeFields[4]);
+        for(HTMLDocument.TextField duration : durations) {
+            Node attribute=duration.source().getAttributes().getNamedItem("datetime");
+            if (attribute==null){
+                conditionallyAddStringProperty(
+                    duration.source(),
+                    recipe, vHRECIPE.duration, duration.value()
+                );
+            }else{
+                conditionallyAddStringProperty(
+                        duration.source(),
+                        recipe, vHRECIPE.duration, attribute.getNodeValue()
+                );
+
+            }
+
+        }
+    }
+
+    private void addPhoto(HTMLDocument fragment, BNode recipe) throws ExtractionException {
+        final HTMLDocument.TextField[] photos = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX+recipeFields[5]);
+        for(HTMLDocument.TextField photo : photos) {
+            addURIProperty(recipe, vHRECIPE.photo, fragment.resolveURI(photo.value()));
+        }
+    }
+
+    private void addSummary(HTMLDocument fragment, BNode recipe) {
+        mapFieldWithProperty(fragment, recipe, Microformats2Prefixes.PROPERTY_PREFIX+recipeFields[6], vHRECIPE.summary);
+    }
+
+    private void addAuthors(HTMLDocument fragment, BNode recipe) {
+        final HTMLDocument.TextField[] authors = fragment.
+                getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX + recipeFields[7]);
+         for(HTMLDocument.TextField author : authors) {
+             conditionallyAddStringProperty(
+                    author.source(),
+                    recipe, vHRECIPE.author, author.value()
+              );
+        }
+    }
+
+    private void addPublished(HTMLDocument fragment, BNode recipe) {
+        final HTMLDocument.TextField[] durations = fragment.getPluralTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + recipeFields[8]);
+        for(HTMLDocument.TextField duration : durations) {
+            Node attribute=duration.source().getAttributes().getNamedItem("datetime");
+            if (attribute==null){
+                conditionallyAddStringProperty(
+                        duration.source(),
+                        recipe, vHRECIPE.published, duration.value()
+                );
+            }else{
+                conditionallyAddStringProperty(
+                        duration.source(),
+                        recipe, vHRECIPE.published, attribute.getNodeValue()
+                );
+            }
+        }
+    }
+
+    private void addNutritions(HTMLDocument fragment, BNode recipe) {
+        final HTMLDocument.TextField[] nutritions = fragment.getPluralTextField
+                (Microformats2Prefixes.PROPERTY_PREFIX+recipeFields[9]);
+        for(HTMLDocument.TextField nutrition : nutritions) {
+            conditionallyAddStringProperty(
+                    nutrition.source(), recipe, vHRECIPE.nutrition, nutrition.value()
+            );
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/1616c17c/core/src/main/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractorFactory.java
new file mode 100644
index 0000000..2f61f51
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author Nisala Nirmana
+ *
+ */
+public class HRecipeExtractorFactory extends SimpleExtractorFactory<HRecipeExtractor> implements
+        ExtractorFactory<HRecipeExtractor> {
+
+    public static final String NAME = "html-mf2-h-recipe";
+    
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hrecipe");
+
+    private static final ExtractorDescription descriptionInstance = new HRecipeExtractorFactory();
+    
+    public HRecipeExtractorFactory() {
+        super(
+                HRecipeExtractorFactory.NAME, 
+                HRecipeExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-recipe.html");
+    }
+    
+    @Override
+    public HRecipeExtractor createExtractor() {
+        return new HRecipeExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/1616c17c/core/src/test/java/org/apache/any23/extractor/html/microformats2/HItemExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HItemExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HItemExtractorTest.java
new file mode 100644
index 0000000..8163890
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HItemExtractorTest.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HItemExtractorTest extends AbstractExtractorTestCase {
+
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HItemExtractorFactory();
+    }
+
+    @Test
+    public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+        assertExtract("/microformats2/h-item/h-item-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 4);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/1616c17c/core/src/test/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractorTest.java
new file mode 100644
index 0000000..883a630
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HRecipeExtractorTest.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HRecipeExtractorTest extends AbstractExtractorTestCase {
+
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HRecipeExtractorFactory();
+    }
+
+    @Test
+    public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+        assertExtract("/microformats2/h-recipe/h-recipe-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 15);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/1616c17c/test-resources/src/test/resources/microformats2/h-item/h-item-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-item/h-item-test.html b/test-resources/src/test/resources/microformats2/h-item/h-item-test.html
new file mode 100644
index 0000000..dc2b2c7
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-item/h-item-test.html
@@ -0,0 +1,27 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+    <head>
+    </head>
+    <body>
+        <div class="h-item">
+            <span class="p-name">food</span>   
+            <span><img class="u-photo" src="http://cargills.com/food/puddings.jpg" /></span>
+            <a class="u-url" href="http://cargills.com/food/">Online Supermarket</a>       
+        </div>
+    </body>
+</html>

http://git-wip-us.apache.org/repos/asf/any23/blob/1616c17c/test-resources/src/test/resources/microformats2/h-recipe/h-recipe-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-recipe/h-recipe-test.html b/test-resources/src/test/resources/microformats2/h-recipe/h-recipe-test.html
new file mode 100644
index 0000000..20ea47b
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-recipe/h-recipe-test.html
@@ -0,0 +1,71 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+<head>
+    <title>http://microformats.org/wiki/hrecipe Example 1</title>
+</head>
+<body>
+<div class="h-recipe">
+    <span class="p-name">Yorkshire Puddings</span>    
+    <span class="p-summary">Makes<span class="p-yield">6 good sized Yorkshire puddings</span>,the way my mum taught me</span>
+    <span><img class="u-photo" src="http://codebits.glennjones.net/semantic/yorkshire-puddings.jpg" /></span>
+
+    <div>
+        <h3>Ingredients</h3>
+        <ul>
+            <li class="p-ingredient">1 egg</li>
+            <li class="p-ingredient">75g plain flour</li>
+            <li class="p-ingredient">70ml milk</li>
+            <li class="p-ingredient">60ml water</li>
+            <li class="p-ingredient">Pinch of salt</li>
+        </ul>
+    </div>
+
+    <h3>Time</h3>
+    <ul>
+        <li>Cook<span class="dt-duration">25 mins</span></li>
+    </ul> 
+
+
+    <h3>Instructions</h3>
+    <div class="e-instructions">
+        <ol>
+            <li>Pre-heat oven to 230C or gas mark 8. Pour the vegetable oil evenly into 2 x 4-hole 
+            Yorkshire pudding tins and place in the oven to heat through.</li> 
+            
+            <li>To make the batter, add all the flour into a bowl and beat in the eggs until smooth. 
+            Gradually add the milk and water while beating the mixture. It should be smooth and 
+            without lumps. Finally add a pinch of salt.</li>
+            
+            <li>Make sure the oil is piping hot before pouring the batter evenly into the tins. 
+            Place in the oven for 20-25 minutes until pudding have risen and look golden brown</li>
+        </ol>
+    </div>
+
+    <h3>Nutrition</h3>
+    <ul>
+        <li class="p-nutrition">Calories: <span>125</span></li>
+        <li class="p-nutrition">Fat: <span>3.2g</span></li>
+        <li class="p-nutrition">Cholesterol: <span>77mg</span></li>
+    </ul>
+
+    <span>Published on <time class="dt-published" datetime="2011-10-27">27 Oct 2011</time> by 
+  ` <span class="p-author">Glenn Jones</span>
+    
+</div>
+</body>
+</html>


[11/13] any23 git commit: adding license headers for HItem extractors

Posted by le...@apache.org.
adding license headers for HItem extractors


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/47571dda
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/47571dda
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/47571dda

Branch: refs/heads/master
Commit: 47571dda07a90c658c0fe202f3f3133b8c69dec9
Parents: 6ad6d87
Author: Nisala <ni...@gmail.com>
Authored: Thu Aug 27 09:44:48 2015 +0530
Committer: Nisala <ni...@gmail.com>
Committed: Thu Aug 27 09:44:48 2015 +0530

----------------------------------------------------------------------
 .../html/microformats2/HItemExtractor.java         | 17 +++++++++++++++++
 .../html/microformats2/HItemExtractorFactory.java  | 17 +++++++++++++++++
 2 files changed, 34 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/47571dda/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractor.java
index 19ed757..4478dc0 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractor.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.any23.extractor.html.microformats2;
 
 import org.apache.any23.extractor.ExtractionException;

http://git-wip-us.apache.org/repos/asf/any23/blob/47571dda/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
index 14f20bd..4064955 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.any23.extractor.html.microformats2;
 
 import java.util.Arrays;


[12/13] any23 git commit: Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/any23

Posted by le...@apache.org.
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/any23


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/ffcecb35
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/ffcecb35
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/ffcecb35

Branch: refs/heads/master
Commit: ffcecb35d1b173276a386fe0d8ed2bc0ba532dee
Parents: 34f0d24 0d106d4
Author: Lewis John McGibbney <le...@jpl.nasa.gov>
Authored: Thu Sep 10 00:46:12 2015 -0700
Committer: Lewis John McGibbney <le...@jpl.nasa.gov>
Committed: Thu Sep 10 00:46:12 2015 -0700

----------------------------------------------------------------------
 .../any23/extractor/html/HTMLMetaExtractor.java | 88 ++++++++++++++++----
 .../test/java/org/apache/any23/Any23Test.java   |  4 +-
 .../extractor/html/HTMLMetaExtractorTest.java   |  9 +-
 .../html/html-head-link-extractor.html          |  1 -
 ...-meta-extractor-with-mozilla-extensions.html | 34 ++++++++
 5 files changed, 117 insertions(+), 19 deletions(-)
----------------------------------------------------------------------



[07/13] any23 git commit: author correction

Posted by le...@apache.org.
author correction


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/0008c7c7
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/0008c7c7
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/0008c7c7

Branch: refs/heads/master
Commit: 0008c7c770c8626b766da24522d9e12a8dc97215
Parents: 817029a
Author: Nisala <ni...@gmail.com>
Authored: Mon Jul 20 01:22:59 2015 +0530
Committer: Nisala <ni...@gmail.com>
Committed: Mon Jul 20 01:22:59 2015 +0530

----------------------------------------------------------------------
 .../any23/extractor/html/microformats2/HEventExtractorFactory.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/0008c7c7/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
index 602b044..ef62f50 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
@@ -26,7 +26,7 @@ import org.apache.any23.rdf.PopularPrefixes;
 import org.apache.any23.rdf.Prefixes;
 
 /**
- * @author Peter Ansell p_ansell@yahoo.com
+ * @author Nisala Nirmana
  *
  */
 public class HEventExtractorFactory extends SimpleExtractorFactory<HEventExtractor> implements


[05/13] any23 git commit: adding HEvent and HProduct Extractors

Posted by le...@apache.org.
adding HEvent and HProduct Extractors


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/cc0dfbe8
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/cc0dfbe8
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/cc0dfbe8

Branch: refs/heads/master
Commit: cc0dfbe8127a00fa712c7d2df6785a73c290feae
Parents: 1616c17
Author: Nisala <ni...@gmail.com>
Authored: Mon Jul 20 01:12:27 2015 +0530
Committer: Nisala <ni...@gmail.com>
Committed: Mon Jul 20 01:12:27 2015 +0530

----------------------------------------------------------------------
 .../java/org/apache/any23/vocab/HEvent.java     |  57 ++++++
 .../java/org/apache/any23/vocab/HProduct.java   |  58 ++++++
 .../html/microformats2/HEventExtractor.java     | 195 +++++++++++++++++++
 .../microformats2/HEventExtractorFactory.java   |  57 ++++++
 .../html/microformats2/HProductExtractor.java   | 153 +++++++++++++++
 .../microformats2/HProductExtractorFactory.java |  56 ++++++
 .../apache/any23/prefixes/prefixes.properties   |   2 +
 .../html/microformats2/HEventExtractorTest.java |  37 ++++
 .../microformats2/HProductExtractorTest.java    |  37 ++++
 .../microformats2/h-event/h-event-test.html     |  36 ++++
 .../microformats2/h-product/h-product-test.html |  36 ++++
 11 files changed, 724 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/api/src/main/java/org/apache/any23/vocab/HEvent.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HEvent.java b/api/src/main/java/org/apache/any23/vocab/HEvent.java
new file mode 100644
index 0000000..b936c3e
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HEvent.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary to map the <a href="http://microformats.org/wiki/h-event">h-event</a> microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HEvent extends Vocabulary {
+    public static final String NS = SINDICE.NS + "hevent/";
+
+    private static HEvent instance;
+
+    public static HEvent getInstance() {
+        if(instance == null) {
+            instance = new HEvent();
+        }
+        return instance;
+    }
+
+    public URI event  = createClass(NS, "Event");
+
+
+    public URI name  = createProperty(NS, "name");
+    public URI summary   = createProperty(NS, "summary");
+    public URI start = createProperty(NS, "start");
+    public URI end   = createProperty(NS, "end");
+    public URI duration = createProperty(NS, "duration");
+    public URI description  = createProperty(NS, "description");
+    public URI url = createProperty(NS, "url");
+    public URI category  = createProperty(NS, "category");
+    public URI location = createProperty(NS, "location");
+    public URI attendee  = createProperty(NS, "attendee");
+
+
+    private HEvent() {
+        super(NS);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/api/src/main/java/org/apache/any23/vocab/HProduct.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HProduct.java b/api/src/main/java/org/apache/any23/vocab/HProduct.java
new file mode 100644
index 0000000..9630db3
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HProduct.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary to map the <a href="http://microformats.org/wiki/hitem">h-item</a> microformat.
+ *
+ * @author Nisala Nirmana
+ */
+
+public class HProduct extends Vocabulary {
+    public static final String NS = SINDICE.NS + "hproduct/";
+
+    private static HProduct instance;
+
+    public static HProduct getInstance() {
+        if(instance == null) {
+            instance = new HProduct();
+        }
+        return instance;
+    }
+
+    public URI product  = createClass(NS, "Product");
+
+
+    public URI name  = createProperty(NS, "name");
+    public URI photo  = createProperty(NS, "photo");
+    public URI brand = createProperty(NS, "brand");
+    public URI category  = createProperty(NS, "category");
+    public URI description  = createProperty(NS, "description");
+    public URI url = createProperty(NS, "url");
+    public URI identifier = createProperty(NS, "identifier");
+    public URI price = createProperty(NS, "price");
+    public URI review  = createProperty(NS, "review");
+
+
+    private HProduct() {
+        super(NS);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
new file mode 100644
index 0000000..8ce70a6
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.vocab.HEvent;
+import org.openrdf.model.BNode;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.HTMLDocument;
+
+import static org.apache.any23.extractor.html.HTMLDocument.TextField;
+
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/hcalendar">hCalendar</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HEventExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HEvent vEvent = HEvent.getInstance();
+
+    private String[] eventFields = {
+            "name",
+            "summary",
+            "start",
+            "end",
+            "duration",
+            "description",
+            "url",
+            "category",
+            "location", //toDO
+            "attendee" //toDO
+    };
+
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HEventExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX+"event";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode event = getBlankNodeFor(node);
+        conditionallyAddResourceProperty(event, RDF.TYPE, vEvent.event);
+        final HTMLDocument fragment = new HTMLDocument(node);
+        addName(fragment, event);
+        addSummary(fragment, event);
+        addStart(fragment, event);
+        addEnd(fragment, event);
+        addDuration(fragment, event);
+        addDescription(fragment, event);
+        addURLs(fragment, event);
+        addCategories(fragment, event);
+        addLocation(fragment, event);
+        
+        return true;
+    }
+
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass,
+                                      URI property) {
+        HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(
+                title.source(), recipe, property, title.value()
+        );
+    }
+
+    private void addName(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[0], vEvent.name);
+    }
+
+    private void addSummary(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[1], vEvent.summary);
+    }
+
+    private void addStart(HTMLDocument fragment, BNode event) {
+        final TextField start = fragment.getSingularTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[2]);
+        if(start.source()==null)
+            return;
+        Node attribute = start.source().getAttributes().getNamedItem("datetime");
+        if (attribute == null) {
+            conditionallyAddStringProperty(
+                    start.source(),
+                    event, vEvent.start, start.value()
+            );
+        } else {
+            conditionallyAddStringProperty(
+                    start.source(),
+                    event, vEvent.start, attribute.getNodeValue()
+            );
+        }
+    }
+
+    private void addEnd(HTMLDocument fragment, BNode event) {
+        final TextField end = fragment.getSingularTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[3]);
+        if(end.source()==null)
+            return;
+        Node attribute = end.source().getAttributes().getNamedItem("datetime");
+        if (attribute == null) {
+            conditionallyAddStringProperty(
+                    end.source(),
+                    event, vEvent.end, end.value()
+            );
+        } else {
+            conditionallyAddStringProperty(
+                    end.source(),
+                    event, vEvent.end, attribute.getNodeValue()
+            );
+        }
+    }
+
+    private void addDuration(HTMLDocument fragment, BNode event) {
+        final TextField duration = fragment.getSingularTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + eventFields[4]);
+        if(duration.source()==null)
+            return;
+        Node attribute = duration.source().getAttributes().getNamedItem("datetime");
+        if (attribute == null) {
+            conditionallyAddStringProperty(
+                    duration.source(),
+                    event, vEvent.duration, duration.value()
+            );
+        } else {
+            conditionallyAddStringProperty(
+                    duration.source(),
+                    event, vEvent.duration, attribute.getNodeValue()
+            );
+        }
+    }
+
+    private void addDescription(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[5], vEvent.description);
+    }
+
+    private void addURLs(HTMLDocument fragment, BNode event) throws ExtractionException {
+        final HTMLDocument.TextField[] urls = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + eventFields[6]);
+        for(HTMLDocument.TextField url : urls) {
+            addURIProperty(event, vEvent.url, fragment.resolveURI(url.value()));
+        }
+    }
+
+    private void addCategories(HTMLDocument fragment, BNode event) {
+        final HTMLDocument.TextField[] categories = fragment.getPluralTextField
+                (Microformats2Prefixes.PROPERTY_PREFIX + eventFields[7]);
+        for(HTMLDocument.TextField category : categories) {
+            conditionallyAddStringProperty(
+                    category.source(), event, vEvent.category, category.value()
+            );
+        }
+    }
+
+    private void addLocation(HTMLDocument fragment, BNode event) {
+        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
+                eventFields[8], vEvent.location);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
new file mode 100644
index 0000000..602b044
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author Peter Ansell p_ansell@yahoo.com
+ *
+ */
+public class HEventExtractorFactory extends SimpleExtractorFactory<HEventExtractor> implements
+        ExtractorFactory<HEventExtractor> {
+
+    public static final String NAME = "html-mf2-h-event";
+    
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hevent");
+
+    private static final ExtractorDescription descriptionInstance = new HEventExtractorFactory();
+    
+    public HEventExtractorFactory() {
+        super(
+                HEventExtractorFactory.NAME,
+                HEventExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-event.html");
+    }
+    
+    @Override
+    public HEventExtractor createExtractor() {
+        return new HEventExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
new file mode 100644
index 0000000..0e93935
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+import org.apache.any23.vocab.HProduct;
+import org.openrdf.model.BNode;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-product">h-product</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HProductExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HProduct vProduct = HProduct.getInstance();
+
+    private static final String[] productFields = {
+            "name",
+            "photo",
+            "brand", //toDo
+            "category",
+            "description",
+            "url",
+            "identifier",
+            "review", //toDo
+            "price"
+    };
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HProductExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX+"product";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode product = getBlankNodeFor(node);
+        conditionallyAddResourceProperty(product, RDF.TYPE, vProduct.product);
+        final HTMLDocument fragment = new HTMLDocument(node);
+        addName(fragment, product);
+        addPhoto(fragment, product);
+        addCategories(fragment, product);
+        addDescription(fragment, product);
+        addURLs(fragment, product);
+        addIdentifiers(fragment, product);
+        addPrice(fragment, product);
+        return true;
+    }
+
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode product, String fieldClass,
+                                      URI property) {
+        HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(
+                title.source(), product, property, title.value()
+        );
+    }
+
+    private void addName(HTMLDocument fragment, BNode product) {
+        mapFieldWithProperty(fragment, product, Microformats2Prefixes.PROPERTY_PREFIX +
+                productFields[0], vProduct.name);
+    }
+
+    private void addPhoto(HTMLDocument fragment, BNode product) throws ExtractionException {
+        final HTMLDocument.TextField[] photos = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[1]);
+        for(HTMLDocument.TextField photo : photos) {
+            addURIProperty(product, vProduct.photo, fragment.resolveURI(photo.value()));
+        }
+    }
+
+    private void addCategories(HTMLDocument fragment, BNode product) {
+        final HTMLDocument.TextField[] categories = fragment.getPluralTextField
+                (Microformats2Prefixes.PROPERTY_PREFIX + productFields[3]);
+        for(HTMLDocument.TextField category : categories) {
+            conditionallyAddStringProperty(
+                    category.source(), product, vProduct.category, category.value()
+            );
+        }
+    }
+
+    private void addDescription(HTMLDocument fragment, BNode product) {
+        mapFieldWithProperty(fragment, product, Microformats2Prefixes.EMBEDDED_PROPERTY_PREFIX +
+                productFields[4], vProduct.description);
+    }
+
+    private void addURLs(HTMLDocument fragment, BNode product) throws ExtractionException {
+        final HTMLDocument.TextField[] urls = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[5]);
+        for(HTMLDocument.TextField url : urls) {
+            addURIProperty(product, vProduct.url, fragment.resolveURI(url.value()));
+        }
+    }
+
+    private void addIdentifiers(HTMLDocument fragment, BNode product) throws ExtractionException {
+        final HTMLDocument.TextField[] identifiers = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + productFields[6]);
+        for(HTMLDocument.TextField identifier :identifiers) {
+            addURIProperty(product, vProduct.identifier, fragment.resolveURI(identifier.value()));
+        }
+    }
+
+    private void addPrice(HTMLDocument fragment, BNode product) {
+        final HTMLDocument.TextField price = fragment.getSingularTextField(
+                Microformats2Prefixes.PROPERTY_PREFIX + productFields[8]);
+        if(price.source()==null)
+            return;
+        Node attribute = price.source().getAttributes().getNamedItem("value");
+        if (attribute == null) {
+            conditionallyAddStringProperty(
+                    price.source(),
+                    product, vProduct.price, price.value()
+            );
+        } else {
+            conditionallyAddStringProperty(
+                    price.source(),
+                    product, vProduct.price, attribute.getNodeValue()
+            );
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java
new file mode 100644
index 0000000..f4b65d9
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractorFactory.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import java.util.Arrays;
+
+/**
+ * @author Nisala Nirmana
+ *
+ */
+public class HProductExtractorFactory extends SimpleExtractorFactory<HProductExtractor> implements
+        ExtractorFactory<HProductExtractor> {
+
+    public static final String NAME = "html-mf2-h-product";
+
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hproduct");
+
+    private static final ExtractorDescription descriptionInstance = new HProductExtractorFactory();
+
+    public HProductExtractorFactory() {
+        super(
+                HProductExtractorFactory.NAME,
+                HProductExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-product.html");
+    }
+
+    @Override
+    public HProductExtractor createExtractor() {
+        return new HProductExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
----------------------------------------------------------------------
diff --git a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
index 58516ec..34e3975 100644
--- a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
+++ b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
@@ -32,6 +32,8 @@ ex=http://example.com/ns#
 wo=http://purl.org/ontology/wo/
 skos=http://www.w3.org/2004/02/skos/core#
 hrecipe=http://sindice.com/hrecipe/
+hevent=http://sindice.com/hevent/
+hproduct=http://sindice.com/hproduct/
 sindice=http://vocab.sindice.net/
 og=http://opengraphprotocol.org/schema/
 fb=http://www.facebook.com/2008/fbml#

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
new file mode 100644
index 0000000..6c13909
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HEventExtractorTest extends AbstractExtractorTestCase {
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HEventExtractorFactory();
+    }
+
+    @Test
+    public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+        assertExtract("/microformats2/h-event/h-event-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 9);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
new file mode 100644
index 0000000..3b46a7a
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HProductExtractorTest extends AbstractExtractorTestCase {
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HProductExtractorFactory();
+    }
+
+    @Test
+    public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+        assertExtract("/microformats2/h-product/h-product-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 11);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/test-resources/src/test/resources/microformats2/h-event/h-event-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-event/h-event-test.html b/test-resources/src/test/resources/microformats2/h-event/h-event-test.html
new file mode 100644
index 0000000..b8af9de
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-event/h-event-test.html
@@ -0,0 +1,36 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+   <!-- Microformats 2 -->
+
+    <div class="h-event">
+        <h1 class="p-name">Microformats Meetup</h1>
+        <a class="u-url" href="http://microformats.org/meetup/">Official event web site</a>
+        <p>From 
+            <time class="dt-start" datetime="2013-06-30 12:00">30<sup>th</sup> June 2013, 12:00</time>
+            to <time class="dt-end" datetime="2013-06-30 18:00">18:00</time>
+            at <span class="p-location">Some bar in SF</span></p>
+        <p class="p-summary">Get together and discuss all things microformats-related.</p>
+        <p class="p-description">This <span class="p-category">technical meetup</span> is hosted in aid of discussion related to new draft specification of microformats 2</p>
+    </div>
+
+</body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/any23/blob/cc0dfbe8/test-resources/src/test/resources/microformats2/h-product/h-product-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-product/h-product-test.html b/test-resources/src/test/resources/microformats2/h-product/h-product-test.html
new file mode 100644
index 0000000..08ead4f
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-product/h-product-test.html
@@ -0,0 +1,36 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+   <!-- Microformats 2 -->
+
+   <div class="h-product">
+      <h1 class="p-name">Microformats For Dummies</h1>
+      <img class="u-photo" src="http://example.org/mfd.png" alt="" />
+      <div class="e-description">
+         <p>Want to get started using microformats, but intimidated by hyphens and mediawiki? This <span class="p-category">book</span>
+         contains everything you need to know!</p>
+      </div>
+         <p>Yours today for only <data class="p-price" value="20.00">$20.00</data>
+         from ACME Publishing inc.</p> 
+   </div>
+
+</body>
+
+</html>


[02/13] any23 git commit: added HAdr and HGeo Extractors support

Posted by le...@apache.org.
added HAdr and HGeo Extractors support


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/5b10339b
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/5b10339b
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/5b10339b

Branch: refs/heads/master
Commit: 5b10339b55ea04e097a960fd722e8553573daccf
Parents: a03bafa
Author: nisalanirmana <ni...@gmail.com>
Authored: Mon Jun 22 00:09:17 2015 +0530
Committer: nisalanirmana <ni...@gmail.com>
Committed: Mon Jun 22 00:09:17 2015 +0530

----------------------------------------------------------------------
 .../main/java/org/apache/any23/vocab/VCard.java |   5 +
 .../html/microformats2/HAdrExtractor.java       | 120 +++++++++++++++++++
 .../microformats2/HAdrExtractorFactory.java     |  57 +++++++++
 .../html/microformats2/HGeoExtractor.java       |  84 +++++++++++++
 .../microformats2/HGeoExtractorFactory.java     |  57 +++++++++
 .../microformats2/annotations/Includes.java     |  41 +++++++
 .../microformats2/annotations/package-info.java |  24 ++++
 .../html/microformats2/package-info.java        |  24 ++++
 .../html/microformats2/example-mf2-h-adr.html   |  27 +++++
 .../html/microformats2/example-mf2-h-geo.html   |  22 ++++
 .../html/microformats2/HAdrExtractorTest.java   |  37 ++++++
 .../html/microformats2/HGeoExtractorTest.java   |  47 ++++++++
 .../apache/any23/vocab/RDFSchemaUtilsTest.java  |   4 +-
 .../microformats2/h-adr/h-adr-test.html         |  33 +++++
 .../microformats2/h-geo/h-geo-test.html         |  33 +++++
 15 files changed, 613 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/api/src/main/java/org/apache/any23/vocab/VCard.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/VCard.java b/api/src/main/java/org/apache/any23/vocab/VCard.java
index f43c5eb..10d3c94 100644
--- a/api/src/main/java/org/apache/any23/vocab/VCard.java
+++ b/api/src/main/java/org/apache/any23/vocab/VCard.java
@@ -59,6 +59,11 @@ public class VCard extends Vocabulary {
     public final URI agent = createProperty("agent");
 
     /**
+     * The altitude of a geographic location.
+     */
+    public final URI altitude = createProperty("altitude");
+
+    /**
      * The birthday of a person.
      */
     public final URI bday = createProperty("bday");

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
new file mode 100644
index 0000000..022bf47
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.extractor.html.microformats2.annotations.Includes;
+import org.apache.any23.vocab.VCard;
+import org.openrdf.model.BNode;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-adr">h-adr</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+@Includes( extractors = HGeoExtractor.class )
+public class HAdrExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final VCard vVCARD = VCard.getInstance();
+
+    private static final String[] addressFields = {
+            "p-street-address",
+            "p-extended-address",
+            "p-locality",
+            "p-region",
+            "p-postal-code",
+            "p-country-name",
+            "p-geo"
+    };
+
+    protected String getBaseClassName() {
+        return "h-adr";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    protected boolean extractEntity(Node node, ExtractionResult out) {
+        if (null == node) return false;
+        final HTMLDocument document = new HTMLDocument(node);
+        BNode adr = getBlankNodeFor(node);
+        out.writeTriple(adr, RDF.TYPE, vVCARD.Address);
+        final String extractorName = getDescription().getExtractorName();
+        for (String field : addressFields) {
+            HTMLDocument.TextField[] values = document.getPluralTextField(field);
+            for (HTMLDocument.TextField val : values) {
+               if(!field.equals("p-geo")) {
+                        conditionallyAddStringProperty(
+                                val.source(),
+                                adr, vVCARD.getProperty(field.replaceFirst("p-", "")), val.value()
+                        );
+               }else {
+                   String[] composed = val.value().split(";");
+                   if (composed.length == 3){
+                       conditionallyAddStringProperty(
+                               val.source(),
+                               adr, vVCARD.latitude, composed[0]
+                       );
+                       conditionallyAddStringProperty(
+                               val.source(),
+                               adr, vVCARD.longitude, composed[1]
+                       );
+                       conditionallyAddStringProperty(
+                               val.source(),
+                               adr, vVCARD.altitude, composed[2]
+                       );
+
+                   }else if (composed.length == 2){
+                       conditionallyAddStringProperty(
+                               val.source(),
+                               adr, vVCARD.latitude, composed[0]
+                       );
+                       conditionallyAddStringProperty(
+                               val.source(),
+                               adr, vVCARD.longitude, composed[1]
+                       );
+                   }else {
+                       //we discard if only length is 1
+                   }
+
+               }
+
+            }
+        }
+
+        final TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
+        tser.addResourceRoot( document.getPathToLocalRoot(), adr, this.getClass() );
+
+        return true;
+    }
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HAdrExtractorFactory.getDescriptionInstance();
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorFactory.java
new file mode 100644
index 0000000..3b28fb5
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author Nisala Nirmana
+ *
+ */
+public class HAdrExtractorFactory extends SimpleExtractorFactory<HAdrExtractor> implements
+        ExtractorFactory<HAdrExtractor> {
+
+    public static final String NAME = "html-mf2-h-adr";
+    
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard");
+
+    private static final ExtractorDescription descriptionInstance = new HAdrExtractorFactory();
+    
+    public HAdrExtractorFactory() {
+        super(
+                HAdrExtractorFactory.NAME,
+                HAdrExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-adr.html");
+    }
+    
+    @Override
+    public HAdrExtractor createExtractor() {
+        return new HAdrExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
new file mode 100644
index 0000000..4a1fbfd
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.vocab.VCard;
+import org.openrdf.model.BNode;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-geo">h-geo</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HGeoExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final VCard vVCARD = VCard.getInstance();
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HGeoExtractorFactory.getDescriptionInstance();
+    }
+
+    protected String getBaseClassName() {
+        return "h-geo";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    protected boolean extractEntity(Node node, ExtractionResult out) {
+        if (null == node) return false;
+        final HTMLDocument document = new HTMLDocument(node);
+        HTMLDocument.TextField latNode = document.getSingularTextField("p-latitude");
+        HTMLDocument.TextField lonNode = document.getSingularTextField("p-longitude");
+        HTMLDocument.TextField altNode = document.getSingularTextField("p-altitude");
+        String lat = latNode.value();
+        String lon = lonNode.value();
+        String alt = altNode.value();
+        BNode geo = getBlankNodeFor(node);
+        out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
+        final String extractorName = getDescription().getExtractorName();
+        conditionallyAddStringProperty(
+                latNode.source(),
+                geo, vVCARD.latitude , lat
+        );
+        conditionallyAddStringProperty(
+                lonNode.source(),
+                geo, vVCARD.longitude, lon
+        );
+        conditionallyAddStringProperty(
+                altNode.source(),
+                geo, vVCARD.altitude, alt
+        );
+
+        final TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
+        tser.addResourceRoot( document.getPathToLocalRoot(), geo, this.getClass() );
+
+        return true;
+    }
+    
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorFactory.java
new file mode 100644
index 0000000..5b60b7d
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author Nisala Nirmana
+ *
+ */
+public class HGeoExtractorFactory extends SimpleExtractorFactory<HGeoExtractor> implements
+        ExtractorFactory<HGeoExtractor> {
+
+    public static final String NAME = "html-mf2-h-geo";
+    
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard");
+
+    private static final ExtractorDescription descriptionInstance = new HGeoExtractorFactory();
+    
+    public HGeoExtractorFactory() {
+        super(
+                HGeoExtractorFactory.NAME,
+                HGeoExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-geo.html");
+    }
+    
+    @Override
+    public HGeoExtractor createExtractor() {
+        return new HGeoExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/Includes.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/Includes.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/Includes.java
new file mode 100644
index 0000000..ff9d738
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/Includes.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2.annotations;
+
+import org.apache.any23.extractor.html.MicroformatExtractor;
+
+import java.lang.annotation.Documented;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+/**
+ * This annotation could be used to decorate a {@link MicroformatExtractor} to
+ * represent which of the other <i>Microformats</i> could it nest.
+ *
+ * @author Davide Palmisano ( dpalmisano@gmail.com )
+ */
+@Documented
+@Retention(RetentionPolicy.RUNTIME)
+@Target(ElementType.TYPE)
+public @interface Includes {
+
+    Class<? extends MicroformatExtractor>[] extractors();
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
new file mode 100644
index 0000000..3311c98
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/annotations/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *  This package contains the annotations needed to describe the
+ *  single nesting relations among different <a href="http://microformats.org/">Microformats</a>.
+ *
+ *  @see org.apache.any23.extractor.html.MicroformatExtractor
+ */
+package org.apache.any23.extractor.microformats2.annotations;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/java/org/apache/any23/extractor/html/microformats2/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/package-info.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/package-info.java
new file mode 100644
index 0000000..b961373
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/package-info.java
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ * All the various {@link org.apache.any23.extractor.Extractor} needed to distill <i>RDF</i>
+ * from <a href="http://microformats.org/">Microformats</a> in HTML pages are contained in this package.
+ *
+ */
+package org.apache.any23.extractor.html.microformats2;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-adr.html
----------------------------------------------------------------------
diff --git a/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-adr.html b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-adr.html
new file mode 100644
index 0000000..d6f2c06
--- /dev/null
+++ b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-adr.html
@@ -0,0 +1,27 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<div class="h-adr">
+  <span class="p-street-address">349/B</span> 
+  <span class="p-extended-address">Batagama,North</span>   
+  <span class="p-locality">Jaela</span>
+  <span class="p-region">Western</span>
+  <span class="p-postal-code">11325</span>    
+  <span class="p-country-name">SL</span></span>
+  <span class="p-label">349/B,Jaela</span>
+  <span class="p-geo">51.526421;-0.081067;25</span>
+</div>

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-geo.html
----------------------------------------------------------------------
diff --git a/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-geo.html b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-geo.html
new file mode 100644
index 0000000..c8b2607
--- /dev/null
+++ b/core/src/main/resources/org/apache/any23/extractor/html/microformats2/example-mf2-h-geo.html
@@ -0,0 +1,22 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<div class="h-geo">
+  <span>Latitude</span><div class="p-latitude">7.066622</div> 
+  <span>Longitude</span><div class="p-longitude">79.903048</div>
+  <span>Altitude</span><div class="p-altitude">15</div>
+<div>  

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
new file mode 100644
index 0000000..0fb3625
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HAdrExtractorTest extends AbstractExtractorTestCase {
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HAdrExtractorFactory();
+    }
+
+    @Test
+    public void testModelNotEmpty() throws RepositoryException , RDFHandlerException {
+        assertExtract("/microformats2/h-adr/h-adr-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 10);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
new file mode 100644
index 0000000..eba89de
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.apache.any23.extractor.html.microformats2.HGeoExtractorFactory;
+import org.apache.any23.vocab.VCard;
+import org.junit.Assert;
+import org.junit.Test;
+import org.openrdf.model.Resource;
+import org.openrdf.model.vocabulary.RDF;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+import java.util.List;
+
+
+public class HGeoExtractorTest extends AbstractExtractorTestCase {
+
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HGeoExtractorFactory();
+    }
+
+    @Test
+    public void testModelNotEmpty() throws RepositoryException , RDFHandlerException {
+        assertExtract("/microformats2/h-geo/h-geo-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 4);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
index 3971191..b4f8b7a 100644
--- a/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
+++ b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
@@ -43,7 +43,7 @@ public class RDFSchemaUtilsTest {
      */
     @Test
     public void testSerializeVocabulariesNTriples() {
-        serializeVocabularies(RDFFormat.NTRIPLES, 1918);
+        serializeVocabularies(RDFFormat.NTRIPLES, 1920);
     }
 
     /**
@@ -53,7 +53,7 @@ public class RDFSchemaUtilsTest {
      */
     @Test
     public void testSerializeVocabulariesRDFXML() {
-        serializeVocabularies(RDFFormat.RDFXML, 4987); // Effective lines + separators.
+        serializeVocabularies(RDFFormat.RDFXML, 4992); // Effective lines + separators.
     }
 
     private void serializeVocabularies(RDFFormat format, int expectedLines) {

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
new file mode 100644
index 0000000..b5c095a
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
@@ -0,0 +1,33 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+  <body>
+    <!-- Microformats 2 -->
+    <div class="h-adr">
+      <span class="p-street-address">349/B</span> 
+      <span class="p-extended-address">Batagama,North</span>   
+      <span class="p-locality">Jaela</span>
+      <span class="p-region">Western</span>
+      <span class="p-postal-code">11325</span>    
+      <span class="p-country-name">SL</span></span>
+      <span class="p-geo">51.526421;-0.081067;25</span> 
+    </div>
+  </body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/any23/blob/5b10339b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
new file mode 100644
index 0000000..c0181fe
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
@@ -0,0 +1,33 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+   <!-- Microformats 2 -->
+
+   <p>
+     <span class="h-geo">
+       <span class="p-latitude">54.155278</span>,
+       <span class="p-longitude">-2.249722</span>
+       <span class="p-altitude">694</span>
+     </span>
+   </p>
+
+</body>
+
+</html>


[03/13] any23 git commit: changes according to mentor michele feedback

Posted by le...@apache.org.
changes according to  mentor michele feedback


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/ff816027
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/ff816027
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/ff816027

Branch: refs/heads/master
Commit: ff816027510f731f3e3f6a3c410feb5c48ffd972
Parents: 5b10339
Author: Nisala Nirmana <ni...@gmail.com>
Authored: Sun Jun 28 22:33:29 2015 +0530
Committer: Nisala Nirmana <ni...@gmail.com>
Committed: Sun Jun 28 22:33:29 2015 +0530

----------------------------------------------------------------------
 .../html/microformats2/HAdrExtractor.java       | 69 +++++++++-----------
 .../html/microformats2/HGeoExtractor.java       | 52 +++++++++------
 .../microformats2/Microformats2Prefixes.java    | 26 ++++++++
 .../html/microformats2/HAdrExtractorTest.java   |  2 +-
 .../html/microformats2/HGeoExtractorTest.java   |  2 +-
 .../microformats2/h-adr/h-adr-test.html         | 21 +++---
 .../microformats2/h-geo/h-geo-test.html         |  8 +--
 7 files changed, 105 insertions(+), 75 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
index 022bf47..d0d9257 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java
@@ -17,12 +17,14 @@
 
 package org.apache.any23.extractor.html.microformats2;
 
+import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.extractor.TagSoupExtractionResult;
 import org.apache.any23.extractor.html.microformats2.annotations.Includes;
 import org.apache.any23.vocab.VCard;
 import org.openrdf.model.BNode;
+import org.openrdf.model.Resource;
 import org.openrdf.model.vocabulary.RDF;
 import org.w3c.dom.Node;
 import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
@@ -40,17 +42,23 @@ public class HAdrExtractor extends EntityBasedMicroformatExtractor {
     private static final VCard vVCARD = VCard.getInstance();
 
     private static final String[] addressFields = {
-            "p-street-address",
-            "p-extended-address",
-            "p-locality",
-            "p-region",
-            "p-postal-code",
-            "p-country-name",
-            "p-geo"
+            "street-address",
+            "extended-address",
+            "locality",
+            "region",
+            "postal-code",
+            "country-name",
+            "geo"
+    };
+
+    private static final String[] geoFields = {
+            "latitude",
+            "longitude",
+            "altitude"
     };
 
     protected String getBaseClassName() {
-        return "h-adr";
+        return Microformats2Prefixes.CLASS_PREFIX+"adr";
     }
 
     @Override
@@ -58,60 +66,45 @@ public class HAdrExtractor extends EntityBasedMicroformatExtractor {
         // Empty.
     }
 
-    protected boolean extractEntity(Node node, ExtractionResult out) {
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
         if (null == node) return false;
         final HTMLDocument document = new HTMLDocument(node);
         BNode adr = getBlankNodeFor(node);
         out.writeTriple(adr, RDF.TYPE, vVCARD.Address);
         final String extractorName = getDescription().getExtractorName();
         for (String field : addressFields) {
-            HTMLDocument.TextField[] values = document.getPluralTextField(field);
+            HTMLDocument.TextField[] values = document.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX+field);
             for (HTMLDocument.TextField val : values) {
-               if(!field.equals("p-geo")) {
+               if(!field.equals("geo")) {
                         conditionallyAddStringProperty(
                                 val.source(),
-                                adr, vVCARD.getProperty(field.replaceFirst("p-", "")), val.value()
+                                adr, vVCARD.getProperty(field), val.value()
                         );
                }else {
                    String[] composed = val.value().split(";");
-                   if (composed.length == 3){
-                       conditionallyAddStringProperty(
-                               val.source(),
-                               adr, vVCARD.latitude, composed[0]
-                       );
+                   for(int counter=0;counter<composed.length;counter++){
                        conditionallyAddStringProperty(
                                val.source(),
-                               adr, vVCARD.longitude, composed[1]
-                       );
-                       conditionallyAddStringProperty(
-                               val.source(),
-                               adr, vVCARD.altitude, composed[2]
+                               adr, vVCARD.getProperty(geoFields[counter]), composed[counter]
                        );
 
-                   }else if (composed.length == 2){
-                       conditionallyAddStringProperty(
-                               val.source(),
-                               adr, vVCARD.latitude, composed[0]
-                       );
-                       conditionallyAddStringProperty(
-                               val.source(),
-                               adr, vVCARD.longitude, composed[1]
-                       );
-                   }else {
-                       //we discard if only length is 1
                    }
-
                }
-
             }
         }
-
+        addGeoAsUrlResource(adr,document);
         final TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
-        tser.addResourceRoot( document.getPathToLocalRoot(), adr, this.getClass() );
-
+        tser.addResourceRoot( document.getPathToLocalRoot(), adr, this.getClass());
         return true;
     }
 
+    private void addGeoAsUrlResource(Resource card,HTMLDocument document) throws ExtractionException {
+        HTMLDocument.TextField[] links = document.getPluralUrlField(Microformats2Prefixes.URL_PROPERTY_PREFIX+"geo");
+        for (HTMLDocument.TextField link : links) {
+            conditionallyAddResourceProperty(card, vVCARD.geo, getHTMLDocument().resolveURI(link.value()));
+        }
+    }
+
     @Override
     public ExtractorDescription getDescription() {
         return HAdrExtractorFactory.getDescriptionInstance();

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
index 4a1fbfd..c9c061a 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HGeoExtractor.java
@@ -26,6 +26,9 @@ import org.openrdf.model.vocabulary.RDF;
 import org.w3c.dom.Node;
 import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
 import org.apache.any23.extractor.html.HTMLDocument;
+
+import java.util.ArrayList;
+
 /**
  * Extractor for the <a href="http://microformats.org/wiki/h-geo">h-geo</a>
  * microformat.
@@ -36,13 +39,19 @@ public class HGeoExtractor extends EntityBasedMicroformatExtractor {
 
     private static final VCard vVCARD = VCard.getInstance();
 
+    private static final String[] geoFields = {
+            "latitude",
+            "longitude",
+            "altitude"
+    };
+
     @Override
     public ExtractorDescription getDescription() {
         return HGeoExtractorFactory.getDescriptionInstance();
     }
 
     protected String getBaseClassName() {
-        return "h-geo";
+        return Microformats2Prefixes.CLASS_PREFIX+"geo";
     }
 
     @Override
@@ -53,31 +62,32 @@ public class HGeoExtractor extends EntityBasedMicroformatExtractor {
     protected boolean extractEntity(Node node, ExtractionResult out) {
         if (null == node) return false;
         final HTMLDocument document = new HTMLDocument(node);
-        HTMLDocument.TextField latNode = document.getSingularTextField("p-latitude");
-        HTMLDocument.TextField lonNode = document.getSingularTextField("p-longitude");
-        HTMLDocument.TextField altNode = document.getSingularTextField("p-altitude");
-        String lat = latNode.value();
-        String lon = lonNode.value();
-        String alt = altNode.value();
         BNode geo = getBlankNodeFor(node);
         out.writeTriple(geo, RDF.TYPE, vVCARD.Location);
         final String extractorName = getDescription().getExtractorName();
-        conditionallyAddStringProperty(
-                latNode.source(),
-                geo, vVCARD.latitude , lat
-        );
-        conditionallyAddStringProperty(
-                lonNode.source(),
-                geo, vVCARD.longitude, lon
-        );
-        conditionallyAddStringProperty(
-                altNode.source(),
-                geo, vVCARD.altitude, alt
-        );
-
+        ArrayList<HTMLDocument.TextField> geoNodes = new ArrayList<HTMLDocument.TextField>();
+        for(String field : geoFields){
+            geoNodes.add(document.getSingularTextField(Microformats2Prefixes.PROPERTY_PREFIX+field));
+        }
+        if(geoNodes.get(0).source()==null){
+            String[] composed = document.getSingularUrlField(Microformats2Prefixes.CLASS_PREFIX +"geo")
+                                        .value().split(";");
+            for(int counter=0;counter<composed.length;counter++){
+                conditionallyAddStringProperty(
+                        document.getSingularUrlField(Microformats2Prefixes.CLASS_PREFIX+"geo").source(),
+                        geo, vVCARD.getProperty(geoFields[counter]), composed[counter]
+                );
+            }
+        }else{
+            for(int counter=0;counter<geoNodes.size();counter++){
+                conditionallyAddStringProperty(
+                        geoNodes.get(counter).source(),
+                        geo, vVCARD.getProperty(geoFields[counter]) , geoNodes.get(counter).value()
+                );
+            }
+        }
         final TagSoupExtractionResult tser = (TagSoupExtractionResult) getCurrentExtractionResult();
         tser.addResourceRoot( document.getPathToLocalRoot(), geo, this.getClass() );
-
         return true;
     }
     

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
new file mode 100644
index 0000000..18ac1b1
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+public class Microformats2Prefixes {
+    public static final String CLASS_PREFIX = "h-";
+    public static final String PROPERTY_PREFIX = "p-";
+    public static final String URL_PROPERTY_PREFIX = "u-";
+    public static final String EMBEDDED_PROPERTY_PREFIX = "e-";
+    public static final String TIME_PROPERTY_PREFIX = "dt-";
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
index 0fb3625..69abb55 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
@@ -32,6 +32,6 @@ public class HAdrExtractorTest extends AbstractExtractorTestCase {
     public void testModelNotEmpty() throws RepositoryException , RDFHandlerException {
         assertExtract("/microformats2/h-adr/h-adr-test.html");
         assertModelNotEmpty();
-        assertStatementsSize(null, null, null, 10);
+        assertStatementsSize(null, null, null, 11);
     }
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
index eba89de..0d29fda 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HGeoExtractorTest.java
@@ -41,7 +41,7 @@ public class HGeoExtractorTest extends AbstractExtractorTestCase {
     public void testModelNotEmpty() throws RepositoryException , RDFHandlerException {
         assertExtract("/microformats2/h-geo/h-geo-test.html");
         assertModelNotEmpty();
-        assertStatementsSize(null, null, null, 4);
+        assertStatementsSize(null, null, null, 8);
     }
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
index b5c095a..5438b90 100644
--- a/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
+++ b/test-resources/src/test/resources/microformats2/h-adr/h-adr-test.html
@@ -18,16 +18,17 @@
 <html>
 
   <body>
-    <!-- Microformats 2 -->
-    <div class="h-adr">
-      <span class="p-street-address">349/B</span> 
-      <span class="p-extended-address">Batagama,North</span>   
-      <span class="p-locality">Jaela</span>
-      <span class="p-region">Western</span>
-      <span class="p-postal-code">11325</span>    
-      <span class="p-country-name">SL</span></span>
-      <span class="p-geo">51.526421;-0.081067;25</span> 
-    </div>
+      <!-- Microformats 2 -->
+      <div class="h-adr">
+          <span class="p-street-address">349/B</span>
+          <span class="p-extended-address">Batagama,North</span>
+          <span class="p-locality">Jaela</span>
+          <span class="p-region">Western</span>
+          <span class="p-postal-code">11325</span>
+          <span class="p-country-name">SL</span></span>
+          <span class="p-geo">51.526421;-0.081067;25</span>
+          <a class="u-geo" href="geo:51.526421;-0.081067;crs=wgs84;u=40">Home</a>
+      </div>
   </body>
 
 </html>

http://git-wip-us.apache.org/repos/asf/any23/blob/ff816027/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
index c0181fe..38d906f 100644
--- a/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
+++ b/test-resources/src/test/resources/microformats2/h-geo/h-geo-test.html
@@ -20,13 +20,13 @@
 <body>
    <!-- Microformats 2 -->
 
-   <p>
-     <span class="h-geo">
+   <span class="h-geo">
        <span class="p-latitude">54.155278</span>,
        <span class="p-longitude">-2.249722</span>
        <span class="p-altitude">694</span>
-     </span>
-   </p>
+   </span>
+
+   <span class="h-geo">51.513458;-0.14812;50</span>
 
 </body>
 


[09/13] any23 git commit: add HCard extractor and completed all the toDos related to hcard dependencies

Posted by le...@apache.org.
add HCard extractor and completed all the toDos related to hcard dependencies


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/cf48a5bf
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/cf48a5bf
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/cf48a5bf

Branch: refs/heads/master
Commit: cf48a5bf88b40bc327108a4daa857e14d914d654
Parents: 417b71a
Author: Nisala <ni...@gmail.com>
Authored: Wed Aug 26 23:41:22 2015 +0530
Committer: Nisala <ni...@gmail.com>
Committed: Wed Aug 26 23:41:22 2015 +0530

----------------------------------------------------------------------
 .../main/java/org/apache/any23/vocab/HCard.java |  86 ++++
 .../html/microformats2/HCardExtractor.java      | 450 +++++++++++++++++++
 .../microformats2/HCardExtractorFactory.java    |  57 +++
 .../html/microformats2/HEntryExtractor.java     |  20 +-
 .../html/microformats2/HEventExtractor.java     |  64 ++-
 .../html/microformats2/HProductExtractor.java   |  21 +-
 .../html/microformats2/HResumeExtractor.java    |  37 +-
 .../apache/any23/prefixes/prefixes.properties   |   1 +
 .../html/microformats2/HAdrExtractorTest.java   |   2 +-
 .../html/microformats2/HCardExtractorTest.java  |  37 ++
 .../html/microformats2/HEntryExtractorTest.java |   2 +-
 .../html/microformats2/HEventExtractorTest.java |   2 +-
 .../apache/any23/vocab/RDFSchemaUtilsTest.java  |   4 +-
 .../microformats2/h-card/h-card-test.html       |  45 ++
 .../microformats2/h-entry/h-entry-test.html     |  21 +
 15 files changed, 829 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/api/src/main/java/org/apache/any23/vocab/HCard.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HCard.java b/api/src/main/java/org/apache/any23/vocab/HCard.java
new file mode 100644
index 0000000..b22e58c
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HCard.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary to map the <a href="http://microformats.org/wiki/hcard">h-card</a> microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HCard extends Vocabulary {
+    public static final String NS = SINDICE.NS + "hcard/";
+
+    private static HCard instance;
+
+    public static HCard getInstance() {
+        if(instance == null) {
+            instance = new HCard();
+        }
+        return instance;
+    }
+
+    public URI Card  = createClass(NS, "Card");
+    public URI Address   = createClass(NS, "Address");
+    public URI Geo = createClass(NS, "Geo");
+
+
+    public URI name  = createProperty(NS, "name");
+    public URI honorific_prefix   = createProperty(NS, "honorific-prefix");
+    public URI given_name   = createProperty(NS, "given-name");
+    public URI additional_name   = createProperty(NS, "additional-name");
+    public URI family_name   = createProperty(NS, "family-name");
+    public URI sort_string   = createProperty(NS, "sort-string");
+    public URI honorific_suffix   = createProperty(NS, "honorific-suffix");
+    public URI nickname  = createProperty(NS, "nickname");
+    public URI email   = createProperty(NS, "email");
+    public URI logo   = createProperty(NS, "logo");
+    public URI photo  = createProperty(NS, "photo");
+    public URI url   = createProperty(NS, "url");
+    public URI uid   = createProperty(NS, "uid");
+    public URI category   = createProperty(NS, "category");
+    public URI tel  = createProperty(NS, "tel");
+    public URI note   = createProperty(NS, "note");
+    public URI bday   = createProperty(NS, "bday");
+    public URI key  = createProperty(NS, "key");
+    public URI org   = createProperty(NS, "org");
+    public URI job_title   = createProperty(NS, "job-title");
+    public URI role   = createProperty(NS, "role");
+    public URI impp   = createProperty(NS, "impp");
+    public URI sex  = createProperty(NS, "sex");
+    public URI gender_identity   = createProperty(NS, "gender-identity");
+    public URI anniversary   = createProperty(NS, "anniversary");
+    public URI geo   = createProperty(NS, "geo");
+    public URI adr   = createProperty(NS, "adr");
+
+    public URI street_address  = createProperty(NS, "street-address");
+    public URI extended_address   = createProperty(NS, "extended-address");
+    public URI locality   = createProperty(NS, "locality");
+    public URI region   = createProperty(NS, "region");
+    public URI postal_code   = createProperty(NS, "postal-code");
+    public URI country_name   = createProperty(NS, "country-name");
+
+    public URI latitude   = createProperty(NS, "latitude");
+    public URI longitude   = createProperty(NS, "longitude");
+    public URI altitude   = createProperty(NS, "altitude");
+
+    private HCard() {
+        super(NS);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/main/java/org/apache/any23/extractor/html/microformats2/HCardExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HCardExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HCardExtractor.java
new file mode 100644
index 0000000..ebdd77b
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HCardExtractor.java
@@ -0,0 +1,450 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.extractor.html.HTMLDocument;
+import org.apache.any23.vocab.HCard;
+import org.apache.any23.vocab.VCard;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Resource;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.DomUtils;
+
+import java.util.List;
+
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/hcard">h-Card</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HCardExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HCard vCARD = HCard.getInstance();
+
+    private static final String[] cardFields = {
+            "name",
+            "honorific-prefix",
+            "given-name",
+            "additional-name",
+            "family-name",
+            "sort-string",
+            "honorific-suffix",
+            "nickname",
+            "email",
+            "logo",
+            "photo",
+            "url",
+            "uid",
+            "category",
+            "tel",
+            "note",
+            "bday",
+            "key",
+            "org",
+            "job-title",
+            "role",
+            "impp",
+            "sex",
+            "gender-identity",
+            "anniversary",
+            "adr",
+            "geo"
+    };
+
+    private static final String[] addressFields = {
+            "street-address",
+            "extended-address",
+            "locality",
+            "region",
+            "postal-code",
+            "country-name",
+            "geo"
+    };
+
+    private static final String[] geoFields = {
+            "latitude",
+            "longitude",
+            "altitude"
+    };
+
+
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HCardExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX+"card";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        //empty
+    }
+
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode card = getBlankNodeFor(node);
+        conditionallyAddResourceProperty(card, RDF.TYPE, vCARD.Card);
+        final HTMLDocument fragment = new HTMLDocument(node);
+        addName(fragment, card);
+        addHonorificPrefix(fragment, card);
+        addGivenName(fragment, card);
+        addAdditionalName(fragment, card);
+        addFamilyName(fragment, card);
+        addSortString(fragment, card);
+        addHonorificSuffix(fragment, card);
+        addNickname(fragment, card);
+        addEmails(fragment, card);
+        addLogo(fragment, card);
+        addPhoto(fragment, card);
+        addURLs(fragment, card);
+        addUID(fragment, card);
+        addCategories(fragment, card);
+        addTelephones(fragment, card);
+        addNotes(fragment, card);
+        addBday(fragment, card);
+        addKey(fragment, card);
+        addOrg(fragment, card);
+        addJobTitle(fragment, card);
+        addRole(fragment, card);
+        addImpp(fragment, card);
+        addSex(fragment, card);
+        addGenderIdentity(fragment, card);
+        addAnniversary(fragment, card);
+        addGeo(fragment, card);
+        addAdr(fragment, card);
+        final TagSoupExtractionResult tser = (TagSoupExtractionResult) out;
+        tser.addResourceRoot( DomUtils.getXPathListForNode(node), card, this.getClass() );
+        return true;
+    }
+
+    public Resource extractEntityAsEmbeddedProperty(HTMLDocument fragment, BNode card,
+                                                    ExtractionResult out)
+            throws ExtractionException {
+        this.setCurrentExtractionResult(out);
+        addName(fragment, card);
+        addHonorificPrefix(fragment, card);
+        addGivenName(fragment, card);
+        addAdditionalName(fragment, card);
+        addFamilyName(fragment, card);
+        addSortString(fragment, card);
+        addHonorificSuffix(fragment, card);
+        addNickname(fragment, card);
+        addEmails(fragment, card);
+        addLogo(fragment, card);
+        addPhoto(fragment, card);
+        addURLs(fragment, card);
+        addUID(fragment, card);
+        addCategories(fragment, card);
+        addTelephones(fragment, card);
+        addNotes(fragment, card);
+        addBday(fragment, card);
+        addKey(fragment, card);
+        addOrg(fragment, card);
+        addJobTitle(fragment, card);
+        addRole(fragment, card);
+        addImpp(fragment, card);
+        addSex(fragment, card);
+        addGenderIdentity(fragment, card);
+        addAnniversary(fragment, card);
+        addGeo(fragment, card);
+        addAdr(fragment, card);
+        return card;
+    }
+
+
+
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode card, String fieldClass,
+                                      URI property) {
+        HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(
+                title.source(), card, property, title.value()
+        );
+    }
+
+    private void addName(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[0], vCARD.name);
+    }
+
+    private void addHonorificPrefix(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[1], vCARD.honorific_prefix);
+    }
+
+    private void addGivenName(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[2], vCARD.given_name);
+    }
+
+    private void addAdditionalName(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[3], vCARD.additional_name);
+    }
+
+    private void addFamilyName(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[4], vCARD.family_name);
+    }
+
+    private void addSortString(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[5], vCARD.sort_string);
+    }
+
+    private void addHonorificSuffix(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[6], vCARD.honorific_suffix);
+    }
+
+    private void addNickname(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[7], vCARD.nickname);
+    }
+
+    private void addEmails(HTMLDocument fragment, BNode card) throws ExtractionException {
+        final HTMLDocument.TextField[] emails = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + cardFields[8]);
+        for(HTMLDocument.TextField email : emails) {
+            addURIProperty(card, vCARD.email, fragment.resolveURI(email.value()));
+
+        }
+    }
+
+    private void addLogo(HTMLDocument fragment, BNode card) throws ExtractionException {
+        final HTMLDocument.TextField logo = fragment.getSingularUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + cardFields[9]);
+        if(logo.source()==null)
+            return;
+        addURIProperty(card, vCARD.logo, fragment.resolveURI(logo.value()));
+    }
+
+    private void addPhoto(HTMLDocument fragment, BNode card) throws ExtractionException {
+        final HTMLDocument.TextField photo = fragment.getSingularUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + cardFields[10]);
+        if(photo.source()==null)
+            return;
+        addURIProperty(card, vCARD.photo, fragment.resolveURI(photo.value()));
+    }
+
+    private void addURLs(HTMLDocument fragment, BNode card) throws ExtractionException {
+        final HTMLDocument.TextField[] urls = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + cardFields[11]);
+        for(HTMLDocument.TextField url : urls) {
+            addURIProperty(card, vCARD.url, fragment.resolveURI(url.value()));
+
+        }
+    }
+
+    private void addUID(HTMLDocument fragment, BNode card) throws ExtractionException {
+        final HTMLDocument.TextField uid = fragment.getSingularUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + cardFields[12]);
+        if(uid.source()==null)
+            return;
+        addURIProperty(card, vCARD.uid, fragment.resolveURI(uid.value()));
+    }
+
+
+    private void addCategories(HTMLDocument fragment, BNode entry) {
+        final HTMLDocument.TextField[] categories = fragment.getPluralTextField
+                (Microformats2Prefixes.PROPERTY_PREFIX + cardFields[13]);
+        for (HTMLDocument.TextField category : categories) {
+            conditionallyAddStringProperty(
+                    category.source(), entry, vCARD.category, category.value()
+            );
+        }
+    }
+
+    private void addTelephones(HTMLDocument fragment, BNode card) {
+        final HTMLDocument.TextField[] telephones = fragment.getPluralTextField
+                (Microformats2Prefixes.PROPERTY_PREFIX + cardFields[14]);
+        for (HTMLDocument.TextField tel : telephones) {
+            Node attribute=tel.source().getAttributes().getNamedItem("value");
+            if (attribute==null){
+                conditionallyAddStringProperty(
+                        tel.source(), card, vCARD.tel, tel.value()
+                );
+            }else{
+                conditionallyAddStringProperty(
+                        tel.source(), card, vCARD.tel, attribute.getNodeValue()
+                );
+            }
+        }
+    }
+
+    private void addNotes(HTMLDocument fragment, BNode entry) {
+        final HTMLDocument.TextField[] categories = fragment.getPluralTextField
+                (Microformats2Prefixes.PROPERTY_PREFIX + cardFields[15]);
+        for (HTMLDocument.TextField category : categories) {
+            conditionallyAddStringProperty(
+                    category.source(), entry, vCARD.note, category.value()
+            );
+        }
+    }
+
+    private void addBday(HTMLDocument fragment, BNode card) {
+        final HTMLDocument.TextField bday = fragment.getSingularTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + cardFields[16]);
+        if (bday.source() == null)
+            return;
+
+        Node attribute = bday.source().getAttributes().getNamedItem("datetime");
+        if (attribute == null) {
+            conditionallyAddStringProperty(
+                    bday.source(),
+                    card, vCARD.bday, bday.value()
+            );
+        } else {
+            conditionallyAddStringProperty(
+                    bday.source(),
+                    card, vCARD.bday, attribute.getNodeValue()
+            );
+
+        }
+    }
+
+    private void addKey(HTMLDocument fragment, BNode card) throws ExtractionException {
+        final HTMLDocument.TextField uid = fragment.getSingularTextField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + cardFields[17]);
+        if(uid.source()==null)
+            return;
+        addURIProperty(card, vCARD.key, fragment.resolveURI(uid.value()));
+    }
+
+    private void addOrg(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[18], vCARD.org);
+    }
+
+    private void addJobTitle(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[19], vCARD.job_title);
+    }
+
+    private void addRole(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[20], vCARD.role);
+    }
+
+    private void addImpp(HTMLDocument fragment, BNode card) throws ExtractionException {
+        final HTMLDocument.TextField impp = fragment.getSingularTextField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + cardFields[21]);
+        if(impp.source()==null)
+            return;
+        addURIProperty(card, vCARD.impp, fragment.resolveURI(impp.value()));
+    }
+
+    private void addSex(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[22], vCARD.sex);
+    }
+
+    private void addGenderIdentity(HTMLDocument fragment, BNode card) {
+        mapFieldWithProperty(fragment, card, Microformats2Prefixes.PROPERTY_PREFIX +
+                cardFields[23], vCARD.gender_identity);
+    }
+
+
+    private void addAnniversary(HTMLDocument fragment, BNode card) {
+        final HTMLDocument.TextField anniversary = fragment.getSingularTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + cardFields[24]);
+        if (anniversary.source() == null)
+            return;
+
+        Node attribute = anniversary.source().getAttributes().getNamedItem("datetime");
+        if (attribute == null) {
+            conditionallyAddStringProperty(
+                    anniversary.source(),
+                    card, vCARD.bday, anniversary.value()
+            );
+        } else {
+            conditionallyAddStringProperty(
+                    anniversary.source(),
+                    card, vCARD.bday, attribute.getNodeValue()
+            );
+
+        }
+    }
+
+    private void addAdr(HTMLDocument doc, Resource card) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + cardFields[25] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + cardFields[25]);
+        if (nodes.isEmpty())
+            return;
+        for (Node node : nodes) {
+            BNode location = valueFactory.createBNode();
+            addURIProperty(location, RDF.TYPE, vCARD.Address);
+            HTMLDocument fragment = new HTMLDocument(node);
+            for (String field : addressFields) {
+                HTMLDocument.TextField[] values = fragment.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX+field);
+                for (HTMLDocument.TextField val : values) {
+                    if(!field.equals("geo")) {
+                        conditionallyAddStringProperty(
+                                val.source(),
+                                location, vCARD.getProperty(field), val.value()
+                        );
+                    }else {
+                        addGeo(new HTMLDocument(node),card);
+                    }
+                }
+            }
+        }
+    }
+
+    private void addGeo(HTMLDocument doc, Resource card) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + cardFields[26] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + cardFields[26]);
+        if (nodes.isEmpty())
+            return;
+        for (Node node : nodes) {
+            BNode location = valueFactory.createBNode();
+            addURIProperty(location, RDF.TYPE, vCARD.Geo);
+            HTMLDocument fragment = new HTMLDocument(node);
+            for (String field : geoFields) {
+                HTMLDocument.TextField[] values = fragment.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX+field);
+                for (HTMLDocument.TextField val : values) {
+                    Node attribute=val.source().getAttributes().getNamedItem("title");
+                    if (attribute==null){
+                        conditionallyAddStringProperty(
+                                val.source(),
+                                location, vCARD.getProperty(field), val.value()
+                        );
+                    }else{
+                        conditionallyAddStringProperty(
+                                val.source(),
+                                location, vCARD.getProperty(field), attribute.getNodeValue()
+                        );
+                    }
+                }
+            }
+        }
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/main/java/org/apache/any23/extractor/html/microformats2/HCardExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HCardExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HCardExtractorFactory.java
new file mode 100644
index 0000000..5a7d63e
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HCardExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author Nisala Nirmana
+ *
+ */
+public class HCardExtractorFactory extends SimpleExtractorFactory<HCardExtractor> implements
+        ExtractorFactory<HCardExtractor> {
+
+    public static final String NAME = "html-mf2-h-card";
+    
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hcard");
+
+    private static final ExtractorDescription descriptionInstance = new HCardExtractorFactory();
+    
+    public HCardExtractorFactory() {
+        super(
+                HCardExtractorFactory.NAME, 
+                HCardExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-card.html");
+    }
+    
+    @Override
+    public HCardExtractor createExtractor() {
+        return new HCardExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractor.java
index 8c0c50f..3a85b5b 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractor.java
@@ -55,7 +55,7 @@ public class HEntryExtractor extends EntityBasedMicroformatExtractor {
             "uid",
             "syndication",
             "in-reply-to",
-            "author", //toDo HCard
+            "author",
             "location",
 
     };
@@ -96,10 +96,26 @@ public class HEntryExtractor extends EntityBasedMicroformatExtractor {
         addUID(fragment, entry);
         addSyndications(fragment, entry);
         addInReplyTo(fragment, entry);
-        addLocations(fragment,entry);
+        addLocations(fragment, entry);
+        addAuthors(fragment, entry);
         return true;
     }
 
+    private void addAuthors(HTMLDocument doc, Resource entry) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + entryFields[10] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "card");
+        if (nodes.isEmpty())
+            return;
+        HCardExtractorFactory factory = new HCardExtractorFactory();
+        HCardExtractor extractor = factory.createExtractor();
+        for (Node node : nodes) {
+            BNode author = valueFactory.createBNode();
+            addURIProperty(author, RDF.TYPE, vEntry.author);
+            extractor.extractEntityAsEmbeddedProperty(new HTMLDocument(node), author,
+                    getCurrentExtractionResult());
+        }
+    }
+
     private void mapFieldWithProperty(HTMLDocument fragment, BNode entry, String fieldClass,
                                       URI property) {
         HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
index ea90716..3f4d817 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
@@ -23,6 +23,7 @@ import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.extractor.TagSoupExtractionResult;
 import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
 import org.apache.any23.vocab.HEvent;
+import org.apache.any23.vocab.VCard;
 import org.openrdf.model.BNode;
 import org.openrdf.model.Resource;
 import org.openrdf.model.URI;
@@ -30,6 +31,8 @@ import org.openrdf.model.vocabulary.RDF;
 import org.w3c.dom.Node;
 import org.apache.any23.extractor.html.HTMLDocument;
 
+import java.util.List;
+
 import static org.apache.any23.extractor.html.HTMLDocument.TextField;
 
 
@@ -42,6 +45,7 @@ import static org.apache.any23.extractor.html.HTMLDocument.TextField;
 public class HEventExtractor extends EntityBasedMicroformatExtractor {
 
     private static final HEvent vEvent = HEvent.getInstance();
+    private static final VCard vVCARD = VCard.getInstance();
 
     private String[] eventFields = {
             "name",
@@ -52,8 +56,14 @@ public class HEventExtractor extends EntityBasedMicroformatExtractor {
             "description",
             "url",
             "category",
-            "location", //toDO
-            "attendee" //toDO
+            "location",
+            "attendee"
+    };
+
+    private static final String[] geoFields = {
+            "latitude",
+            "longitude",
+            "altitude"
     };
 
 
@@ -85,7 +95,7 @@ public class HEventExtractor extends EntityBasedMicroformatExtractor {
         addDescription(fragment, event);
         addURLs(fragment, event);
         addCategories(fragment, event);
-        addLocation(fragment, event);
+        addLocations(fragment, event);
         
         return true;
     }
@@ -102,10 +112,26 @@ public class HEventExtractor extends EntityBasedMicroformatExtractor {
         addDescription(fragment, event);
         addURLs(fragment, event);
         addCategories(fragment, event);
-        addLocation(fragment, event);
+        addLocations(fragment, event);
+        addAttendees(fragment,event);
         return event;
     }
 
+    private void addAttendees(HTMLDocument doc, Resource entry) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + eventFields[9] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "card");
+        if (nodes.isEmpty())
+            return;
+        HCardExtractorFactory factory = new HCardExtractorFactory();
+        HCardExtractor extractor = factory.createExtractor();
+        for (Node node : nodes) {
+            BNode attendee = valueFactory.createBNode();
+            addURIProperty(attendee, RDF.TYPE, vEvent.attendee);
+            extractor.extractEntityAsEmbeddedProperty(new HTMLDocument(node), attendee,
+                    getCurrentExtractionResult());
+        }
+    }
+
     private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass,
                                       URI property) {
         HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
@@ -204,9 +230,33 @@ public class HEventExtractor extends EntityBasedMicroformatExtractor {
         }
     }
 
-    private void addLocation(HTMLDocument fragment, BNode event) {
-        mapFieldWithProperty(fragment, event, Microformats2Prefixes.PROPERTY_PREFIX +
-                eventFields[8], vEvent.location);
+    private void addLocations(HTMLDocument doc, Resource entry) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + eventFields[8] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "geo");
+        if (nodes.isEmpty())
+            return;
+        for (Node node : nodes) {
+            BNode location = valueFactory.createBNode();
+            addURIProperty(location, RDF.TYPE, vEvent.location);
+            HTMLDocument fragment = new HTMLDocument(node);
+            for (String field : geoFields) {
+                HTMLDocument.TextField[] values = fragment.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX+field);
+                for (HTMLDocument.TextField val : values) {
+                    Node attribute=val.source().getAttributes().getNamedItem("title");
+                    if (attribute==null){
+                        conditionallyAddStringProperty(
+                                val.source(),
+                                location, vVCARD.getProperty(field), val.value()
+                        );
+                    }else{
+                        conditionallyAddStringProperty(
+                                val.source(),
+                                location, vVCARD.getProperty(field), attribute.getNodeValue()
+                        );
+                    }
+                }
+            }
+        }
     }
 
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
index 0e93935..0673a1d 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HProductExtractor.java
@@ -24,10 +24,13 @@ import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
 import org.apache.any23.extractor.html.HTMLDocument;
 import org.apache.any23.vocab.HProduct;
 import org.openrdf.model.BNode;
+import org.openrdf.model.Resource;
 import org.openrdf.model.URI;
 import org.openrdf.model.vocabulary.RDF;
 import org.w3c.dom.Node;
 
+import java.util.List;
+
 /**
  * Extractor for the <a href="http://microformats.org/wiki/h-product">h-product</a>
  * microformat.
@@ -41,7 +44,7 @@ public class HProductExtractor extends EntityBasedMicroformatExtractor {
     private static final String[] productFields = {
             "name",
             "photo",
-            "brand", //toDo
+            "brand",
             "category",
             "description",
             "url",
@@ -77,6 +80,7 @@ public class HProductExtractor extends EntityBasedMicroformatExtractor {
         addURLs(fragment, product);
         addIdentifiers(fragment, product);
         addPrice(fragment, product);
+        addBrand(fragment,product);
         return true;
     }
 
@@ -150,4 +154,19 @@ public class HProductExtractor extends EntityBasedMicroformatExtractor {
             );
         }
     }
+
+    private void addBrand(HTMLDocument doc, Resource product) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + productFields[2] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "card");
+        if (nodes.isEmpty())
+            return;
+        HCardExtractorFactory factory = new HCardExtractorFactory();
+        HCardExtractor extractor = factory.createExtractor();
+        for (Node node : nodes) {
+            BNode brand = valueFactory.createBNode();
+            addURIProperty(brand, RDF.TYPE, vProduct.brand);
+            extractor.extractEntityAsEmbeddedProperty(new HTMLDocument(node), brand,
+                    getCurrentExtractionResult());
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
index 44b463d..2026219 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
@@ -21,10 +21,7 @@ import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.ExtractorDescription;
 import org.apache.any23.extractor.TagSoupExtractionResult;
-import org.apache.any23.vocab.DOAC;
-import org.apache.any23.vocab.FOAF;
 import org.apache.any23.vocab.HResume;
-import org.apache.commons.lang.UnhandledException;
 import org.openrdf.model.BNode;
 import org.openrdf.model.Resource;
 import org.openrdf.model.vocabulary.RDF;
@@ -47,11 +44,11 @@ public class HResumeExtractor extends EntityBasedMicroformatExtractor {
     private static final String[] resumeFields = {
             "name",
             "summary",
-            "contact",//toDo Hcard
+            "contact",
             "education",
             "experience",
             "skill",
-            "affiliation"//toDo Hcard
+            "affiliation"
     };
 
     @Override
@@ -94,6 +91,36 @@ public class HResumeExtractor extends EntityBasedMicroformatExtractor {
         return true;
     }
 
+    private void addContacts(HTMLDocument doc, Resource entry) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + resumeFields[2] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "card");
+        if (nodes.isEmpty())
+            return;
+        HCardExtractorFactory factory = new HCardExtractorFactory();
+        HCardExtractor extractor = factory.createExtractor();
+        for (Node node : nodes) {
+            BNode contact = valueFactory.createBNode();
+            addURIProperty(contact, RDF.TYPE, vResume.contact);
+            extractor.extractEntityAsEmbeddedProperty(new HTMLDocument(node), contact,
+                    getCurrentExtractionResult());
+        }
+    }
+
+    private void addAffiliations(HTMLDocument doc, Resource entry) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + resumeFields[6] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "card");
+        if (nodes.isEmpty())
+            return;
+        HCardExtractorFactory factory = new HCardExtractorFactory();
+        HCardExtractor extractor = factory.createExtractor();
+        for (Node node : nodes) {
+            BNode affiliation = valueFactory.createBNode();
+            addURIProperty(affiliation, RDF.TYPE, vResume.affiliation);
+            extractor.extractEntityAsEmbeddedProperty(new HTMLDocument(node), affiliation,
+                    getCurrentExtractionResult());
+        }
+    }
+
     private void addName(HTMLDocument doc, Resource person) {
         HTMLDocument.TextField name = doc.getSingularTextField(
                 Microformats2Prefixes.PROPERTY_PREFIX + resumeFields[0]);

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
----------------------------------------------------------------------
diff --git a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
index c7eaf54..2f9183d 100644
--- a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
+++ b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
@@ -33,6 +33,7 @@ wo=http://purl.org/ontology/wo/
 skos=http://www.w3.org/2004/02/skos/core#
 hrecipe=http://sindice.com/hrecipe/
 hevent=http://sindice.com/hevent/
+hcard=http://sindice.com/hcard/
 hproduct=http://sindice.com/hproduct/
 hitem=http://sindice.com/hitem/
 hentry=http://sindice.com/hentry/

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
index 69abb55..e857105 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HAdrExtractorTest.java
@@ -34,4 +34,4 @@ public class HAdrExtractorTest extends AbstractExtractorTestCase {
         assertModelNotEmpty();
         assertStatementsSize(null, null, null, 11);
     }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/test/java/org/apache/any23/extractor/html/microformats2/HCardExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HCardExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HCardExtractorTest.java
new file mode 100644
index 0000000..9c9dc06
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HCardExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HCardExtractorTest extends AbstractExtractorTestCase {
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HCardExtractorFactory();
+    }
+
+    @Test
+    public void testModelNotEmpty() throws RepositoryException , RDFHandlerException {
+        assertExtract("/microformats2/h-card/h-card-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 9);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorTest.java
index cc2974d..96f3a6e 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorTest.java
@@ -32,6 +32,6 @@ public class HEntryExtractorTest extends AbstractExtractorTestCase {
     public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
         assertExtract("/microformats2/h-entry/h-entry-test.html");
         assertModelNotEmpty();
-        assertStatementsSize(null, null, null, 10);
+        assertStatementsSize(null, null, null, 20);
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
index 6c13909..70b212e 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEventExtractorTest.java
@@ -32,6 +32,6 @@ public class HEventExtractorTest extends AbstractExtractorTestCase {
     public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
         assertExtract("/microformats2/h-event/h-event-test.html");
         assertModelNotEmpty();
-        assertStatementsSize(null, null, null, 9);
+        assertStatementsSize(null, null, null, 8);
     }
 }

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
index c58e2a1..64fb4b7 100644
--- a/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
+++ b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
@@ -43,7 +43,7 @@ public class RDFSchemaUtilsTest {
      */
     @Test
     public void testSerializeVocabulariesNTriples() {
-        serializeVocabularies(RDFFormat.NTRIPLES, 2012);//1920
+        serializeVocabularies(RDFFormat.NTRIPLES, 2090);
     }
 
     /**
@@ -53,7 +53,7 @@ public class RDFSchemaUtilsTest {
      */
     @Test
     public void testSerializeVocabulariesRDFXML() {
-        serializeVocabularies(RDFFormat.RDFXML, 5252); // Effective lines + separators. //4992
+        serializeVocabularies(RDFFormat.RDFXML, 5453); // Effective lines + separators.
     }
 
     private void serializeVocabularies(RDFFormat format, int expectedLines) {

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/test-resources/src/test/resources/microformats2/h-card/h-card-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-card/h-card-test.html b/test-resources/src/test/resources/microformats2/h-card/h-card-test.html
new file mode 100644
index 0000000..f5ffb56
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-card/h-card-test.html
@@ -0,0 +1,45 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE html>
+
+<html>
+
+<body>
+   <div class="h-card">
+       
+
+        <!-- Microformats 2 -->
+
+       <p class="p-name">Joe Bloggs</p>
+       
+       <p><img class="u-photo" src="http://Joe.net/picture.jpg" /></p>
+       
+       <p class="u-url" href="http://linkedin.au/Joe/">Professional Profile</p>
+
+       <time class="dt-bday" datetime="1989-10-27">27 Oct 1989</time>
+       
+       <p class="p-adr h-adr">
+           <span class="p-street-address">17 Austerstræti</span>
+           <span class="p-locality">Reykjavík</span>
+           <span class="p-country-name">Iceland</span>
+       </p>
+
+
+   </div>
+
+</body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/any23/blob/cf48a5bf/test-resources/src/test/resources/microformats2/h-entry/h-entry-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-entry/h-entry-test.html b/test-resources/src/test/resources/microformats2/h-entry/h-entry-test.html
index f3c8cf7..adc2a1a 100644
--- a/test-resources/src/test/resources/microformats2/h-entry/h-entry-test.html
+++ b/test-resources/src/test/resources/microformats2/h-entry/h-entry-test.html
@@ -23,6 +23,27 @@
         <time class="dt-published" datetime="2012-03-25T17:08:26">March 25th, 2012</time> 
     </p>
 	
+    <div class="p-author h-card">
+       
+
+        <!-- Microformats 2 -->
+
+       <p class="p-name">Joe Bloggs</p>
+       
+       <p><img class="u-photo" src="http://Joe.net/picture.jpg" /></p>
+       
+       <p class="u-url" href="http://linkedin.au/Joe/">Professional Profile</p>
+
+       <time class="dt-bday" datetime="1989-10-27">27 Oct 1989</time>
+       
+       <p class="p-adr h-adr">
+           <span class="p-street-address">17 Austerstræti</span>
+           <span class="p-locality">Reykjavík</span>
+           <span class="p-country-name">Iceland</span>
+       </p>
+
+
+    </div>
     <div class="e-content">
         <p class="p-summary">Last week the microformats.org community 
             celebrated its 7th birthday at a gathering hosted by Mozilla in 


[08/13] any23 git commit: adding HEntry and HResume extractors

Posted by le...@apache.org.
adding HEntry and HResume extractors


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/417b71a7
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/417b71a7
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/417b71a7

Branch: refs/heads/master
Commit: 417b71a757ecb444a98cebeb25f48faa1c27524f
Parents: 0008c7c
Author: Nisala <ni...@gmail.com>
Authored: Sun Aug 23 21:39:34 2015 +0530
Committer: Nisala <ni...@gmail.com>
Committed: Sun Aug 23 21:39:34 2015 +0530

----------------------------------------------------------------------
 .../java/org/apache/any23/vocab/HEntry.java     |  60 +++++
 .../main/java/org/apache/any23/vocab/HItem.java |  17 ++
 .../java/org/apache/any23/vocab/HResume.java    |  54 +++++
 .../extractor/html/MicroformatExtractor.java    |   5 +
 .../html/microformats2/HEntryExtractor.java     | 234 +++++++++++++++++++
 .../microformats2/HEntryExtractorFactory.java   |  60 +++++
 .../html/microformats2/HEventExtractor.java     |  17 ++
 .../microformats2/HItemExtractorFactory.java    |   2 +-
 .../html/microformats2/HResumeExtractor.java    | 162 +++++++++++++
 .../microformats2/HResumeExtractorFactory.java  |  57 +++++
 .../microformats2/Microformats2Prefixes.java    |   1 +
 .../apache/any23/prefixes/prefixes.properties   |   2 +
 .../html/microformats2/HEntryExtractorTest.java |  37 +++
 .../microformats2/HProductExtractorTest.java    |   2 +-
 .../microformats2/HResumeExtractorTest.java     |  37 +++
 .../apache/any23/vocab/RDFSchemaUtilsTest.java  |   4 +-
 .../microformats2/h-entry/h-entry-test.html     |  53 +++++
 .../microformats2/h-resume/h-resume-test.html   |  49 ++++
 18 files changed, 849 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/api/src/main/java/org/apache/any23/vocab/HEntry.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HEntry.java b/api/src/main/java/org/apache/any23/vocab/HEntry.java
new file mode 100644
index 0000000..e63907b
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HEntry.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * Vocabulary to map the <a href="http://microformats.org/wiki/hentry">h-entry</a> microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HEntry extends Vocabulary {
+
+    public static final String NS = SINDICE.NS + "hentry/";
+
+    private static HEntry instance;
+
+    public static HEntry getInstance() {
+        if(instance == null) {
+            instance = new HEntry();
+        }
+        return instance;
+    }
+
+    public URI Entry  = createClass(NS, "Entry");
+    public URI author   = createClass(NS, "author");
+    public URI location = createClass(NS, "location");
+
+
+    public URI name  = createProperty(NS, "name");
+    public URI summary   = createProperty(NS, "summary");
+    public URI content   = createProperty(NS, "content");
+    public URI published   = createProperty(NS, "published");
+    public URI updated   = createProperty(NS, "updated");
+    public URI category   = createProperty(NS, "category");
+    public URI url   = createProperty(NS, "url");
+    public URI uid  = createProperty(NS, "uid");
+    public URI syndication   = createProperty(NS, "syndication");
+    public URI in_reply_to   = createProperty(NS, "in-reply-to");
+
+    private HEntry() {
+        super(NS);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/api/src/main/java/org/apache/any23/vocab/HItem.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HItem.java b/api/src/main/java/org/apache/any23/vocab/HItem.java
index db54e65..01bc5a2 100644
--- a/api/src/main/java/org/apache/any23/vocab/HItem.java
+++ b/api/src/main/java/org/apache/any23/vocab/HItem.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.any23.vocab;
 
 import org.openrdf.model.URI;

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/api/src/main/java/org/apache/any23/vocab/HResume.java
----------------------------------------------------------------------
diff --git a/api/src/main/java/org/apache/any23/vocab/HResume.java b/api/src/main/java/org/apache/any23/vocab/HResume.java
new file mode 100644
index 0000000..1a50157
--- /dev/null
+++ b/api/src/main/java/org/apache/any23/vocab/HResume.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.vocab;
+
+import org.openrdf.model.URI;
+
+/**
+ * @author Nisala Nirmana
+ *
+ */
+public class HResume extends Vocabulary {
+
+    public static final String NS = SINDICE.NS + "hresume/";
+
+    private static HResume instance;
+
+    public static HResume getInstance() {
+        if(instance == null) {
+            instance = new HResume();
+        }
+        return instance;
+    }
+
+    public URI Resume  = createClass(NS, "Resume");
+    public URI education   = createClass(NS, "education");
+    public URI experience = createClass(NS, "experience");
+    public URI contact = createClass(NS, "contact");
+    public URI affiliation = createClass(NS, "affiliation");
+
+
+    public URI name  = createProperty(NS, "name");
+    public URI summary   = createProperty(NS, "summary");
+    public URI skill   = createProperty(NS, "skill");
+
+
+    private HResume() {
+        super(NS);
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
index 51ee910..4de6e21 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java
@@ -113,6 +113,10 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
         return out;
     }
 
+    protected void setCurrentExtractionResult(ExtractionResult out) {
+        this.out = out;
+    }
+
     protected ExtractionResult openSubResult(ExtractionContext context) {
         return out.openSubResult(context);
     }
@@ -265,4 +269,5 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor {
         return false;
     }
 
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractor.java
new file mode 100644
index 0000000..8c0c50f
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractor.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+import org.apache.any23.vocab.HEntry;
+import org.apache.any23.vocab.VCard;
+import org.openrdf.model.BNode;
+import org.openrdf.model.URI;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.openrdf.model.Resource;
+
+import java.util.List;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-entry">h-entry</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HEntryExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HEntry vEntry = HEntry.getInstance();
+    private static final VCard vVCARD = VCard.getInstance();
+
+    private static final String[] entryFields = {
+            "name",
+            "summary",
+            "content",
+            "published",
+            "updated",
+            "category",
+            "url",
+            "uid",
+            "syndication",
+            "in-reply-to",
+            "author", //toDo HCard
+            "location",
+
+    };
+
+    private static final String[] geoFields = {
+            "latitude",
+            "longitude",
+            "altitude"
+    };
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HEntryExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    protected String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX+"entry";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        final BNode entry = getBlankNodeFor(node);
+        conditionallyAddResourceProperty(entry, RDF.TYPE, vEntry.Entry);
+        final HTMLDocument fragment = new HTMLDocument(node);
+        addName(fragment, entry);
+        addSummary(fragment, entry);
+        addContent(fragment, entry);
+        addPublished(fragment, entry);
+        addUpdated(fragment, entry);
+        addCategories(fragment, entry);
+        addURLs(fragment, entry);
+        addUID(fragment, entry);
+        addSyndications(fragment, entry);
+        addInReplyTo(fragment, entry);
+        addLocations(fragment,entry);
+        return true;
+    }
+
+    private void mapFieldWithProperty(HTMLDocument fragment, BNode entry, String fieldClass,
+                                      URI property) {
+        HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);
+        conditionallyAddStringProperty(
+                title.source(), entry, property, title.value()
+        );
+    }
+
+    private void addName(HTMLDocument fragment, BNode entry) {
+        mapFieldWithProperty(fragment, entry, Microformats2Prefixes.PROPERTY_PREFIX +
+                entryFields[0], vEntry.name);
+    }
+
+    private void addSummary(HTMLDocument fragment, BNode entry) {
+        mapFieldWithProperty(fragment, entry, Microformats2Prefixes.PROPERTY_PREFIX + entryFields[1],
+                vEntry.summary);
+    }
+
+    private void addContent(HTMLDocument fragment, BNode entry) {
+        mapFieldWithProperty(fragment, entry, Microformats2Prefixes.EMBEDDED_PROPERTY_PREFIX + entryFields[2],
+                vEntry.content);
+    }
+
+    private void addPublished(HTMLDocument fragment, BNode entry) {
+        final HTMLDocument.TextField[] durations = fragment.getPluralTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + entryFields[3]);
+        for(HTMLDocument.TextField duration : durations) {
+            Node attribute=duration.source().getAttributes().getNamedItem("datetime");
+            if (attribute==null){
+                conditionallyAddStringProperty(
+                        duration.source(),
+                        entry, vEntry.published, duration.value()
+                );
+            }else{
+                conditionallyAddStringProperty(
+                        duration.source(),
+                        entry, vEntry.published, attribute.getNodeValue()
+                );
+            }
+        }
+    }
+
+    private void addUpdated(HTMLDocument fragment, BNode entry) {
+        final HTMLDocument.TextField[] durations = fragment.getPluralTextField(
+                Microformats2Prefixes.TIME_PROPERTY_PREFIX + entryFields[4]);
+        for(HTMLDocument.TextField duration : durations) {
+            Node attribute=duration.source().getAttributes().getNamedItem("datetime");
+            if (attribute==null){
+                conditionallyAddStringProperty(
+                        duration.source(),
+                        entry, vEntry.updated, duration.value()
+                );
+            }else{
+                conditionallyAddStringProperty(
+                        duration.source(),
+                        entry, vEntry.updated, attribute.getNodeValue()
+                );
+            }
+        }
+    }
+
+    private void addCategories(HTMLDocument fragment, BNode entry) {
+        final HTMLDocument.TextField[] categories = fragment.getPluralTextField
+                (Microformats2Prefixes.PROPERTY_PREFIX + entryFields[5]);
+        for (HTMLDocument.TextField category : categories) {
+            conditionallyAddStringProperty(
+                    category.source(), entry, vEntry.category, category.value()
+            );
+        }
+    }
+
+    private void addURLs(HTMLDocument fragment, BNode entry) throws ExtractionException {
+        final HTMLDocument.TextField[] urls = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + entryFields[6]);
+        for(HTMLDocument.TextField url : urls) {
+            addURIProperty(entry, vEntry.url, fragment.resolveURI(url.value()));
+        }
+    }
+
+    private void addUID(HTMLDocument fragment, BNode entry) throws ExtractionException {
+        final HTMLDocument.TextField uid = fragment.getSingularTextField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + entryFields[7]);
+        if(uid.source()==null)
+            return;
+        addURIProperty(entry, vEntry.uid, fragment.resolveURI(uid.value()));
+    }
+
+    private void addSyndications(HTMLDocument fragment, BNode entry) throws ExtractionException {
+        final HTMLDocument.TextField[] syndications = fragment.getPluralUrlField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + entryFields[8]);
+        for(HTMLDocument.TextField syndication : syndications) {
+            addURIProperty(entry, vEntry.syndication, fragment.resolveURI(syndication.value()));
+        }
+    }
+
+    private void addInReplyTo(HTMLDocument fragment, BNode entry) throws ExtractionException {
+        final HTMLDocument.TextField inReplyTo = fragment.getSingularTextField
+                (Microformats2Prefixes.URL_PROPERTY_PREFIX + entryFields[9]);
+        if(inReplyTo.source()==null)
+            return;
+        addURIProperty(entry, vEntry.in_reply_to, fragment.resolveURI(inReplyTo.value()));
+    }
+
+    private void addLocations(HTMLDocument doc, Resource entry) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + entryFields[11] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "geo");
+        if (nodes.isEmpty())
+            return;
+        for (Node node : nodes) {
+            BNode location = valueFactory.createBNode();
+            addURIProperty(location, RDF.TYPE, vEntry.location);
+            HTMLDocument fragment = new HTMLDocument(node);
+            for (String field : geoFields) {
+                HTMLDocument.TextField[] values = fragment.getPluralTextField(Microformats2Prefixes.PROPERTY_PREFIX+field);
+                for (HTMLDocument.TextField val : values) {
+                    Node attribute=val.source().getAttributes().getNamedItem("title");
+                    if (attribute==null){
+                        conditionallyAddStringProperty(
+                                val.source(),
+                                location, vVCARD.getProperty(field), val.value()
+                        );
+                    }else{
+                        conditionallyAddStringProperty(
+                                val.source(),
+                                location, vVCARD.getProperty(field), attribute.getNodeValue()
+                        );
+                    }
+                }
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorFactory.java
new file mode 100644
index 0000000..e2d4556
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorFactory.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+import java.util.Arrays;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/h-entry">h-entry</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HEntryExtractorFactory extends SimpleExtractorFactory<HEntryExtractor> implements
+        ExtractorFactory<HEntryExtractor> {
+
+    public static final String NAME = "html-mf2-h-entry";
+
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hentry");
+
+    private static final ExtractorDescription descriptionInstance = new HEntryExtractorFactory();
+
+    public HEntryExtractorFactory() {
+        super(
+                HEntryExtractorFactory.NAME,
+                HEntryExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-entry.html");
+    }
+
+    @Override
+    public HEntryExtractor createExtractor() {
+        return new HEntryExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
index ce67d86..ea90716 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HEventExtractor.java
@@ -24,6 +24,7 @@ import org.apache.any23.extractor.TagSoupExtractionResult;
 import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
 import org.apache.any23.vocab.HEvent;
 import org.openrdf.model.BNode;
+import org.openrdf.model.Resource;
 import org.openrdf.model.URI;
 import org.openrdf.model.vocabulary.RDF;
 import org.w3c.dom.Node;
@@ -89,6 +90,22 @@ public class HEventExtractor extends EntityBasedMicroformatExtractor {
         return true;
     }
 
+    public Resource extractEntityAsEmbeddedProperty(HTMLDocument fragment, BNode event,
+                                                    ExtractionResult out)
+            throws ExtractionException {
+        this.setCurrentExtractionResult(out);
+        addName(fragment, event);
+        addSummary(fragment, event);
+        addStart(fragment, event);
+        addEnd(fragment, event);
+        addDuration(fragment, event);
+        addDescription(fragment, event);
+        addURLs(fragment, event);
+        addCategories(fragment, event);
+        addLocation(fragment, event);
+        return event;
+    }
+
     private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass,
                                       URI property) {
         HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass);

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
index 8423686..14f20bd 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HItemExtractorFactory.java
@@ -17,7 +17,7 @@ public class HItemExtractorFactory extends SimpleExtractorFactory<HItemExtractor
 
     public static final String NAME = "html-mf2-h-item";
 
-    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard");
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hitem");
 
     private static final ExtractorDescription descriptionInstance = new HItemExtractorFactory();
 

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
new file mode 100644
index 0000000..44b463d
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractor.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.TagSoupExtractionResult;
+import org.apache.any23.vocab.DOAC;
+import org.apache.any23.vocab.FOAF;
+import org.apache.any23.vocab.HResume;
+import org.apache.commons.lang.UnhandledException;
+import org.openrdf.model.BNode;
+import org.openrdf.model.Resource;
+import org.openrdf.model.vocabulary.RDF;
+import org.w3c.dom.Node;
+import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor;
+import org.apache.any23.extractor.html.HTMLDocument;
+import org.apache.any23.extractor.html.DomUtils;
+import java.util.List;
+
+/**
+ * Extractor for the <a href="http://microformats.org/wiki/hresume">hResume</a>
+ * microformat.
+ *
+ * @author Nisala Nirmana
+ */
+public class HResumeExtractor extends EntityBasedMicroformatExtractor {
+
+    private static final HResume vResume = HResume.getInstance();
+
+    private static final String[] resumeFields = {
+            "name",
+            "summary",
+            "contact",//toDo Hcard
+            "education",
+            "experience",
+            "skill",
+            "affiliation"//toDo Hcard
+    };
+
+    @Override
+    public ExtractorDescription getDescription() {
+        return HResumeExtractorFactory.getDescriptionInstance();
+    }
+
+    @Override
+    public String getBaseClassName() {
+        return Microformats2Prefixes.CLASS_PREFIX + "resume";
+    }
+
+    @Override
+    protected void resetExtractor() {
+        // Empty.
+    }
+
+    @Override
+    protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
+        if (null == node) return false;
+        BNode person = getBlankNodeFor(node);
+        out.writeTriple(person, RDF.TYPE, vResume.Resume);
+        final HTMLDocument fragment = new HTMLDocument(node);
+
+        addName(fragment, person);
+        addSummary(fragment, person);
+        addSkills(fragment, person);
+
+        addExperiences(fragment, person);
+        addEducations(fragment, person);
+
+
+        final TagSoupExtractionResult tser = (TagSoupExtractionResult) out;
+        tser.addResourceRoot(
+                DomUtils.getXPathListForNode(node),
+                person,
+                this.getClass()
+        );
+
+        return true;
+    }
+
+    private void addName(HTMLDocument doc, Resource person) {
+        HTMLDocument.TextField name = doc.getSingularTextField(
+                Microformats2Prefixes.PROPERTY_PREFIX + resumeFields[0]);
+        conditionallyAddStringProperty(
+                name.source(),
+                person,
+                vResume.name,
+                name.value()
+        );
+    }
+
+    private void addSummary(HTMLDocument doc, Resource person) {
+        HTMLDocument.TextField summary = doc.getSingularTextField(
+                Microformats2Prefixes.PROPERTY_PREFIX + resumeFields[1]);
+        conditionallyAddStringProperty(
+                summary.source(),
+                person,
+                vResume.summary,
+                summary.value()
+        );
+    }
+
+    private void addSkills(HTMLDocument doc, Resource person) {
+        final HTMLDocument.TextField[] skills = doc.getPluralTextField(
+                Microformats2Prefixes.PROPERTY_PREFIX + resumeFields[5]);
+        for (HTMLDocument.TextField skill : skills) {
+            conditionallyAddStringProperty(
+                    skill.source(),
+                    person,
+                    vResume.skill,
+                    skill.value()
+            );
+        }
+
+    }
+
+    private void addExperiences(HTMLDocument doc, Resource person) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + resumeFields[4] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "event");
+        if (nodes.isEmpty())
+            return;
+        HEventExtractorFactory factory = new HEventExtractorFactory();
+        HEventExtractor extractor = factory.createExtractor();
+        for (Node node : nodes) {
+            BNode event = valueFactory.createBNode();
+            addURIProperty(event, RDF.TYPE, vResume.experience);
+            extractor.extractEntityAsEmbeddedProperty(new HTMLDocument(node), event,
+                    getCurrentExtractionResult());
+        }
+    }
+
+    private void addEducations(HTMLDocument doc, Resource person) throws ExtractionException {
+        List<Node> nodes = doc.findAllByClassName(Microformats2Prefixes.PROPERTY_PREFIX + resumeFields[3] +
+                Microformats2Prefixes.SPACE_SEPARATOR + Microformats2Prefixes.CLASS_PREFIX + "event");
+        if (nodes.isEmpty())
+            return;
+        HEventExtractorFactory factory = new HEventExtractorFactory();
+        HEventExtractor extractor = factory.createExtractor();
+        for (Node node : nodes) {
+            BNode event = valueFactory.createBNode();
+            addURIProperty(event, RDF.TYPE, vResume.education);
+            extractor.extractEntityAsEmbeddedProperty(new HTMLDocument(node), event,
+                    getCurrentExtractionResult());
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractorFactory.java
new file mode 100644
index 0000000..a8120eb
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HResumeExtractorFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+
+/**
+ * @author Nisala Nirmana
+ *
+ */
+public class HResumeExtractorFactory extends SimpleExtractorFactory<HResumeExtractor> implements
+        ExtractorFactory<HResumeExtractor> {
+
+    public static final String NAME = "html-mf2-h-resume";
+    
+    public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "doac", "foaf");
+
+    private static final ExtractorDescription descriptionInstance = new HResumeExtractorFactory();
+    
+    public HResumeExtractorFactory() {
+        super(
+                HResumeExtractorFactory.NAME, 
+                HResumeExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+                "example-mf2-h-resume.html");
+    }
+    
+    @Override
+    public HResumeExtractor createExtractor() {
+        return new HResumeExtractor();
+    }
+
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
index 18ac1b1..d6b3349 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/Microformats2Prefixes.java
@@ -23,4 +23,5 @@ public class Microformats2Prefixes {
     public static final String URL_PROPERTY_PREFIX = "u-";
     public static final String EMBEDDED_PROPERTY_PREFIX = "e-";
     public static final String TIME_PROPERTY_PREFIX = "dt-";
+    public static final String SPACE_SEPARATOR = " ";
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
----------------------------------------------------------------------
diff --git a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
index 34e3975..c7eaf54 100644
--- a/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
+++ b/core/src/main/resources/org/apache/any23/prefixes/prefixes.properties
@@ -34,6 +34,8 @@ skos=http://www.w3.org/2004/02/skos/core#
 hrecipe=http://sindice.com/hrecipe/
 hevent=http://sindice.com/hevent/
 hproduct=http://sindice.com/hproduct/
+hitem=http://sindice.com/hitem/
+hentry=http://sindice.com/hentry/
 sindice=http://vocab.sindice.net/
 og=http://opengraphprotocol.org/schema/
 fb=http://www.facebook.com/2008/fbml#

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorTest.java
new file mode 100644
index 0000000..cc2974d
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HEntryExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HEntryExtractorTest extends AbstractExtractorTestCase {
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HEntryExtractorFactory();
+    }
+
+    @Test
+    public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+        assertExtract("/microformats2/h-entry/h-entry-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 10);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
index 3b46a7a..49c1755 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HProductExtractorTest.java
@@ -32,6 +32,6 @@ public class HProductExtractorTest extends AbstractExtractorTestCase {
     public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
         assertExtract("/microformats2/h-product/h-product-test.html");
         assertModelNotEmpty();
-        assertStatementsSize(null, null, null, 11);
+        assertStatementsSize(null, null, null, 6);
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/test/java/org/apache/any23/extractor/html/microformats2/HResumeExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/microformats2/HResumeExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HResumeExtractorTest.java
new file mode 100644
index 0000000..dd2f5d1
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/html/microformats2/HResumeExtractorTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.html.microformats2;
+
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.html.AbstractExtractorTestCase;
+import org.junit.Test;
+import org.openrdf.repository.RepositoryException;
+import org.openrdf.rio.RDFHandlerException;
+
+public class HResumeExtractorTest extends AbstractExtractorTestCase {
+    protected ExtractorFactory<?> getExtractorFactory() {
+        return new HResumeExtractorFactory();
+    }
+
+    @Test
+    public void testModelNotEmpty() throws RepositoryException, RDFHandlerException {
+        assertExtract("/microformats2/h-resume/h-resume-test.html");
+        assertModelNotEmpty();
+        assertStatementsSize(null, null, null, 12);
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
index b4f8b7a..c58e2a1 100644
--- a/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
+++ b/core/src/test/java/org/apache/any23/vocab/RDFSchemaUtilsTest.java
@@ -43,7 +43,7 @@ public class RDFSchemaUtilsTest {
      */
     @Test
     public void testSerializeVocabulariesNTriples() {
-        serializeVocabularies(RDFFormat.NTRIPLES, 1920);
+        serializeVocabularies(RDFFormat.NTRIPLES, 2012);//1920
     }
 
     /**
@@ -53,7 +53,7 @@ public class RDFSchemaUtilsTest {
      */
     @Test
     public void testSerializeVocabulariesRDFXML() {
-        serializeVocabularies(RDFFormat.RDFXML, 4992); // Effective lines + separators.
+        serializeVocabularies(RDFFormat.RDFXML, 5252); // Effective lines + separators. //4992
     }
 
     private void serializeVocabularies(RDFFormat format, int expectedLines) {

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/test-resources/src/test/resources/microformats2/h-entry/h-entry-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-entry/h-entry-test.html b/test-resources/src/test/resources/microformats2/h-entry/h-entry-test.html
new file mode 100644
index 0000000..f3c8cf7
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-entry/h-entry-test.html
@@ -0,0 +1,53 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+<div class="h-entry">
+    <h1><a class="p-name u-url" href="http://microformats.org/2012/06/25/microformats-org-at-7">microformats.org at 7</a></h1>
+	
+	<p>Published 
+        <time class="dt-published" datetime="2012-03-25T17:08:26">March 25th, 2012</time> 
+    </p>
+	
+    <div class="e-content">
+        <p class="p-summary">Last week the microformats.org community 
+            celebrated its 7th birthday at a gathering hosted by Mozilla in 
+            San Francisco and recognized accomplishments, challenges, and 
+            opportunities.</p>
+
+        <p>The microformats tagline “humans first, machines second” 
+            forms the basis of many of our 
+				<a href="http://microformats.org/wiki/principles">principles</a>, and 
+            in that regard, we’d like to recognize a few people and 
+            thank them for their years of volunteer service </p>
+    </div>
+	
+    <p>Updated 
+        <time class="dt-updated" datetime="2012-06-25T17:08:26">June 25th, 2012</time> 
+    </p>
+	
+	<div class="p-location h-geo">
+		<p>Location
+			<abbr class="p-latitude" title="37.408183">N 37° 24.491</abbr>,  
+			<abbr class="p-longitude" title="-122.13855">W 122° 08.313</abbr>
+		</p>
+	</div>
+</div>
+
+</body>
+
+</html>

http://git-wip-us.apache.org/repos/asf/any23/blob/417b71a7/test-resources/src/test/resources/microformats2/h-resume/h-resume-test.html
----------------------------------------------------------------------
diff --git a/test-resources/src/test/resources/microformats2/h-resume/h-resume-test.html b/test-resources/src/test/resources/microformats2/h-resume/h-resume-test.html
new file mode 100644
index 0000000..15dd835
--- /dev/null
+++ b/test-resources/src/test/resources/microformats2/h-resume/h-resume-test.html
@@ -0,0 +1,49 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE html>
+<html>
+
+<body>
+<div class="h-resume">
+    <p class="p-name">Tim Berners-Lee</p>
+
+    <p class="p-summary">Invented the World Wide Web.</p><hr />
+	
+    <div class="p-education h-event">
+	Education : 
+        <time class="dt-start" datetime="1973-09">1973</time> 
+        <time class="dt-end" datetime="1976-06">1976</time>
+    </div>
+
+    <div class="p-experience h-event">
+        <p>Experiance :
+            <time class="dt-start" datetime="2009-01-18">Jan 2009</time>  Present
+            <time class="dt-duration" datetime="P2Y11M">(2 years 11 month)</time>
+        </p>
+    </div>
+
+    <div>
+	 Skills:     
+    	<ul>
+        	<li class="p-skill">information systems</li>
+        	<li class="p-skill">advocacy</li>
+        	<li class="p-skill">leadership</li>
+    	<ul>
+    </div>   
+
+</div>
+</body>
+
+</html>